author     Dimitry Andric <dim@FreeBSD.org>  2017-05-29 16:25:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-05-29 16:25:25 +0000
commit     ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree       568d786a59d49bef961dcb9bd09d422701b9da5b /lib
parent     b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)
Vendor import of llvm trunk r304149 (tag: vendor/llvm/llvm-trunk-r304149)
Notes:
    svn path=/vendor/llvm/dist/; revision=319140
    svn path=/vendor/llvm/llvm-trunk-r304149/; revision=319141; tag=vendor/llvm/llvm-trunk-r304149
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/ConstantFolding.cpp7
-rw-r--r--lib/Analysis/InstructionSimplify.cpp152
-rw-r--r--lib/Analysis/Lint.cpp8
-rw-r--r--lib/Analysis/LoopPass.cpp23
-rw-r--r--lib/Analysis/ScalarEvolution.cpp74
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp20
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--lib/Analysis/ValueTracking.cpp6
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp34
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp10
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.cpp8
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.cpp120
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.h55
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHashAttributes.def55
-rw-r--r--lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp9
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp8
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h4
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp125
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h9
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp2
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp2
-rw-r--r--lib/CodeGen/BranchCoalescing.cpp6
-rw-r--r--lib/CodeGen/BranchFolding.cpp11
-rw-r--r--lib/CodeGen/BranchFolding.h1
-rw-r--r--lib/CodeGen/BranchRelaxation.cpp6
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp4
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp4
-rw-r--r--lib/CodeGen/DetectDeadLanes.cpp3
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp4
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp8
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp2
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--lib/CodeGen/FuncletLayout.cpp2
-rw-r--r--lib/CodeGen/GlobalISel/CMakeLists.txt5
-rw-r--r--lib/CodeGen/GlobalISel/GlobalISel.cpp1
-rw-r--r--lib/CodeGen/GlobalISel/Localizer.cpp125
-rw-r--r--lib/CodeGen/GlobalMerge.cpp5
-rw-r--r--lib/CodeGen/IfConversion.cpp33
-rw-r--r--lib/CodeGen/ImplicitNullChecks.cpp4
-rw-r--r--lib/CodeGen/InterleavedAccessPass.cpp6
-rw-r--r--lib/CodeGen/LexicalScopes.cpp5
-rw-r--r--lib/CodeGen/LiveDebugValues.cpp4
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp6
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp49
-rw-r--r--lib/CodeGen/LivePhysRegs.cpp89
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp4
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp4
-rw-r--r--lib/CodeGen/LowerEmuTLS.cpp2
-rw-r--r--lib/CodeGen/MachineBlockFrequencyInfo.cpp6
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp4
-rw-r--r--lib/CodeGen/MachineCSE.cpp12
-rw-r--r--lib/CodeGen/MachineCombiner.cpp4
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp4
-rw-r--r--lib/CodeGen/MachineLICM.cpp10
-rw-r--r--lib/CodeGen/MachineOutliner.cpp2
-rw-r--r--lib/CodeGen/MachinePipeliner.cpp4
-rw-r--r--lib/CodeGen/MachineScheduler.cpp14
-rw-r--r--lib/CodeGen/MachineSink.cpp8
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp8
-rw-r--r--lib/CodeGen/MachineVerifier.cpp5
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp4
-rw-r--r--lib/CodeGen/PHIElimination.cpp4
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp4
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp6
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp9
-rw-r--r--lib/CodeGen/RenameIndependentSubregs.cpp4
-rw-r--r--lib/CodeGen/SafeStack.cpp6
-rw-r--r--lib/CodeGen/ScalarizeMaskedMemIntrin.cpp8
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp206
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp176
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp81
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp90
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp55
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp34
-rw-r--r--lib/CodeGen/ShadowStackGCLowering.cpp6
-rw-r--r--lib/CodeGen/ShrinkWrap.cpp5
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--lib/CodeGen/SlotIndexes.cpp2
-rw-r--r--lib/CodeGen/SpillPlacement.cpp6
-rw-r--r--lib/CodeGen/StackColoring.cpp10
-rw-r--r--lib/CodeGen/StackProtector.cpp4
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp6
-rw-r--r--lib/CodeGen/TailDuplication.cpp3
-rw-r--r--lib/CodeGen/TailDuplicator.cpp4
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp6
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp2
-rw-r--r--lib/DebugInfo/CodeView/CMakeLists.txt1
-rw-r--r--lib/DebugInfo/CodeView/CVTypeVisitor.cpp20
-rw-r--r--lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp371
-rw-r--r--lib/DebugInfo/CodeView/TypeSerializer.cpp230
-rw-r--r--lib/DebugInfo/CodeView/TypeStreamMerger.cpp428
-rw-r--r--lib/DebugInfo/CodeView/TypeTableCollection.cpp3
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp119
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp8
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp13
-rw-r--r--lib/DebugInfo/DWARF/DWARFFormValue.cpp4
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnit.cpp39
-rw-r--r--lib/DebugInfo/MSF/MappedBlockStream.cpp32
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp25
-rw-r--r--lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp19
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStream.cpp4
-rw-r--r--lib/Demangle/ItaniumDemangle.cpp54
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp37
-rw-r--r--lib/Fuzzer/FuzzerUtilPosix.cpp17
-rw-r--r--lib/Fuzzer/test/fuzzer-segv.test2
-rw-r--r--lib/IR/AttributeImpl.h47
-rw-r--r--lib/IR/Attributes.cpp396
-rw-r--r--lib/IR/BasicBlock.cpp16
-rw-r--r--lib/IR/DebugLoc.cpp2
-rw-r--r--lib/IR/Instructions.cpp6
-rw-r--r--lib/IR/IntrinsicInst.cpp26
-rw-r--r--lib/IR/Module.cpp4
-rw-r--r--lib/IR/Verifier.cpp40
-rw-r--r--lib/LTO/LTO.cpp44
-rw-r--r--lib/LTO/LTOBackend.cpp21
-rw-r--r--lib/Linker/IRMover.cpp17
-rw-r--r--lib/MC/WasmObjectWriter.cpp34
-rw-r--r--lib/Object/COFFObjectFile.cpp4
-rw-r--r--lib/Object/MachOObjectFile.cpp4
-rw-r--r--lib/Object/WasmObjectFile.cpp4
-rw-r--r--lib/Option/OptTable.cpp14
-rw-r--r--lib/Passes/PassBuilder.cpp25
-rw-r--r--lib/ProfileData/InstrProf.cpp16
-rw-r--r--lib/Support/APInt.cpp4
-rw-r--r--lib/Support/BinaryStreamReader.cpp27
-rw-r--r--lib/Support/ConvertUTF.cpp31
-rw-r--r--lib/Support/DebugCounter.cpp2
-rw-r--r--lib/Support/DynamicLibrary.cpp11
-rw-r--r--lib/Support/GraphWriter.cpp1
-rw-r--r--lib/Support/Host.cpp1
-rw-r--r--lib/Support/Path.cpp1
-rw-r--r--lib/Support/Triple.cpp17
-rw-r--r--lib/Support/YAMLParser.cpp4
-rw-r--r--lib/TableGen/Record.cpp39
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp5
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp119
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp11
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp31
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp127
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h2
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td2
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.cpp13
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkor.td86
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorDetails.td1109
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorWriteRes.td403
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp1
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp12
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td20
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp45
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h3
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp213
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h8
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp83
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp93
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h5
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.cpp6
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h12
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp46
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp8
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h2
-rw-r--r--lib/Target/AMDGPU/R600RegisterInfo.td2
-rw-r--r--lib/Target/AMDGPU/SIDefines.h19
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp52
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h2
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp4
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td180
-rw-r--r--lib/Target/AMDGPU/SOPInstructions.td4
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp11
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h4
-rw-r--r--lib/Target/AMDGPU/VOP1Instructions.td33
-rw-r--r--lib/Target/AMDGPU/VOP2Instructions.td70
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td9
-rw-r--r--lib/Target/AMDGPU/VOPCInstructions.td37
-rw-r--r--lib/Target/AMDGPU/VOPInstructions.td114
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp111
-rw-r--r--lib/Target/ARM/ARMCallLowering.h5
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp4
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp8
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td496
-rw-r--r--lib/Target/ARM/ARMSchedule.td11
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td9
-rw-r--r--lib/Target/ARM/ARMScheduleR52.td103
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td10
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp63
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h62
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp4
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp10
-rw-r--r--lib/Target/AVR/AVRInstrInfo.td1
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp9
-rw-r--r--lib/Target/BPF/BPFISelLowering.h4
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonPseudo.td39
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h1
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp4
-rw-r--r--lib/Target/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/MSP430.td14
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp27
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td5
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp27
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h11
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp34
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp7
-rw-r--r--lib/Target/Mips/MipsSubtarget.h7
-rw-r--r--lib/Target/Nios2/CMakeLists.txt18
-rw-r--r--lib/Target/Nios2/LLVMBuild.txt61
-rw-r--r--lib/Target/Nios2/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt25
-rw-r--r--lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp25
-rw-r--r--lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h34
-rw-r--r--lib/Target/Nios2/Nios2.h25
-rw-r--r--lib/Target/Nios2/Nios2.td29
-rw-r--r--lib/Target/Nios2/Nios2InstrFormats.td117
-rw-r--r--lib/Target/Nios2/Nios2InstrInfo.td50
-rw-r--r--lib/Target/Nios2/Nios2RegisterInfo.td60
-rw-r--r--lib/Target/Nios2/Nios2TargetMachine.cpp46
-rw-r--r--lib/Target/Nios2/Nios2TargetMachine.h30
-rw-r--r--lib/Target/Nios2/TargetInfo/CMakeLists.txt1
-rw-r--r--lib/Target/Nios2/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp24
-rw-r--r--lib/Target/PowerPC/PPCExpandISEL.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp92
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h6
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td4
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td2
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td76
-rw-r--r--lib/Target/SystemZ/SystemZExpandPseudo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp30
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h1
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp5
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/X86.td3
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp8
-rw-r--r--lib/Target/X86/X86InstrAVX512.td152
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td24
-rw-r--r--lib/Target/X86/X86InstrFMA.td13
-rw-r--r--lib/Target/X86/X86InstrFormats.td10
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp3406
-rw-r--r--lib/Target/X86/X86InstrInfo.td23
-rw-r--r--lib/Target/X86/X86InstrMMX.td5
-rw-r--r--lib/Target/X86/X86InstrSSE.td66
-rw-r--r--lib/Target/X86/X86InstrXOP.td8
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp20
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp17
-rw-r--r--lib/Target/X86/X86LegalizerInfo.h1
-rw-r--r--lib/Target/X86/X86Subtarget.cpp1
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--lib/Transforms/Coroutines/CoroCleanup.cpp1
-rw-r--r--lib/Transforms/Coroutines/CoroEarly.cpp3
-rw-r--r--lib/Transforms/Coroutines/CoroElide.cpp1
-rw-r--r--lib/Transforms/Coroutines/CoroFrame.cpp33
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp86
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp15
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp16
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp48
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h6
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp92
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp19
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp5
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp6
-rw-r--r--lib/Transforms/Scalar/GVN.cpp164
-rw-r--r--lib/Transforms/Scalar/GVNSink.cpp872
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp4
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp7
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp42
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp33
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp7
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp70
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp3
-rw-r--r--lib/Transforms/Scalar/SROA.cpp2
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp76
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp2
-rw-r--r--lib/Transforms/Utils/FunctionComparator.cpp10
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp7
-rw-r--r--lib/Transforms/Utils/Local.cpp68
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp50
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp4
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp157
299 files changed, 7765 insertions, 7193 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 0ca712bbfe70..79517ec6a3a8 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -687,11 +687,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
// bits.
if (Opc == Instruction::And) {
- unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
- KnownBits Known0(BitWidth);
- KnownBits Known1(BitWidth);
- computeKnownBits(Op0, Known0, DL);
- computeKnownBits(Op1, Known1, DL);
+ KnownBits Known0 = computeKnownBits(Op0, DL);
+ KnownBits Known1 = computeKnownBits(Op1, DL);
if ((Known1.One | Known0.Zero).isAllOnesValue()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2e72d5aa8269..122442bafb11 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -688,9 +688,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (isNUW)
return Op0;
- unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
@@ -1309,15 +1307,13 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// If any bits in the shift amount make that value greater than or equal to
// the number of bits in the type, the shift is undefined.
- unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (Known.One.getLimitedValue() >= BitWidth)
+ KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (Known.One.getLimitedValue() >= Known.getBitWidth())
return UndefValue::get(Op0->getType());
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
- unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth);
+ unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth());
if (Known.countMinTrailingZeros() >= NumValidShiftBits)
return Op0;
@@ -1343,9 +1339,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
- unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
- KnownBits Op0Known(BitWidth);
- computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
if (Op0Known.One[0])
return Op0;
}
@@ -1428,6 +1422,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
ICmpInst *UnsignedICmp, bool IsAnd) {
Value *X, *Y;
@@ -1560,20 +1556,8 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
-/// Commuted variants are assumed to be handled by calling this function again
-/// with the parameters swapped.
-static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
- return X;
-
- if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
- return X;
-
- if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
- return X;
-
+static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
// (icmp (add V, C0), C1) & (icmp V, C0)
- Type *ITy = Op0->getType();
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
Value *V;
@@ -1587,6 +1571,7 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
+ Type *ITy = Op0->getType();
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
@@ -1617,18 +1602,29 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-/// Commuted variants are assumed to be handled by calling this function again
-/// with the parameters swapped.
-static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ return X;
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
return X;
- if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0))
return X;
- if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
+ if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
+ return X;
+
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
+ return X;
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
return X;
+ return nullptr;
+}
+
+static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
// (icmp (add V, C0), C1) | (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1674,19 +1670,24 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyPossiblyCastedAndOrOfICmps(ICmpInst *Cmp0, ICmpInst *Cmp1,
- bool IsAnd, CastInst *Cast) {
- Value *V =
- IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1);
- if (!V)
- return nullptr;
- if (!Cast)
- return V;
+static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ return X;
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+ return X;
- // If we looked through casts, we can only handle a constant simplification
- // because we are not allowed to create a cast instruction here.
- if (auto *C = dyn_cast<Constant>(V))
- return ConstantExpr::getCast(Cast->getOpcode(), C, Cast->getType());
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0))
+ return X;
+
+ if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
+ return X;
+
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
+ return X;
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
+ return X;
return nullptr;
}
@@ -1706,11 +1707,18 @@ static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) {
if (!Cmp0 || !Cmp1)
return nullptr;
- if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp0, Cmp1, IsAnd, Cast0))
- return V;
- if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp1, Cmp0, IsAnd, Cast0))
+ Value *V =
+ IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1);
+ if (!V)
+ return nullptr;
+ if (!Cast0)
return V;
+ // If we looked through casts, we can only handle a constant simplification
+ // because we are not allowed to create a cast instruction here.
+ if (auto *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType());
+
return nullptr;
}
@@ -1927,37 +1935,27 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
MaxRecurse))
return V;
- // (A & C)|(B & D)
- Value *C = nullptr, *D = nullptr;
- if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
- match(Op1, m_And(m_Value(B), m_Value(D)))) {
- ConstantInt *C1 = dyn_cast<ConstantInt>(C);
- ConstantInt *C2 = dyn_cast<ConstantInt>(D);
- if (C1 && C2 && (C1->getValue() == ~C2->getValue())) {
+ // (A & C1)|(B & C2)
+ const APInt *C1, *C2;
+ if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
+ match(Op1, m_And(m_Value(B), m_APInt(C2)))) {
+ if (*C1 == ~*C2) {
// (A & C1)|(B & C2)
// If we have: ((V + N) & C1) | (V & C2)
// .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
// replace with V+N.
- Value *V1, *V2;
- if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
- match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ Value *N;
+ if (C2->isMask() && // C2 == 0+1+
+ match(A, m_c_Add(m_Specific(B), m_Value(N)))) {
// Add commutes, try both ways.
- if (V1 == B &&
- MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return A;
- if (V2 == B &&
- MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
- if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
- match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ if (C1->isMask() &&
+ match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
// Add commutes, try both ways.
- if (V1 == A &&
- MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return B;
- if (V2 == A &&
- MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
}
}
@@ -3372,9 +3370,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (ICmpInst::isEquality(Pred)) {
const APInt *RHSVal;
if (match(RHS, m_APInt(RHSVal))) {
- unsigned BitWidth = RHSVal->getBitWidth();
- KnownBits LHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
if (LHSKnown.Zero.intersects(*RHSVal) ||
!LHSKnown.One.isSubsetOf(*RHSVal))
return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
@@ -3539,6 +3535,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (V == Op)
return RepOp;
+ // We cannot replace a constant, and shouldn't even try.
+ if (isa<Constant>(Op))
+ return nullptr;
+
auto *I = dyn_cast<Instruction>(V);
if (!I)
return nullptr;
@@ -4444,19 +4444,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
// X + undef -> undef
- if (isa<UndefValue>(RHS))
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
return UndefValue::get(ReturnType);
return nullptr;
}
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
+ // 0 * X -> { 0, false }
// X * 0 -> { 0, false }
- if (match(RHS, m_Zero()))
+ if (match(LHS, m_Zero()) || match(RHS, m_Zero()))
return Constant::getNullValue(ReturnType);
+ // undef * X -> { 0, false }
// X * undef -> { 0, false }
- if (match(RHS, m_Undef()))
+ if (match(LHS, m_Undef()) || match(RHS, m_Undef()))
return Constant::getNullValue(ReturnType);
return nullptr;
@@ -4680,9 +4682,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
// In general, it is possible for computeKnownBits to determine all bits in a
// value even when the operands are not all constants.
if (!Result && I->getType()->isIntOrIntVectorTy()) {
- unsigned BitWidth = I->getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
+ KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
if (Known.isConstant())
Result = ConstantInt::get(I->getType(), Known.getConstant());
}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 471ccb62970d..e6391792bc23 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -534,9 +534,7 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
VectorType *VecTy = dyn_cast<VectorType>(V->getType());
if (!VecTy) {
- unsigned BitWidth = V->getType()->getIntegerBitWidth();
- KnownBits Known(BitWidth);
- computeKnownBits(V, Known, DL, 0, AC, dyn_cast<Instruction>(V), DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
return Known.isZero();
}
@@ -550,14 +548,12 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
// For a vector, KnownZero will only be true if all values are zero, so check
// this per component
- unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
Constant *Elem = C->getAggregateElement(I);
if (isa<UndefValue>(Elem))
return true;
- KnownBits Known(BitWidth);
- computeKnownBits(Elem, Known, DL);
+ KnownBits Known = computeKnownBits(Elem, DL);
if (Known.isZero())
return true;
}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 0b5f6266e373..e988f6444a58 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -73,30 +73,23 @@ LPPassManager::LPPassManager()
CurrentLoop = nullptr;
}
-// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
-Loop &LPPassManager::addLoop(Loop *ParentLoop) {
- // Create a new loop. LI will take ownership.
- Loop *L = new Loop();
-
- // Insert into the loop nest and the loop queue.
- if (!ParentLoop) {
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::addLoop(Loop &L) {
+ if (!L.getParentLoop()) {
// This is the top level loop.
- LI->addTopLevelLoop(L);
- LQ.push_front(L);
- return *L;
+ LQ.push_front(&L);
+ return;
}
- ParentLoop->addChildLoop(L);
// Insert L into the loop queue after the parent loop.
for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
- if (*I == L->getParentLoop()) {
+ if (*I == L.getParentLoop()) {
// deque does not support insert after.
++I;
- LQ.insert(I, 1, L);
- break;
+ LQ.insert(I, 1, &L);
+ return;
}
}
- return *L;
}
/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 78ded8141c08..d280fda0a162 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2178,6 +2178,63 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
return Flags;
}
+bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L,
+ DominatorTree &DT, LoopInfo &LI) {
+ if (!isLoopInvariant(S, L))
+ return false;
+ // If a value depends on a SCEVUnknown which is defined after the loop, we
+ // conservatively assume that we cannot calculate it at the loop's entry.
+ struct FindDominatedSCEVUnknown {
+ bool Found = false;
+ const Loop *L;
+ DominatorTree &DT;
+ LoopInfo &LI;
+
+ FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
+ : L(L), DT(DT), LI(LI) {}
+
+ bool checkSCEVUnknown(const SCEVUnknown *SU) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue())) {
+ if (DT.dominates(L->getHeader(), I->getParent()))
+ Found = true;
+ else
+ assert(DT.dominates(I->getParent(), L->getHeader()) &&
+ "No dominance relationship between SCEV and loop?");
+ }
+ return false;
+ }
+
+ bool follow(const SCEV *S) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return false;
+ case scAddRecExpr:
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUDivExpr:
+ return true;
+ case scUnknown:
+ return checkSCEVUnknown(cast<SCEVUnknown>(S));
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ return false;
+ }
+
+ bool isDone() { return Found; }
+ };
+
+ FindDominatedSCEVUnknown FSU(L, DT, LI);
+ SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU);
+ ST.visitAll(S);
+ return !FSU.Found;
+}
+
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags,
@@ -2459,7 +2516,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -2734,7 +2791,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -4648,10 +4705,7 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- unsigned BitWidth = getTypeSizeInBits(U->getType());
- KnownBits Known(BitWidth);
- computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC,
- nullptr, &DT);
+ KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
return Known.countMinTrailingZeros();
}
@@ -4831,8 +4885,7 @@ ScalarEvolution::getRange(const SCEV *S,
const DataLayout &DL = getDataLayout();
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
- KnownBits Known(BitWidth);
- computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT);
+ KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (Known.One != ~Known.Zero + 1)
ConservativeResult =
ConservativeResult.intersectWith(ConstantRange(Known.One,
@@ -9537,8 +9590,11 @@ struct SCEVCollectAddRecMultiplies {
bool HasAddRec = false;
SmallVector<const SCEV *, 0> Operands;
for (auto Op : Mul->operands()) {
- if (isa<SCEVUnknown>(Op)) {
+ const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
+ if (Unknown && !isa<CallInst>(Unknown->getValue())) {
Operands.push_back(Op);
+ } else if (Unknown) {
+ HasAddRec = true;
} else {
bool ContainsAddRec;
SCEVHasAddRec ContiansAddRec(ContainsAddRec);
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 86cbd79aa84e..f9b9df2bc707 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1305,12 +1305,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Expand the core addrec. If we need post-loop scaling, force it to
// expand to an integer type to avoid the need for additional casting.
Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ // We can't use a pointer type for the addrec if the pointer type is
+ // non-integral.
+ Type *AddRecPHIExpandTy =
+ DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+
// In some cases, we decide to reuse an existing phi node but need to truncate
// it and/or invert the step.
Type *TruncTy = nullptr;
bool InvertStep = false;
- PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
- TruncTy, InvertStep);
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
+ IntTy, TruncTy, InvertStep);
// Accommodate post-inc mode, if necessary.
Value *Result;
@@ -1383,8 +1388,15 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
- const SCEV *const OffsetArray[1] = { PostLoopOffset };
- Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ if (Result->getType()->isIntegerTy()) {
+ Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
+ const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)};
+ Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base);
+ } else {
+ const SCEV *const OffsetArray[1] = {PostLoopOffset};
+ Result =
+ expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result);
+ }
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateAdd(Result,
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 8a5d10473662..7a8d4f3be24f 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -149,6 +149,10 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
return TTIImpl->isLegalMaskedGather(DataType);
}
+bool TargetTransformInfo::prefersVectorizedAddressing() const {
+ return TTIImpl->prefersVectorizedAddressing();
+}
+
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 8e6c1096eec8..bd79cd56a18b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -149,8 +149,10 @@ static KnownBits computeKnownBits(const Value *V, unsigned Depth,
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE) {
+ return ::computeKnownBits(V, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1f8b50342c2d..c1d81ac203a1 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -660,10 +660,12 @@ void ModuleBitcodeWriter::writeAttributeTable() {
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- const AttributeList &A = Attrs[i];
- for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i)
- Record.push_back(
- VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)}));
+ AttributeList AL = Attrs[i];
+ for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) {
+ AttributeSet AS = AL.getAttributes(i);
+ if (AS.hasAttributes())
+ Record.push_back(VE.getAttributeGroupID({i, AS}));
+ }
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
Record.clear();
@@ -3413,30 +3415,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// Create value IDs for undefined references.
forEachSummary([&](GVInfo I) {
- if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) {
- for (auto &RI : VS->refs())
- assignValueId(RI.getGUID());
- return;
- }
-
- auto *FS = dyn_cast<FunctionSummary>(I.second);
- if (!FS)
- return;
- for (auto &RI : FS->refs())
+ for (auto &RI : I.second->refs())
assignValueId(RI.getGUID());
-
- for (auto &EI : FS->calls()) {
- GlobalValue::GUID GUID = EI.first.getGUID();
- if (!hasValueId(GUID)) {
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- GUID = Index.getGUIDFromOriginalID(GUID);
- if (GUID == 0 || !hasValueId(GUID))
- continue;
- }
- assignValueId(GUID);
- }
});
for (const auto &GVI : valueIds()) {
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index fd76400331d9..bb626baabd12 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -902,8 +902,11 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
}
// Do lookups for all attribute groups.
- for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
- IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)};
+ for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) {
+ AttributeSet AS = PAL.getAttributes(i);
+ if (!AS.hasAttributes())
+ continue;
+ IndexAndAttrSet Pair = {i, AS};
unsigned &Entry = AttributeGroupMap[Pair];
if (Entry == 0) {
AttributeGroups.push_back(Pair);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7ddb86d80bf0..d72cf5922987 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -628,12 +628,15 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
+ const Function *F = MF->getFunction();
+
+ if (isVerbose())
+ OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n';
+
// Print out constants referenced by the function
EmitConstantPool();
// Print the 'header' of function.
- const Function *F = MF->getFunction();
-
OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
@@ -1107,6 +1110,9 @@ void AsmPrinter::EmitFunctionBody() {
HI.Handler->endFunction(MF);
}
+ if (isVerbose())
+ OutStreamer->GetCommentOS() << "-- End function\n";
+
OutStreamer->AddBlankLine();
}
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 1b39e46ee466..114aea391a86 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1025,11 +1025,11 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
bool EmptyPrologue = true;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc()) {
PrologEndLoc = MI.getDebugLoc();
break;
- } else if (!MI.isDebugValue()) {
+ } else if (!MI.isMetaInstruction()) {
EmptyPrologue = false;
}
}
@@ -1562,7 +1562,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
EnumeratorCount++;
}
}
- FTI = FLRB.end();
+ FTI = FLRB.end(true);
}
std::string FullName = getFullyQualifiedName(Ty);
@@ -1869,7 +1869,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
MemberCount++;
}
- TypeIndex FieldTI = FLBR.end();
+ TypeIndex FieldTI = FLBR.end(true);
return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
!Info.NestedClasses.empty());
}
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 8e3b88d0af0e..201030f0ac5c 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -116,65 +116,17 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Collect all of the attributes for a particular DIE in single structure.
void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
-#define COLLECT_ATTR(NAME) \
- case dwarf::NAME: \
- Attrs.NAME = V; \
- break
for (const auto &V : Die.values()) {
DEBUG(dbgs() << "Attribute: "
<< dwarf::AttributeString(V.getAttribute())
<< " added.\n");
switch (V.getAttribute()) {
- COLLECT_ATTR(DW_AT_name);
- COLLECT_ATTR(DW_AT_accessibility);
- COLLECT_ATTR(DW_AT_address_class);
- COLLECT_ATTR(DW_AT_allocated);
- COLLECT_ATTR(DW_AT_artificial);
- COLLECT_ATTR(DW_AT_associated);
- COLLECT_ATTR(DW_AT_binary_scale);
- COLLECT_ATTR(DW_AT_bit_offset);
- COLLECT_ATTR(DW_AT_bit_size);
- COLLECT_ATTR(DW_AT_bit_stride);
- COLLECT_ATTR(DW_AT_byte_size);
- COLLECT_ATTR(DW_AT_byte_stride);
- COLLECT_ATTR(DW_AT_const_expr);
- COLLECT_ATTR(DW_AT_const_value);
- COLLECT_ATTR(DW_AT_containing_type);
- COLLECT_ATTR(DW_AT_count);
- COLLECT_ATTR(DW_AT_data_bit_offset);
- COLLECT_ATTR(DW_AT_data_location);
- COLLECT_ATTR(DW_AT_data_member_location);
- COLLECT_ATTR(DW_AT_decimal_scale);
- COLLECT_ATTR(DW_AT_decimal_sign);
- COLLECT_ATTR(DW_AT_default_value);
- COLLECT_ATTR(DW_AT_digit_count);
- COLLECT_ATTR(DW_AT_discr);
- COLLECT_ATTR(DW_AT_discr_list);
- COLLECT_ATTR(DW_AT_discr_value);
- COLLECT_ATTR(DW_AT_encoding);
- COLLECT_ATTR(DW_AT_enum_class);
- COLLECT_ATTR(DW_AT_endianity);
- COLLECT_ATTR(DW_AT_explicit);
- COLLECT_ATTR(DW_AT_is_optional);
- COLLECT_ATTR(DW_AT_location);
- COLLECT_ATTR(DW_AT_lower_bound);
- COLLECT_ATTR(DW_AT_mutable);
- COLLECT_ATTR(DW_AT_ordering);
- COLLECT_ATTR(DW_AT_picture_string);
- COLLECT_ATTR(DW_AT_prototyped);
- COLLECT_ATTR(DW_AT_small);
- COLLECT_ATTR(DW_AT_segment);
- COLLECT_ATTR(DW_AT_string_length);
- COLLECT_ATTR(DW_AT_threads_scaled);
- COLLECT_ATTR(DW_AT_upper_bound);
- COLLECT_ATTR(DW_AT_use_location);
- COLLECT_ATTR(DW_AT_use_UTF8);
- COLLECT_ATTR(DW_AT_variable_parameter);
- COLLECT_ATTR(DW_AT_virtuality);
- COLLECT_ATTR(DW_AT_visibility);
- COLLECT_ATTR(DW_AT_vtable_elem_location);
- COLLECT_ATTR(DW_AT_type);
+#define HANDLE_DIE_HASH_ATTR(NAME) \
+ case dwarf::NAME: \
+ Attrs.NAME = V; \
+ break;
+#include "DIEHashAttributes.def"
default:
break;
}
@@ -366,62 +318,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
// Go through the attributes from \param Attrs in the order specified in 7.27.4
// and hash them.
void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
-#define ADD_ATTR(ATTR) \
+#define HANDLE_DIE_HASH_ATTR(NAME) \
{ \
- if (ATTR) \
- hashAttribute(ATTR, Tag); \
+ if (Attrs.NAME) \
+ hashAttribute(Attrs.NAME, Tag); \
}
-
- ADD_ATTR(Attrs.DW_AT_name);
- ADD_ATTR(Attrs.DW_AT_accessibility);
- ADD_ATTR(Attrs.DW_AT_address_class);
- ADD_ATTR(Attrs.DW_AT_allocated);
- ADD_ATTR(Attrs.DW_AT_artificial);
- ADD_ATTR(Attrs.DW_AT_associated);
- ADD_ATTR(Attrs.DW_AT_binary_scale);
- ADD_ATTR(Attrs.DW_AT_bit_offset);
- ADD_ATTR(Attrs.DW_AT_bit_size);
- ADD_ATTR(Attrs.DW_AT_bit_stride);
- ADD_ATTR(Attrs.DW_AT_byte_size);
- ADD_ATTR(Attrs.DW_AT_byte_stride);
- ADD_ATTR(Attrs.DW_AT_const_expr);
- ADD_ATTR(Attrs.DW_AT_const_value);
- ADD_ATTR(Attrs.DW_AT_containing_type);
- ADD_ATTR(Attrs.DW_AT_count);
- ADD_ATTR(Attrs.DW_AT_data_bit_offset);
- ADD_ATTR(Attrs.DW_AT_data_location);
- ADD_ATTR(Attrs.DW_AT_data_member_location);
- ADD_ATTR(Attrs.DW_AT_decimal_scale);
- ADD_ATTR(Attrs.DW_AT_decimal_sign);
- ADD_ATTR(Attrs.DW_AT_default_value);
- ADD_ATTR(Attrs.DW_AT_digit_count);
- ADD_ATTR(Attrs.DW_AT_discr);
- ADD_ATTR(Attrs.DW_AT_discr_list);
- ADD_ATTR(Attrs.DW_AT_discr_value);
- ADD_ATTR(Attrs.DW_AT_encoding);
- ADD_ATTR(Attrs.DW_AT_enum_class);
- ADD_ATTR(Attrs.DW_AT_endianity);
- ADD_ATTR(Attrs.DW_AT_explicit);
- ADD_ATTR(Attrs.DW_AT_is_optional);
- ADD_ATTR(Attrs.DW_AT_location);
- ADD_ATTR(Attrs.DW_AT_lower_bound);
- ADD_ATTR(Attrs.DW_AT_mutable);
- ADD_ATTR(Attrs.DW_AT_ordering);
- ADD_ATTR(Attrs.DW_AT_picture_string);
- ADD_ATTR(Attrs.DW_AT_prototyped);
- ADD_ATTR(Attrs.DW_AT_small);
- ADD_ATTR(Attrs.DW_AT_segment);
- ADD_ATTR(Attrs.DW_AT_string_length);
- ADD_ATTR(Attrs.DW_AT_threads_scaled);
- ADD_ATTR(Attrs.DW_AT_upper_bound);
- ADD_ATTR(Attrs.DW_AT_use_location);
- ADD_ATTR(Attrs.DW_AT_use_UTF8);
- ADD_ATTR(Attrs.DW_AT_variable_parameter);
- ADD_ATTR(Attrs.DW_AT_virtuality);
- ADD_ATTR(Attrs.DW_AT_visibility);
- ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
- ADD_ATTR(Attrs.DW_AT_type);
-
+#include "DIEHashAttributes.def"
// FIXME: Add the extended attributes.
}
@@ -478,10 +380,12 @@ void DIEHash::computeHash(const DIE &Die) {
/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
/// with the inclusion of the full CU and all top level CU entities.
// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
-uint64_t DIEHash::computeCUSignature(const DIE &Die) {
+uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) {
Numbering.clear();
Numbering[&Die] = 1;
+ if (!DWOName.empty())
+ Hash.update(DWOName);
// Hash the DIE.
computeHash(Die);
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 996cd7ef3d2e..29337ae38a99 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -28,64 +28,15 @@ class CompileUnit;
class DIEHash {
// Collection of all attributes used in hashing a particular DIE.
struct DIEAttrs {
- DIEValue DW_AT_name;
- DIEValue DW_AT_accessibility;
- DIEValue DW_AT_address_class;
- DIEValue DW_AT_allocated;
- DIEValue DW_AT_artificial;
- DIEValue DW_AT_associated;
- DIEValue DW_AT_binary_scale;
- DIEValue DW_AT_bit_offset;
- DIEValue DW_AT_bit_size;
- DIEValue DW_AT_bit_stride;
- DIEValue DW_AT_byte_size;
- DIEValue DW_AT_byte_stride;
- DIEValue DW_AT_const_expr;
- DIEValue DW_AT_const_value;
- DIEValue DW_AT_containing_type;
- DIEValue DW_AT_count;
- DIEValue DW_AT_data_bit_offset;
- DIEValue DW_AT_data_location;
- DIEValue DW_AT_data_member_location;
- DIEValue DW_AT_decimal_scale;
- DIEValue DW_AT_decimal_sign;
- DIEValue DW_AT_default_value;
- DIEValue DW_AT_digit_count;
- DIEValue DW_AT_discr;
- DIEValue DW_AT_discr_list;
- DIEValue DW_AT_discr_value;
- DIEValue DW_AT_encoding;
- DIEValue DW_AT_enum_class;
- DIEValue DW_AT_endianity;
- DIEValue DW_AT_explicit;
- DIEValue DW_AT_is_optional;
- DIEValue DW_AT_location;
- DIEValue DW_AT_lower_bound;
- DIEValue DW_AT_mutable;
- DIEValue DW_AT_ordering;
- DIEValue DW_AT_picture_string;
- DIEValue DW_AT_prototyped;
- DIEValue DW_AT_small;
- DIEValue DW_AT_segment;
- DIEValue DW_AT_string_length;
- DIEValue DW_AT_threads_scaled;
- DIEValue DW_AT_upper_bound;
- DIEValue DW_AT_use_location;
- DIEValue DW_AT_use_UTF8;
- DIEValue DW_AT_variable_parameter;
- DIEValue DW_AT_virtuality;
- DIEValue DW_AT_visibility;
- DIEValue DW_AT_vtable_elem_location;
- DIEValue DW_AT_type;
-
- // Insert any additional ones here...
+#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME;
+#include "DIEHashAttributes.def"
};
public:
DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
/// \brief Computes the CU signature.
- uint64_t computeCUSignature(const DIE &Die);
+ uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
/// \brief Computes the type signature.
uint64_t computeTypeSignature(const DIE &Die);
diff --git a/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
new file mode 100644
index 000000000000..28a02390fccb
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -0,0 +1,55 @@
+#ifndef HANDLE_DIE_HASH_ATTR
+#error "Missing macro definition of HANDLE_DIE_HASH_ATTR"
+#endif
+
+HANDLE_DIE_HASH_ATTR(DW_AT_name)
+HANDLE_DIE_HASH_ATTR(DW_AT_accessibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_address_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_allocated)
+HANDLE_DIE_HASH_ATTR(DW_AT_artificial)
+HANDLE_DIE_HASH_ATTR(DW_AT_associated)
+HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_expr)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_containing_type)
+HANDLE_DIE_HASH_ATTR(DW_AT_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign)
+HANDLE_DIE_HASH_ATTR(DW_AT_default_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_digit_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_list)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_encoding)
+HANDLE_DIE_HASH_ATTR(DW_AT_enum_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_endianity)
+HANDLE_DIE_HASH_ATTR(DW_AT_explicit)
+HANDLE_DIE_HASH_ATTR(DW_AT_is_optional)
+HANDLE_DIE_HASH_ATTR(DW_AT_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_mutable)
+HANDLE_DIE_HASH_ATTR(DW_AT_ordering)
+HANDLE_DIE_HASH_ATTR(DW_AT_picture_string)
+HANDLE_DIE_HASH_ATTR(DW_AT_prototyped)
+HANDLE_DIE_HASH_ATTR(DW_AT_small)
+HANDLE_DIE_HASH_ATTR(DW_AT_segment)
+HANDLE_DIE_HASH_ATTR(DW_AT_string_length)
+HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled)
+HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8)
+HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter)
+HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
+HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_type)
+
+#undef HANDLE_DIE_HASH_ATTR
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 826162ad47c4..0971c5942203 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -115,7 +115,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
return getBaseTypeSize(BaseType);
}
-bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) {
+static bool hasDebugInfo(const MachineModuleInfo *MMI,
+ const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return false;
auto *SP = MF->getFunction()->getSubprogram();
@@ -223,9 +224,9 @@ void DebugHandlerBase::endInstruction() {
return;
assert(CurMI != nullptr);
- // Don't create a new label after DBG_VALUE instructions.
- // They don't generate code.
- if (!CurMI->isDebugValue()) {
+ // Don't create a new label after DBG_VALUE and other instructions that don't
+ // generate code.
+ if (!CurMI->isMetaInstruction()) {
PrevLabel = nullptr;
PrevInstBB = CurMI->getParent();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index e172712cf889..04073b3aed68 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -760,7 +760,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Name.str();
GlobalNames[FullName] = &Die;
@@ -768,7 +768,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Name.str();
// Insert, allowing the entry to remain as-is if it's already present
@@ -781,7 +781,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
@@ -789,7 +789,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
// Insert, allowing the entry to remain as-is if it's already present
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77e9e671529f..b8f57472f17c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -77,8 +77,6 @@ class DwarfCompileUnit final : public DwarfUnit {
bool isDwoUnit() const override;
- bool includeMinimalInlineScopes() const;
-
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return AbstractSPDies;
@@ -101,6 +99,8 @@ public:
return Skeleton;
}
+ bool includeMinimalInlineScopes() const;
+
void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 3410b98d7776..bf27516e1ccd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -252,12 +252,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Handle split DWARF.
HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
- // Pubnames/pubtypes on by default for GDB.
- if (DwarfPubSections == Default)
- HasDwarfPubSections = tuneForGDB();
- else
- HasDwarfPubSections = DwarfPubSections == Enable;
-
// SCE defaults to linkage names only for abstract subprograms.
if (DwarfLinkageNames == DefaultLinkageNames)
UseAllLinkageNames = !tuneForSCE();
@@ -380,19 +374,35 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- auto &CU = *CUMap.lookup(SP->getUnit());
- if (auto *SkelCU = CU.getSkeleton()) {
- (shareAcrossDWOCUs() ? CU : SrcCU)
- .constructAbstractSubprogramScopeDIE(Scope);
- if (CU.getCUNode()->getSplitDebugInlining())
- SkelCU->constructAbstractSubprogramScopeDIE(Scope);
- } else {
- CU.constructAbstractSubprogramScopeDIE(Scope);
+ if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining())
+ // Avoid building the original CU if it won't be used
+ SrcCU.constructAbstractSubprogramScopeDIE(Scope);
+ else {
+ auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ if (auto *SkelCU = CU.getSkeleton()) {
+ (shareAcrossDWOCUs() ? CU : SrcCU)
+ .constructAbstractSubprogramScopeDIE(Scope);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructAbstractSubprogramScopeDIE(Scope);
+ } else
+ CU.constructAbstractSubprogramScopeDIE(Scope);
}
}
-void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
- if (!GenerateGnuPubSections)
+bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const {
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ if (GenerateGnuPubSections)
+ return true;
+
+ if (DwarfPubSections == Default)
+ return tuneForGDB() && !includeMinimalInlineScopes;
+
+ return DwarfPubSections == Enable;
+}
+
+void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
+ if (!hasDwarfPubSections(U.includeMinimalInlineScopes()))
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
@@ -401,7 +411,9 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
-DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
StringRef FN = DIUnit->getFilename();
CompilationDir = DIUnit->getDirectory();
@@ -534,7 +546,12 @@ void DwarfDebug::beginModule() {
}
for (DICompileUnit *CUNode : M->debug_compile_units()) {
- DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
+ if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
+ CUNode->getGlobalVariables().empty() &&
+ CUNode->getImportedEntities().empty() && CUNode->getMacros().empty())
+ continue;
+
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
@@ -581,11 +598,12 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- for (const DISubprogram *SP : ProcessedSPNodes)
- if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
- forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(SP);
- });
+ for (const DISubprogram *SP : ProcessedSPNodes) {
+ assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug);
+ forBothCUs(
+ getOrCreateDwarfCompileUnit(SP->getUnit()),
+ [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); });
+ }
}
void DwarfDebug::finalizeModuleInfo() {
@@ -595,6 +613,13 @@ void DwarfDebug::finalizeModuleInfo() {
finishVariableDefinitions();
+ // Include the DWO file name in the hash if there's more than one CU.
+ // This handles ThinLTO's situation where imported CUs may very easily be
+ // duplicates, with the same CU partially imported into another ThinLTO unit.
+ StringRef DWOName;
+ if (CUMap.size() > 1)
+ DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
+
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -609,7 +634,8 @@ void DwarfDebug::finalizeModuleInfo() {
auto *SkCU = TheCU.getSkeleton();
if (useSplitDwarf()) {
// Emit a unique identifier for this CU.
- uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
+ uint64_t ID =
+ DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
@@ -718,7 +744,9 @@ void DwarfDebug::endModule() {
}
// Emit the pubnames and pubtypes sections if requested.
- if (HasDwarfPubSections) {
+ // The condition is optimistically correct - any CU not using GMLT (and the
+ // implicit/default pubnames state) might still have pubnames.
+ if (hasDwarfPubSections(/* gmlt */ false)) {
emitDebugPubNames(GenerateGnuPubSections);
emitDebugPubTypes(GenerateGnuPubSections);
}
@@ -1028,8 +1056,12 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
+ const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram();
+ if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
+ return;
+
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
- if (MI->isDebugValue() || MI->isCFIInstruction())
+ if (MI->isMetaInstruction())
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
@@ -1111,7 +1143,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// the beginning of the function body.
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
- if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc())
return MI.getDebugLoc();
return DebugLoc();
@@ -1122,40 +1154,28 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
- if (LScopes.empty())
+ auto *SP = MF->getFunction()->getSubprogram();
+ assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
+ if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- // FnScope->getScopeNode() and DI->second should represent the same function,
- // though they may not be the same MDNode due to inline functions merged in
- // LTO where the debug info metadata still differs (either due to distinct
- // written differences - two versions of a linkonce_odr function
- // written/copied into two separate files, or some sub-optimal metadata that
- // isn't structurally identical (see: file path/name info from clang, which
- // includes the directory of the cpp file being built, even when the file name
- // is absolute (such as an <> lookup header)))
- auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
- DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit());
- if (!TheCU) {
- assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug &&
- "DICompileUnit missing from llvm.dbg.cu?");
- return;
- }
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
- if (DILocation *L = PrologEndLoc) {
+ if (PrologEndLoc) {
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- auto *SP = L->getInlinedAtScope()->getSubprogram();
+ auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
}
}
@@ -1395,7 +1415,7 @@ void DwarfDebug::emitDebugPubSection(
const auto &Globals = (TheU->*Accessor)();
- if (Globals.empty())
+ if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes()))
continue;
if (auto *Skeleton = TheU->getSkeleton())
@@ -1544,6 +1564,9 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
// Emit locations into the debug loc section.
void DwarfDebug::emitDebugLoc() {
+ if (DebugLocs.getLists().empty())
+ return;
+
// Start the dwarf loc section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
@@ -1755,6 +1778,9 @@ void DwarfDebug::emitDebugARanges() {
/// Emit address ranges into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
+ if (CUMap.empty())
+ return;
+
// Start the dwarf ranges section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
@@ -1834,6 +1860,9 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
/// Emit macros into a debug macinfo section.
void DwarfDebug::emitDebugMacinfo() {
+ if (CUMap.empty())
+ return;
+
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index b9c5aa9ffb23..ebfba4cfc275 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -246,9 +246,6 @@ class DwarfDebug : public DebugHandlerBase {
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
- /// Whether to emit the pubnames/pubtypes sections.
- bool HasDwarfPubSections;
-
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
@@ -415,11 +412,11 @@ class DwarfDebug : public DebugHandlerBase {
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
- void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
+ void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
- DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
+ DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
@@ -556,6 +553,8 @@ public:
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ bool hasDwarfPubSections(bool includeMinimalInlineScopes) const;
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 984973cf3a3b..344136b1f195 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -96,7 +96,7 @@ namespace {
char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
+INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
false, false)
FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index a67e194356d8..d3fced436b68 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -24,8 +24,6 @@
#include <utility>
using namespace llvm;
-#define DEBUG_TYPE "basictti"
-
// This flag is used by the template base class for BasicTTIImpl, and here to
// provide a definition.
cl::opt<unsigned>
diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp
index efdf300df850..2c41b597843c 100644
--- a/lib/CodeGen/BranchCoalescing.cpp
+++ b/lib/CodeGen/BranchCoalescing.cpp
@@ -27,7 +27,7 @@
using namespace llvm;
-#define DEBUG_TYPE "coal-branch"
+#define DEBUG_TYPE "branch-coalescing"
static cl::opt<cl::boolOrDefault>
EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
@@ -193,11 +193,11 @@ public:
char BranchCoalescing::ID = 0;
char &llvm::BranchCoalescingID = BranchCoalescing::ID;
-INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing",
+INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
"Branch Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing",
+INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
false, false)
BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index b63d9f4a4351..03ceac10beec 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -44,7 +44,7 @@
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "branchfolding"
+#define DEBUG_TYPE "branch-folder"
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
@@ -89,7 +89,7 @@ namespace {
char BranchFolderPass::ID = 0;
char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
"Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
@@ -153,13 +153,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TriedMerging.clear();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
MLI = mli;
+ this->MRI = &MRI;
- MachineRegisterInfo &MRI = MF.getRegInfo();
UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
if (!UpdateLiveIns)
MRI.invalidateLiveness();
@@ -351,7 +352,7 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
if (UpdateLiveIns) {
NewDest->clearLiveIns();
- computeLiveIns(LiveRegs, *TRI, *NewDest);
+ computeLiveIns(LiveRegs, *MRI, *NewDest);
}
++NumTailMerge;
@@ -388,7 +389,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
if (UpdateLiveIns)
- computeLiveIns(LiveRegs, *TRI, *NewMBB);
+ computeLiveIns(LiveRegs, *MRI, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4852721eea10..92681137e4c6 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -108,6 +108,7 @@ namespace llvm {
bool UpdateLiveIns;
unsigned MinCommonTailLength;
const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
MachineLoopInfo *MLI;
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 7af136941661..e3de61c7816f 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -259,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
- computeLiveIns(LiveRegs, *TRI, *NewBB);
+ computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB);
++NumSplit;
@@ -345,6 +345,10 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
// Do it here since if there's no split, no update is needed.
MBB->replaceSuccessor(FBB, &NewBB);
NewBB.addSuccessor(FBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB);
}
// We now have an appropriate fall-through block in place (either naturally or
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 3a1a3020a8d4..4e85708efafc 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -257,10 +257,10 @@ class TypePromotionTransaction;
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 7ac2e5445435..265dda16bfa7 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -23,7 +23,7 @@
using namespace llvm;
-#define DEBUG_TYPE "codegen-dce"
+#define DEBUG_TYPE "dead-mi-elimination"
STATISTIC(NumDeletes, "Number of dead instructions deleted");
@@ -54,7 +54,7 @@ namespace {
char DeadMachineInstructionElim::ID = 0;
char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
-INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE,
"Remove dead machine instructions", false, false)
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index 6f4ea1912cf4..ab9a0592e017 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -132,8 +132,7 @@ private:
char DetectDeadLanes::ID = 0;
char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
-INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
- false, false)
+INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
/// Returns true if \p MI will get lowered to a series of COPY instructions.
/// We call this a COPY-like instruction.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 1ef4d8660657..06ae5cd72c85 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -71,12 +71,12 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
"Prepare DWARF exceptions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE,
"Prepare DWARF exceptions", false, false)
FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); }
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 729172796453..402afe75b141 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -616,13 +616,13 @@ private:
char EarlyIfConverter::ID = 0;
char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
-INITIALIZE_PASS_BEGIN(EarlyIfConverter,
- "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(EarlyIfConverter,
- "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 0ec79c2e69f9..88d422a0f545 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -41,7 +41,7 @@ namespace {
char ExpandISelPseudos::ID = 0;
char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
-INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE,
"Expand ISel Pseudo-instructions", false, false)
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e860906043dd..27cd639b2a49 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -58,7 +58,7 @@ private:
char ExpandPostRA::ID = 0;
char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
-INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE,
"Post-RA pseudo instruction expansion pass", false, false)
/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp
index d61afad4db57..0bdd5e64a7f2 100644
--- a/lib/CodeGen/FuncletLayout.cpp
+++ b/lib/CodeGen/FuncletLayout.cpp
@@ -37,7 +37,7 @@ public:
char FuncletLayout::ID = 0;
char &llvm::FuncletLayoutID = FuncletLayout::ID;
-INITIALIZE_PASS(FuncletLayout, "funclet-layout",
+INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE,
"Contiguously Lay Out Funclets", false, false)
bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index 03a8c4f5f909..eba7ea8132e3 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -8,6 +8,7 @@ set(GLOBAL_ISEL_FILES
LegalizerHelper.cpp
Legalizer.cpp
LegalizerInfo.cpp
+ Localizer.cpp
RegBankSelect.cpp
RegisterBank.cpp
RegisterBankInfo.cpp
@@ -24,11 +25,11 @@ endif()
# In LLVMBuild.txt files, it is not possible to mark a dependency to a
# library as optional. So instead, generate an empty library if we did
-# not ask for it.
+# not ask for it.
add_llvm_library(LLVMGlobalISel
${GLOBAL_ISEL_BUILD_FILES}
GlobalISel.cpp
-
+
DEPENDS
intrinsics_gen
)
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index fcd2722f1c2f..29d1209bb02a 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -26,6 +26,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
+ initializeLocalizerPass(Registry);
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
}
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
new file mode 100644
index 000000000000..bdca732b4e33
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -0,0 +1,125 @@
+//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the Localizer class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "localizer"
+
+using namespace llvm;
+
+char Localizer::ID = 0;
+INITIALIZE_PASS(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use", false,
+ false);
+
+Localizer::Localizer() : MachineFunctionPass(ID) {
+ initializeLocalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+
+bool Localizer::shouldLocalize(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Constant-like instructions should be close to their users.
+ // We don't want long live-ranges for them.
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FRAME_INDEX:
+ return true;
+ }
+}
+
+bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
+ MachineBasicBlock *&InsertMBB) {
+ MachineInstr &MIUse = *MOUse.getParent();
+ InsertMBB = MIUse.getParent();
+ if (MIUse.isPHI())
+ InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+ return InsertMBB == Def.getParent();
+}
+
+bool Localizer::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
+
+ init(MF);
+
+ bool Changed = false;
+ // Keep track of the instructions we localized.
+ // We won't need to process them if we see them later in the CFG.
+ SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+ // TODO: Do bottom up traversal.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+ continue;
+ DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ unsigned Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+ // We are going to invalidate the list of use operands, so we
+ // can't use a range iterator.
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+ MOIt != MOItEnd;) {
+ MachineOperand &MOUse = *MOIt++;
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB))
+ continue;
+ DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ // Move it to the right place.
+ MachineInstr &MIUse = *MOUse.getParent();
+ if (MIUse.getParent() == InsertMBB)
+ InsertMBB->insert(MIUse, LocalizedMI);
+ else
+ InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
+
+ // Set a new register for the definition.
+ unsigned NewReg =
+ MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ }
+ return Changed;
+}
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 1ea534939948..23812a2a2344 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -192,10 +192,7 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
-INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
+INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 628d599a3cc7..1c33f3b6800e 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -39,7 +39,7 @@
using namespace llvm;
-#define DEBUG_TYPE "ifcvt"
+#define DEBUG_TYPE "if-converter"
// Hidden options for help debugging.
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
@@ -316,9 +316,9 @@ namespace {
char &llvm::IfConverterID = IfConverter::ID;
-INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
@@ -1588,32 +1588,22 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
- // To be able to insert code freely at the end of BBI we sometimes remove
- // the branch from BBI to NextMBB temporarily. Remember if this happened.
- bool RemovedBranchToNextMBB = false;
if (CvtMBB.pred_size() > 1) {
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
- // Keep the CFG updated.
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
- // Remove the branch from the entry of the triangle to NextBB to be able to
- // do the merge below. Keep the CFG updated, but remember we removed the
- // branch since we do want to execute NextMBB, either by introducing a
- // branch to it again, or merging it into the entry block.
- // How it's handled is decided further down.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
- BBI.BB->removeSuccessor(&NextMBB, true);
- RemovedBranchToNextMBB = true;
-
// Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
@@ -1651,19 +1641,12 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- // We might have removed BBI from NextMBB's predecessor list above but
- // we want it to be there, so consider that too.
- (NextMBB.pred_size() == (RemovedBranchToNextMBB ? 0 : 1)) &&
- !NextBBI->HasFallThrough &&
+ NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
!NextMBB.hasAddressTaken()) {
- // We will merge NextBBI into BBI, and thus remove the current
- // fallthrough from BBI into CvtBBI.
- BBI.BB->removeSuccessor(&CvtMBB, true);
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
InsertUncondBranch(*BBI.BB, NextMBB, TII);
- BBI.BB->addSuccessor(&NextMBB);
BBI.HasFallThrough = false;
}
// Mixed predicated and unpredicated code. This cannot be iteratively
@@ -1671,6 +1654,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
IterIfcvt = false;
}
+ RemoveExtraEdges(BBI);
+
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 920c2a372a9b..24e289dd4f1b 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -674,8 +674,8 @@ void ImplicitNullChecks::rewriteNullChecks(
char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
-INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
+INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
"Implicit null checks", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
+INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE,
"Implicit null checks", false, false)
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index bb29db301a95..ee4929c91482 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -107,13 +107,11 @@ private:
} // end anonymous namespace.
char InterleavedAccess::ID = 0;
-INITIALIZE_PASS_BEGIN(
- InterleavedAccess, "interleaved-access",
+INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(
- InterleavedAccess, "interleaved-access",
+INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 275d84e2c185..40ee7ea785f0 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -86,8 +86,9 @@ void LexicalScopes::extractLexicalScopes(
continue;
}
- // Ignore DBG_VALUE. It does not contribute to any instruction in output.
- if (MInsn.isDebugValue())
+ // Ignore DBG_VALUE and similar instructions that do not contribute to any
+ // instruction in the output.
+ if (MInsn.isMetaInstruction())
continue;
if (RangeBeginMI) {
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index f956974b1aaf..b5e705f6455d 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -43,7 +43,7 @@
using namespace llvm;
-#define DEBUG_TYPE "live-debug-values"
+#define DEBUG_TYPE "livedebugvalues"
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
@@ -283,7 +283,7 @@ public:
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
-INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
false, false)
/// Default construct and initialize the pass.
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index bcf7c8e99c7f..bbd783367c9e 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -45,7 +45,7 @@
using namespace llvm;
-#define DEBUG_TYPE "livedebug"
+#define DEBUG_TYPE "livedebugvars"
static cl::opt<bool>
EnableLDV("live-debug-variables", cl::init(true),
@@ -54,11 +54,11 @@ EnableLDV("live-debug-variables", cl::init(true),
STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
char LiveDebugVariables::ID = 0;
-INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3f5b8e19d1f0..0c05dbeacba0 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1,4 +1,4 @@
-//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,28 +14,45 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "LiveRangeCalc.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -59,11 +76,13 @@ static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
namespace llvm {
+
cl::opt<bool> UseSegmentSetForPhysRegs(
"use-segment-set-for-physregs", cl::Hidden, cl::init(true),
cl::desc(
"Use segment set for the computation of the live ranges of physregs."));
-}
+
+} // end namespace llvm
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -78,8 +97,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
- DomTree(nullptr), LRCalc(nullptr) {
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
}
@@ -168,12 +186,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
#endif
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
- float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ?
- llvm::huge_valf : 0.0F;
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
-
/// Compute the live interval of a virtual register, based on defs and uses.
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
@@ -337,7 +353,7 @@ static void createSegmentsForValues(LiveRange &LR,
}
}
-typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList;
+using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
ShrinkToUsesWorkList &WorkList,
@@ -593,7 +609,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Find all blocks that are reachable from KillMBB without leaving VNI's live
// range. It is possible that KillMBB itself is reachable, so start a DFS
// from each successor.
- typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
+ using VisitedTy = df_iterator_default_set<MachineBasicBlock*,9>;
VisitedTy Visited;
for (MachineBasicBlock *Succ : KillMBB->successors()) {
for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
@@ -822,7 +838,6 @@ LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
return S;
}
-
//===----------------------------------------------------------------------===//
// Register mask functions
//===----------------------------------------------------------------------===//
@@ -855,7 +870,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
return false;
bool Found = false;
- for (;;) {
+ while (true) {
assert(*SlotI >= LiveI->start);
// Loop over all slots overlapping this segment.
while (*SlotI < LiveI->end) {
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 9f7d7cf54848..0dc1079b2ad4 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -53,7 +53,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
continue;
removeReg(Reg);
} else if (O->isRegMask())
- removeRegsInMask(*O, nullptr);
+ removeRegsInMask(*O);
}
// Add uses to the set.
@@ -142,66 +142,85 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins()) {
- MCSubRegIndexIterator S(LI.PhysReg, TRI);
- if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) {
- addReg(LI.PhysReg);
+ unsigned Reg = LI.PhysReg;
+ LaneBitmask Mask = LI.LaneMask;
+ MCSubRegIndexIterator S(Reg, TRI);
+ assert(Mask.any() && "Invalid livein mask");
+ if (Mask.all() || !S.isValid()) {
+ addReg(Reg);
continue;
}
for (; S.isValid(); ++S) {
unsigned SI = S.getSubRegIndex();
- if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any())
+ if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any())
addReg(S.getSubReg());
}
}
}
-/// Add pristine registers to the given \p LiveRegs. This function removes
-/// actually saved callee save registers when \p InPrologueEpilogue is false.
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
- const MachineFrameInfo &MFI,
- const TargetRegisterInfo &TRI) {
+/// Adds all callee saved registers to \p LiveRegs.
+static void addCalleeSavedRegs(LivePhysRegs &LiveRegs,
+ const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
- ++CSR)
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
LiveRegs.addReg(*CSR);
+}
+
+/// Adds pristine registers to the given \p LiveRegs. Pristine registers are
+/// callee saved registers that are unused in the function.
+static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+ /// Add all callee saved regs, then remove the ones that are saved+restored.
+ addCalleeSavedRegs(LiveRegs, MF);
+ /// Remove the ones that are saved and restored; the rest are pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
}
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addBlockLiveIns(*Succ);
+ if (!MBB.succ_empty()) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*Succ);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers that are saved and
+ // restored (somewhere). This does not include callee saved registers that
+ // are unused and hence not saved and restored; they are called pristine.
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ addReg(Info.getReg());
+ }
+ }
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
- const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- if (MBB.isReturnBlock()) {
- // The return block has no successors whose live-ins we could merge
- // below. So instead we add the callee saved registers manually.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I)
- addReg(*I);
- } else {
- addPristines(*this, MF, MFI, *TRI);
- }
+ if (!MBB.succ_empty()) {
+ const MachineFunction &MF = *MBB.getParent();
+ addPristines(*this, MF);
+ addLiveOutsNoPristines(MBB);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers.
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
}
-
- addLiveOutsNoPristines(MBB);
}
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid())
- addPristines(*this, MF, MFI, *TRI);
+ addPristines(*this, MF);
addBlockLiveIns(MBB);
}
-void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
+void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
+ const MachineRegisterInfo &MRI,
MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
assert(MBB.livein_empty());
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
@@ -209,10 +228,12 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
LiveRegs.stepBackward(MI);
for (unsigned Reg : LiveRegs) {
+ if (MRI.isReserved(Reg))
+ continue;
// Skip the register if we are about to add one of its super registers.
bool ContainsSuperReg = false;
for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg)) {
+ if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) {
ContainsSuperReg = true;
break;
}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index dbf1f96102d1..b51f8b0aa6bb 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,10 +25,10 @@ using namespace llvm;
#define DEBUG_TYPE "livestacks"
char LiveStacks::ID = 0;
-INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(LiveStacks, "livestacks",
+INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
char &llvm::LiveStacksID = LiveStacks::ID;
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index e189fb0dd89d..17cab0ae910e 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -103,10 +103,10 @@ namespace {
char LocalStackSlotPass::ID = 0;
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
-INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc",
+INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
+INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 5fb5b747f471..0fc48d4e0b6b 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -53,7 +53,7 @@ private:
char LowerEmuTLS::ID = 0;
-INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
+INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE,
"Add __emutls_[vt]. variables for emultated TLS model", false,
false)
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9c7367b4c780..4d1ec11df46c 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
-#define DEBUG_TYPE "block-freq"
+#define DEBUG_TYPE "machine-block-freq"
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
@@ -149,11 +149,11 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
} // end namespace llvm
-INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
char MachineBlockFrequencyInfo::ID = 0;
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index adfca9a46239..c1ca8e8e83b4 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -499,13 +499,13 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
+INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 0766f465456c..34f6bbd59e9b 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -108,12 +108,12 @@ namespace {
char MachineCSE::ID = 0;
char &llvm::MachineCSEID = MachineCSE::ID;
-INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
@@ -180,8 +180,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
I = skipDebugInstructionsForward(I, E);
if (I == E)
- // Reached end of block, register is obviously dead.
- return true;
+ // Reached end of block, we don't know whether the register is dead or not.
+ return false;
bool SeenDef = false;
for (const MachineOperand &MO : I->operands()) {
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 50e453e4067c..c176de16b593 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -86,11 +86,11 @@ private:
char MachineCombiner::ID = 0;
char &llvm::MachineCombinerID = MachineCombiner::ID;
-INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
"Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
false, false)
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 7312dc5e94bd..f83b5481e0a5 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -27,7 +27,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "codegen-cp"
+#define DEBUG_TYPE "machine-cp"
STATISTIC(NumDeletes, "Number of dead copies deleted");
@@ -79,7 +79,7 @@ namespace {
char MachineCopyPropagation::ID = 0;
char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
-INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
/// Remove any entry in \p Map where the register is a subregister or equal to
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 7eb991744f01..95c62d820b0e 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -38,7 +38,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "machine-licm"
+#define DEBUG_TYPE "machinelicm"
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
@@ -237,13 +237,13 @@ namespace {
char MachineLICM::ID = 0;
char &llvm::MachineLICMID = MachineLICM::ID;
-INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 581a8ad81149..9ea3c00a2fc4 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -901,7 +901,7 @@ namespace llvm {
ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
}
-INITIALIZE_PASS(MachineOutliner, "machine-outliner",
+INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE,
"Machine Function Outliner", false, false)
void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index d06c38cf4ed8..8f5ac8b3fc45 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -715,13 +715,13 @@ char MachinePipeliner::ID = 0;
int MachinePipeliner::NumTries = 0;
#endif
char &llvm::MachinePipelinerID = MachinePipeliner::ID;
-INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner",
+INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachinePipeliner, "pipeliner",
+INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
/// The "main" function for implementing Swing Modulo Scheduling.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 41e161f71e53..edc3783afa2f 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -69,7 +69,7 @@
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
namespace llvm {
@@ -191,13 +191,13 @@ char MachineScheduler::ID = 0;
char &llvm::MachineSchedulerID = MachineScheduler::ID;
-INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
MachineScheduler::MachineScheduler()
@@ -532,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// thumb2 size reduction is currently an exception, so the PostMIScheduler
// needs to do this.
if (FixKillFlags)
- Scheduler.fixupKills(&*MBB);
+ Scheduler.fixupKills(*MBB);
}
Scheduler.finalizeSchedule();
}
@@ -3233,6 +3233,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(),
+ TryCand, Cand, Cluster))
+ return;
+
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 5f87b68123f1..7c34e71a0cce 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -173,14 +173,14 @@ namespace {
char MachineSinking::ID = 0;
char &llvm::MachineSinkingID = MachineSinking::ID;
-INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
- "Machine code sinking", false, false)
+INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineSinking, "machine-sink",
- "Machine code sinking", false, false)
+INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB) {
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 998a9645e68b..01391a1a0e50 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -44,12 +44,12 @@ using namespace llvm;
char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
-INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
- "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineTraceMetrics,
- "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index b53b002f55a6..265f93c363ca 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -87,7 +87,6 @@ namespace {
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
- RegSet regsLiveInButUnused;
SlotIndex lastIndex;
@@ -419,7 +418,6 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
regsDead.clear();
regsKilled.clear();
regMasks.clear();
- regsLiveInButUnused.clear();
MBBInfoMap.clear();
return foundErrors;
@@ -756,7 +754,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsLive.insert(*SubRegs);
}
}
- regsLiveInButUnused = regsLive;
const MachineFrameInfo &MFI = MF->getFrameInfo();
BitVector PR = MFI.getPristineRegs(*MF);
@@ -1268,8 +1265,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Both use and def operands can read a register.
if (MO->readsReg()) {
- regsLiveInButUnused.erase(Reg);
-
if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 2a8531f337a0..76ad668104b4 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -23,7 +23,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "phi-opt"
+#define DEBUG_TYPE "opt-phis"
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
@@ -59,7 +59,7 @@ namespace {
char OptimizePHIs::ID = 0;
char &llvm::OptimizePHIsID = OptimizePHIs::ID;
-INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE,
"Optimize machine instruction PHIs", false, false)
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index db2264b2439d..9c898fa40d7e 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -112,11 +112,11 @@ STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
char& llvm::PHIEliminationID = PHIElimination::ID;
-INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
"Eliminate PHI nodes for register allocation",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE,
"Eliminate PHI nodes for register allocation", false, false)
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 61dccdde8f1d..f2249f9e37e0 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -200,7 +200,7 @@ namespace {
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
-INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
@@ -367,7 +367,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.finishBlock();
// Update register kills
- Scheduler.fixupKills(&MBB);
+ Scheduler.fixupKills(MBB);
}
return true;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index d27ea2f51867..0118580a626a 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-#define DEBUG_TYPE "processimplicitdefs"
+#define DEBUG_TYPE "processimpdefs"
namespace {
/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
@@ -51,9 +51,7 @@ public:
char ProcessImplicitDefs::ID = 0;
char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
-INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
- "Process Implicit Definitions", false, false)
-INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE,
"Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index aaa253fde494..a9813e534c5f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -45,7 +45,7 @@
using namespace llvm;
-#define DEBUG_TYPE "pei"
+#define DEBUG_TYPE "prologepilog"
typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
@@ -129,12 +129,12 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
cl::desc("Warn for stack size bigger than the given"
" number"));
-INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false,
+INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(PEI, "prologepilog",
+INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
"Prologue/Epilogue Insertion & Frame Finalization", false,
false)
@@ -450,12 +450,13 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
for (MachineBasicBlock *MBB : Visited) {
MCPhysReg Reg = CSI[i].getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
- if (!MBB->isLiveIn(Reg))
+ if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
}
}
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 2f7ee8bf414c..cc32e43968bb 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -112,11 +112,11 @@ char RenameIndependentSubregs::ID;
char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
-INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs",
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs",
+INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 2771fdbd737a..8584a9b7c897 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -52,7 +52,7 @@
using namespace llvm;
using namespace llvm::safestack;
-#define DEBUG_TYPE "safestack"
+#define DEBUG_TYPE "safe-stack"
namespace llvm {
@@ -820,10 +820,10 @@ public:
} // anonymous namespace
char SafeStackLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, "safe-stack",
+INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
"Safe Stack instrumentation pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(SafeStackLegacyPass, "safe-stack",
+INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE,
"Safe Stack instrumentation pass", false, false)
FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); }
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index dab5b91f50ad..07b43a82ca99 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -49,12 +49,8 @@ private:
} // namespace
char ScalarizeMaskedMemIntrin::ID = 0;
-INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
- "Scalarize unsupported masked memory intrinsics", false,
- false)
-INITIALIZE_PASS_END(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
- "Scalarize unsupported masked memory intrinsics", false,
- false)
+INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false, false)
FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
return new ScalarizeMaskedMemIntrin();
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 18823b74c47f..8035ea80364b 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1057,179 +1057,71 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
loads.dump());
}
-void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
- // Start with no live registers.
- LiveRegs.reset();
-
- // Examine the live-in regs of all successors.
- for (const MachineBasicBlock *Succ : BB->successors()) {
- for (const auto &LI : Succ->liveins()) {
- // Repeat, for reg and all subregs.
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
- }
- }
-}
-
-/// \brief If we change a kill flag on the bundle instruction implicit register
-/// operands, then we also need to propagate that to any instructions inside
-/// the bundle which had the same kill state.
-static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
- bool NewKillState,
- const TargetRegisterInfo *TRI) {
- if (MI->getOpcode() != TargetOpcode::BUNDLE)
- return;
-
- // Walk backwards from the last instruction in the bundle to the first.
- // Once we set a kill flag on an instruction, we bail out, as otherwise we
- // might set it on too many operands. We will clear as many flags as we
- // can though.
- MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
- while (Begin != End) {
- if (NewKillState) {
- if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
- return;
- } else
- (--End)->clearRegisterKills(Reg, TRI);
- }
-}
-
-void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) {
- if (MO.isDebug())
- return;
-
- // Setting kill flag...
- if (!MO.isKill()) {
- MO.setIsKill(true);
- toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
- return;
- }
-
- // If MO itself is live, clear the kill flag...
- if (LiveRegs.test(MO.getReg())) {
- MO.setIsKill(false);
- toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
- return;
- }
-
- // If any subreg of MO is live, then create an imp-def for that
- // subreg and keep MO marked as killed.
- MO.setIsKill(false);
- toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
- bool AllDead = true;
- const unsigned SuperReg = MO.getReg();
- MachineInstrBuilder MIB(MF, &MI);
- for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
- if (LiveRegs.test(*SubRegs)) {
- MIB.addReg(*SubRegs, RegState::ImplicitDefine);
- AllDead = false;
- }
- }
+static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
+ MachineInstr &MI, bool addToLiveRegs) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
- if(AllDead) {
- MO.setIsKill(true);
- toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
+ // Things that are available after the instruction are killed by it.
+ bool IsKill = LiveRegs.available(MRI, Reg);
+ MO.setIsKill(IsKill);
+ if (IsKill && addToLiveRegs)
+ LiveRegs.addReg(Reg);
}
}
-void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
- // FIXME: Reuse the LivePhysRegs utility for this.
- DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
-
- LiveRegs.resize(TRI->getNumRegs());
- BitVector killedRegs(TRI->getNumRegs());
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');
- startBlockForKills(MBB);
+ LiveRegs.init(*TRI);
+ LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
- unsigned Count = MBB->size();
- for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
- I != E; --Count) {
- MachineInstr &MI = *--I;
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (MI.isDebugValue())
continue;
// Update liveness. Registers that are defed but not used in this
// instruction are now dead. Mark register and all subregs as they
// are completely defined.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegMask())
- LiveRegs.clearBitsNotInMask(MO.getRegMask());
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isDef()) continue;
- // Ignore two-addr defs.
- if (MI.isRegTiedToUseOperand(i)) continue;
-
- // Repeat for reg and all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.reset(*SubRegs);
- }
-
- // Examine all used registers and set/clear kill flag. When a
- // register is used multiple times we only set the kill flag on
- // the first use. Don't set kill flags on undef operands.
- killedRegs.reset();
-
- // toggleKillFlag can append new operands (implicit defs), so using
- // a range-based loop is not safe. The new operands will be appended
- // at the end of the operand list and they don't need to be visited,
- // so iterating until the currently last operand is ok.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
- unsigned Reg = MO.getReg();
- if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
- bool kill = false;
- if (!killedRegs.test(Reg)) {
- kill = true;
- // A register is not killed if any subregs are live...
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- if (LiveRegs.test(*SubRegs)) {
- kill = false;
- break;
- }
- }
-
- // If subreg is not live, then register is killed if it became
- // live in this instruction
- if (kill)
- kill = !LiveRegs.test(Reg);
- }
-
- if (MO.isKill() != kill) {
- DEBUG(dbgs() << "Fixing " << MO << " in ");
- toggleKillFlag(MI, MO);
- DEBUG(MI.dump());
- DEBUG({
- if (MI.getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI.getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
- while (++Begin != End)
- DEBUG(Begin->dump());
- }
- });
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (MO.isReg()) {
+ if (!MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LiveRegs.removeReg(Reg);
+ } else if (MO.isRegMask()) {
+ LiveRegs.removeRegsInMask(MO);
}
-
- killedRegs.set(Reg);
}
- // Mark any used register (that is not using undef) and subregs as
- // now live...
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
- unsigned Reg = MO.getReg();
- if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
+ // If there is a bundle header fix it up first.
+ if (!MI.isBundled()) {
+ toggleKills(MRI, LiveRegs, MI, true);
+ } else {
+ MachineBasicBlock::instr_iterator First = MI.getIterator();
+ if (MI.isBundle()) {
+ toggleKills(MRI, LiveRegs, MI, false);
+ ++First;
+ }
+      // Some targets make the (questionable) assumption that the instructions
+ // inside the bundle are ordered and consequently only the last use of
+ // a register inside the bundle can kill it.
+ MachineBasicBlock::instr_iterator I = std::next(First);
+ while (I->isBundledWithSucc())
+ ++I;
+ do {
+ if (!I->isDebugValue())
+ toggleKills(MRI, LiveRegs, *I, true);
+ --I;
+    } while (I != First);
}
}
}
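
The rewritten fixupKills above replaces the hand-rolled BitVector bookkeeping with a LivePhysRegs-style backward scan: seed the live set from the block's live-outs, walk the instructions bottom-up, drop defs from the set, and mark a use as a kill exactly when its register is not live after the instruction. A self-contained toy model of that scan (plain C++ over integer register ids, not LLVM's MachineInstr/LivePhysRegs API):

#include <iostream>
#include <set>
#include <vector>

struct Inst {
  std::vector<int> defs, uses;
  std::vector<bool> kill;  // filled in below, parallel to 'uses'
};

void fixupKills(std::vector<Inst> &block, std::set<int> live /*live-outs*/) {
  for (auto it = block.rbegin(); it != block.rend(); ++it) {
    for (int r : it->defs)
      live.erase(r);                 // a def ends liveness above this point
    it->kill.assign(it->uses.size(), false);
    for (size_t i = 0; i < it->uses.size(); ++i) {
      int r = it->uses[i];
      if (!live.count(r)) {          // not live afterwards => this use kills
        it->kill[i] = true;
        live.insert(r);              // and the register is live above
      }
    }
  }
}

int main() {
  // r1 = ...; r2 = r1 + r1; use r2   (nothing live out of the block)
  std::vector<Inst> block = {{{1}, {}, {}}, {{2}, {1, 1}, {}}, {{}, {2}, {}}};
  fixupKills(block, {});
  std::cout << std::boolalpha << block[1].kill[0] << ' ' << block[1].kill[1]
            << ' ' << block[2].kill[0] << '\n';  // true false true
}
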
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d450e7e078c..23a302f3e561 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12349,9 +12349,9 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zext(SizeInBits);
+ StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
} else {
llvm_unreachable("Invalid constant element type");
}
@@ -12617,16 +12617,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
+ } else if (!LegalTypes &&
+ TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
FirstStoreAS, FirstStoreAlign, &IsFast) &&
IsFast) {
@@ -12642,7 +12645,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
!NoVectors) {
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
@@ -12700,7 +12703,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
- if (TLI.isTypeLegal(Ty) &&
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast)
@@ -12810,6 +12813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -12823,6 +12827,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -12834,7 +12839,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ if (!LegalTypes &&
+ TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
@@ -14455,6 +14462,145 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+/// If we are extracting a subvector produced by a wide binary operator with
+/// at least one operand that was the result of a vector concatenation, then try
+/// to use the narrow vector operands directly to avoid the concatenation and
+/// extraction.
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
+ // some of these bailouts with other transforms.
+
+ // The extract index must be a constant, so we can map it to a concat operand.
+ auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndex)
+ return SDValue();
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ EVT VT = Extract->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
+ "Extract index is not a multiple of the vector length.");
+ if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ return SDValue();
+
+ // We are looking for an optionally bitcasted wide vector binary operator
+ // feeding an extract subvector.
+ SDValue BinOp = Extract->getOperand(0);
+ if (BinOp.getOpcode() == ISD::BITCAST)
+ BinOp = BinOp.getOperand(0);
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ unsigned BOpcode = BinOp.getOpcode();
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
+
+ // The binop must be a vector type, so we can chop it in half.
+ EVT WideBVT = BinOp.getValueType();
+ if (!WideBVT.isVector())
+ return SDValue();
+
+ // Bail out if the target does not support a narrower version of the binop.
+ EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
+ WideBVT.getVectorNumElements() / 2);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ return SDValue();
+
+ // Peek through bitcasts of the binary operator operands if needed.
+ SDValue LHS = BinOp.getOperand(0);
+ if (LHS.getOpcode() == ISD::BITCAST)
+ LHS = LHS.getOperand(0);
+
+ SDValue RHS = BinOp.getOperand(1);
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ // We need at least one concatenation operation of a binop operand to make
+ // this transform worthwhile. The concat must double the input vector sizes.
+ // TODO: Should we also handle INSERT_SUBVECTOR patterns?
+ bool ConcatL =
+ LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
+ bool ConcatR =
+ RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
+ if (!ConcatL && !ConcatR)
+ return SDValue();
+
+ // If one of the binop operands was not the result of a concat, we must
+ // extract a half-sized operand for our new narrow binop. We can't just reuse
+ // the original extract index operand because we may have bitcasted.
+ unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ SDLoc DL(Extract);
+
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
+ SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0),
+ DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+ SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1),
+ DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+}
+
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Add support for big-endian. The offset calculation must be adjusted.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ // TODO: The one-use check is overly conservative. Check the cost of the
+ // extract instead or remove that condition entirely.
+ auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
+ return SDValue();
+
+ // The narrow load will be offset from the base address of the old load if
+ // we are extracting from something besides index 0 (little-endian).
+ EVT VT = Extract->getValueType(0);
+ SDLoc DL(Extract);
+ SDValue BaseAddr = Ld->getOperand(1);
+ unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+
+ // TODO: Use "BaseIndexOffset" to make this more effective.
+ SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
+ VT.getStoreSize());
+ SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
+
+ // The new load must have the same position as the old load in terms of memory
+ // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's
+ // output chain to use that TokenFactor.
+ // TODO: This code is based on a similar sequence in x86 lowering. It should
+ // be moved to a helper function, so it can be shared and reused.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue OldChain = SDValue(Ld, 1);
+ SDValue NewChain = SDValue(NewLd.getNode(), 1);
+ SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ OldChain, NewChain);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ }
+
+ return NewLd;
+}
+
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -14463,6 +14609,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (V.isUndef())
return DAG.getUNDEF(NVT);
+ if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
+ return NarrowLoad;
+
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
@@ -14510,6 +14660,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ return NarrowBOp;
+
return SDValue();
}
@@ -14745,10 +14898,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
-SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
- SelectionDAG &DAG,
- const TargetLowering &TLI,
- bool LegalOperations) {
+static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -14795,7 +14948,8 @@ SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
-SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
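
The new narrowExtractedVectorBinOp combine above rests on a simple lane-wise identity: extracting half of a wide bitwise binop whose operands were built by CONCAT_VECTORS is the same as applying the binop directly to the matching narrow halves. A standalone check of that identity in plain C++ (fixed 4/8-lane arrays standing in for the DAG's vector types):

#include <array>
#include <cassert>

using V4 = std::array<unsigned, 4>;
using V8 = std::array<unsigned, 8>;

V8 concat(V4 lo, V4 hi) {
  V8 r{};
  for (int i = 0; i < 4; ++i) { r[i] = lo[i]; r[i + 4] = hi[i]; }
  return r;
}
V8 band8(V8 a, V8 b) { V8 r{}; for (int i = 0; i < 8; ++i) r[i] = a[i] & b[i]; return r; }
V4 band4(V4 a, V4 b) { V4 r{}; for (int i = 0; i < 4; ++i) r[i] = a[i] & b[i]; return r; }
V4 extractHi(V8 a) { return {a[4], a[5], a[6], a[7]}; }

int main() {
  V4 x1{1, 2, 3, 4}, x2{5, 6, 7, 8}, y1{15, 240, 3, 9}, y2{8, 1, 255, 6};
  // Wide form: binop on the concatenated vectors, then extract the high half.
  V4 wide = extractHi(band8(concat(x1, x2), concat(y1, y2)));
  // Narrow form produced by the combine: binop directly on the high operands.
  V4 narrow = band4(x2, y2);
  assert(wide == narrow);
  (void)wide; (void)narrow;  // silence unused warnings when NDEBUG is set
}
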
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 9a47a914df91..d0a8b34c69c6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -899,6 +899,39 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
+static TargetLowering::LegalizeAction
+getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
+ unsigned EqOpc;
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected FP pseudo-opcode");
+ case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
+ case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ }
+
+ auto Action = TLI.getOperationAction(EqOpc, VT);
+
+ // We don't currently handle Custom or Promote for strict FP pseudo-ops.
+ // For now, we just expand for those cases.
+ if (Action != TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+
+ // ISD::FPOWI returns 'Legal' even though it should be expanded.
+ if (Opcode == ISD::STRICT_FPOWI && Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+
+ return Action;
+}
+
/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
@@ -1043,6 +1076,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ // These pseudo-ops get legalized as if they were their non-strict
+      // equivalents. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
+ // is also legal, but if ISD::FSQRT requires expansion then so does
+ // ISD::STRICT_FSQRT.
+ Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
+ Node->getValueType(0));
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2032,6 +2084,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -3907,16 +3962,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMAX_PPCF128));
break;
case ISD::FSQRT:
+ case ISD::STRICT_FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
case ISD::FSIN:
+ case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
RTLIB::SIN_F80, RTLIB::SIN_F128,
RTLIB::SIN_PPCF128));
break;
case ISD::FCOS:
+ case ISD::STRICT_FCOS:
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128));
@@ -3926,26 +3984,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandSinCosLibCall(Node, Results);
break;
case ISD::FLOG:
+ case ISD::STRICT_FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
RTLIB::LOG_F80, RTLIB::LOG_F128,
RTLIB::LOG_PPCF128));
break;
case ISD::FLOG2:
+ case ISD::STRICT_FLOG2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
RTLIB::LOG2_F80, RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
+ case ISD::STRICT_FLOG10:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
RTLIB::LOG10_F80, RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
+ case ISD::STRICT_FEXP:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
RTLIB::EXP_F80, RTLIB::EXP_F128,
RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
+ case ISD::STRICT_FEXP2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
RTLIB::EXP2_F80, RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128));
@@ -3966,11 +4029,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::CEIL_PPCF128));
break;
case ISD::FRINT:
+ case ISD::STRICT_FRINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
RTLIB::RINT_F80, RTLIB::RINT_F128,
RTLIB::RINT_PPCF128));
break;
case ISD::FNEARBYINT:
+ case ISD::STRICT_FNEARBYINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
@@ -3985,11 +4050,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUND_PPCF128));
break;
case ISD::FPOWI:
+ case ISD::STRICT_FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
RTLIB::POWI_F80, RTLIB::POWI_F128,
RTLIB::POWI_PPCF128));
break;
case ISD::FPOW:
+ case ISD::STRICT_FPOW:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
RTLIB::POW_F80, RTLIB::POW_F128,
RTLIB::POW_PPCF128));
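
The getStrictFPOpcodeAction helper introduced above encodes a deliberately conservative policy: a STRICT_* pseudo-op is queried as its plain counterpart, and any answer other than Legal (Custom, Promote, or Expand) collapses to Expand, which then reaches the libcall expansion paths extended in ConvertNodeToLibcall. A minimal sketch of that policy with stand-in enums (not LLVM's ISD or TargetLowering types):

#include <cassert>

enum Op { FSQRT, FSIN, STRICT_FSQRT, STRICT_FSIN };
enum Action { Legal, Custom, Promote, Expand };

// Stand-in for TLI.getOperationAction() on the plain FP opcodes of a toy
// target that implements FSQRT natively and custom-lowers FSIN.
Action plainAction(Op op) { return op == FSQRT ? Legal : Custom; }

Action strictAction(Op op) {
  Op eq = (op == STRICT_FSQRT) ? FSQRT : FSIN;  // map strict -> plain
  Action a = plainAction(eq);
  return a == Legal ? Legal : Expand;           // everything else expands
}

int main() {
  assert(strictAction(STRICT_FSQRT) == Legal);
  assert(strictAction(STRICT_FSIN) == Expand);  // Custom still falls back
}
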
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 16c1f78f1b35..177898e1e950 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4779,23 +4779,23 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
- VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
- } else {
- switch (DstAlign & 7) {
- case 0: VT = MVT::i64; break;
- case 4: VT = MVT::i32; break;
- case 2: VT = MVT::i16; break;
- default: VT = MVT::i8; break;
- }
- }
-
+ // Use the largest integer type whose alignment constraints are satisfied.
+  // We only need to check DstAlign here as SrcAlign is always greater than or
+  // equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
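
The rewritten block above picks the memop chunk type by walking down from i64 until the destination alignment (or the target's tolerance for misaligned accesses) permits it, instead of switching on DstAlign & 7. A standalone sketch of that selection loop, under the simplifying assumption that misaligned accesses are never allowed and only power-of-two integer types are considered:

#include <cstdio>

// Returns the width in bits of the memop chunk type, given the destination
// alignment in bytes (0 meaning "unconstrained").
unsigned chunkBits(unsigned dstAlign) {
  unsigned bits = 64;                 // start from i64, as the code above does
  while (dstAlign && dstAlign < bits / 8)
    bits /= 2;                        // i64 -> i32 -> i16 -> i8
  return bits;
}

int main() {
  std::printf("%u %u %u %u\n", chunkBits(0), chunkBits(8), chunkBits(4),
              chunkBits(2));          // 64 64 32 16
}
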
@@ -6542,6 +6542,63 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
return N;
}
+SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
+ unsigned OrigOpc = Node->getOpcode();
+ unsigned NewOpc;
+ bool IsUnary = false;
+ switch (OrigOpc) {
+ default:
+ llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
+ case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
+ case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
+ case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
+ case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
+ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
+ case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
+ case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
+ case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
+ case ISD::STRICT_FNEARBYINT:
+ NewOpc = ISD::FNEARBYINT;
+ IsUnary = true;
+ break;
+ }
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
+ SDNode *Res = nullptr;
+ if (IsUnary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2) });
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ ReplaceAllUsesWith(Node, Res);
+ RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 57d340c41c39..b895da21a7ff 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4736,24 +4736,15 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DIExpression *Expr, int64_t Offset,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
- SDDbgValue *SDV;
- auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (FISDN && Expr->startsWithDeref()) {
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations as such instead of as indirectly addressed
// locations.
- ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1,
- Expr->elements_end());
- DIExpression *DerefedDIExpr =
- DIExpression::get(*DAG.getContext(), TrailingElements);
- int FI = FISDN->getIndex();
- SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl,
- DbgSDNodeOrder);
- } else {
- SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
- Offset, dl, DbgSDNodeOrder);
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+ DbgSDNodeOrder);
}
- return SDV;
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
+ Offset, dl, DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -5254,7 +5245,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
- visitConstrainedFPIntrinsic(I, Intrinsic);
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -5752,11 +5755,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
-void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
unsigned Opcode;
- switch (Intrinsic) {
+ switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::experimental_constrained_fadd:
Opcode = ISD::STRICT_FADD;
@@ -5773,23 +5776,64 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_pow:
+ Opcode = ISD::STRICT_FPOW;
+ break;
+ case Intrinsic::experimental_constrained_powi:
+ Opcode = ISD::STRICT_FPOWI;
+ break;
+ case Intrinsic::experimental_constrained_sin:
+ Opcode = ISD::STRICT_FSIN;
+ break;
+ case Intrinsic::experimental_constrained_cos:
+ Opcode = ISD::STRICT_FCOS;
+ break;
+ case Intrinsic::experimental_constrained_exp:
+ Opcode = ISD::STRICT_FEXP;
+ break;
+ case Intrinsic::experimental_constrained_exp2:
+ Opcode = ISD::STRICT_FEXP2;
+ break;
+ case Intrinsic::experimental_constrained_log:
+ Opcode = ISD::STRICT_FLOG;
+ break;
+ case Intrinsic::experimental_constrained_log10:
+ Opcode = ISD::STRICT_FLOG10;
+ break;
+ case Intrinsic::experimental_constrained_log2:
+ Opcode = ISD::STRICT_FLOG2;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
- SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)) };
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain
SDVTList VTs = DAG.getVTList(ValueVTs);
- SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);
+ SDValue Result;
+ if (FPI.isUnaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)) });
+ else
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)) });
assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);
SDValue FPResult = Result.getValue(0);
- setValue(&I, FPResult);
+ setValue(&FPI, FPResult);
}
std::pair<SDValue, SDValue>
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bdaee858da61..77e131fa551c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -895,7 +895,7 @@ private:
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
- void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5e0feccb6b4c..687b882c5e4d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -905,50 +905,6 @@ public:
} // end anonymous namespace
-static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) {
- unsigned OrigOpc = Node->getOpcode();
- switch (OrigOpc) {
- case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true;
- case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true;
- case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true;
- case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true;
- case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true;
- default: return false;
- }
-}
-
-SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) {
- assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) ||
- (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) ||
- (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) ||
- (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) ||
- (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) &&
- "Unexpected StrictFP opcode!");
-
- // We're taking this node out of the chain, so we need to re-link things.
- SDValue InputChain = Node->getOperand(0);
- SDValue OutputChain = SDValue(Node, 1);
- CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain);
-
- SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType());
- SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) };
- SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops);
-
- // MorphNodeTo can operate in two ways: if an existing node with the
- // specified operands exists, it can just return it. Otherwise, it
- // updates the node in place to have the requested operands.
- if (Res == Node) {
- // If we updated the node in place, reset the node ID. To the isel,
- // this should be just like a newly allocated machine node.
- Res->setNodeId(-1);
- } else {
- CurDAG->ReplaceAllUsesWith(Node, Res);
- CurDAG->RemoveDeadNode(Node);
- }
-
- return Res;
-}
-
void SelectionDAGISel::DoInstructionSelection() {
DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
@@ -992,15 +948,12 @@ void SelectionDAGISel::DoInstructionSelection() {
// If the current node is a strict FP pseudo-op, the isStrictFPOp()
// function will provide the corresponding normal FP opcode to which the
// node should be mutated.
- unsigned NormalFPOpc = ISD::UNDEF;
- bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc);
- if (IsStrictFPOp)
- Node = MutateStrictFPToFP(Node, NormalFPOpc);
+ //
+ // FIXME: The backends need a way to handle FP constraints.
+ if (Node->isStrictFPOpcode())
+ Node = CurDAG->mutateStrictFPToFP(Node);
Select(Node);
-
- // FIXME: Add code here to attach an implicit def and use of
- // target-specific FP environment registers.
}
CurDAG->setRoot(Dummy.getValue());
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index befbd80d7965..0dffffee9976 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -603,11 +603,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
Known2, TLO, Depth+1))
return true;
- assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
@@ -633,11 +633,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::OR:
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
Known2, TLO, Depth+1))
return true;
- assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
@@ -660,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::XOR: {
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
return true;
- assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
@@ -725,8 +725,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return true;
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
- assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, NewMask, TLO))
@@ -741,8 +741,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return true;
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
- assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, NewMask, TLO))
@@ -907,7 +907,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
@@ -947,7 +947,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
@@ -1029,7 +1029,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
@@ -1084,7 +1084,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
Known.Zero |= NewBits;
break;
@@ -1134,7 +1134,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
break;
}
@@ -1193,7 +1193,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
@@ -1205,7 +1205,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
Known, TLO, Depth+1))
return true;
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
break;
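
The assert rewrites above are purely cosmetic: KnownBits::hasConflict() packages the old (Zero & One) == 0 check. A tiny stand-in struct (assumed shape, not LLVM's KnownBits class) showing what the asserts verify:

#include <cassert>
#include <cstdint>

struct KnownBits {
  uint64_t Zero = 0, One = 0;
  bool hasConflict() const { return (Zero & One) != 0; }
};

int main() {
  KnownBits ok{0x0c, 0x02};   // disjoint known-zero / known-one masks
  KnownBits bad{0x0c, 0x04};  // bit 2 claimed both zero and one
  assert(!ok.hasConflict());
  assert(bad.hasConflict());
}
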
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index ff7d205c1f4c..6750fde57638 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -27,7 +27,7 @@
using namespace llvm;
-#define DEBUG_TYPE "shadowstackgclowering"
+#define DEBUG_TYPE "shadow-stack-gc-lowering"
namespace {
@@ -66,10 +66,10 @@ private:
};
}
-INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
-INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index 2638702da152..aa75f5e2caa2 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -210,13 +210,12 @@ public:
char ShrinkWrap::ID = 0;
char &llvm::ShrinkWrapID = ShrinkWrap::ID;
-INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false,
- false)
+INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
+INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index e9eff4d0acb2..09e9c3bb3354 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -74,7 +74,7 @@ private:
} // end anonymous namespace
char SjLjEHPrepare::ID = 0;
-INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions",
+INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions",
false, false)
// Public Interface To the SjLjEHPrepare pass.
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index bc2a1d09056b..3656832a7f1a 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
#define DEBUG_TYPE "slotindexes"
char SlotIndexes::ID = 0;
-INITIALIZE_PASS(SlotIndexes, "slotindexes",
+INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
STATISTIC(NumLocalRenum, "Number of local renumberings");
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 43cbf4add0f8..0abe1c47da55 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -40,14 +40,14 @@
using namespace llvm;
-#define DEBUG_TYPE "spillplacement"
+#define DEBUG_TYPE "spill-code-placement"
char SpillPlacement::ID = 0;
-INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
char &llvm::SpillPlacementID = SpillPlacement::ID;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 86a16187fcb6..acb3676fdd71 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -53,7 +53,7 @@
using namespace llvm;
-#define DEBUG_TYPE "stackcoloring"
+#define DEBUG_TYPE "stack-coloring"
static cl::opt<bool>
DisableColoring("no-stack-coloring",
@@ -371,12 +371,12 @@ private:
char StackColoring::ID = 0;
char &llvm::StackColoringID = StackColoring::ID;
-INITIALIZE_PASS_BEGIN(StackColoring,
- "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(StackColoring,
- "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexes>();
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 5da77264261b..ca8bde2d114a 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -58,10 +58,10 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_PASS_BEGIN(StackProtector, "stack-protector",
+INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(StackProtector, "stack-protector",
+INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); }
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 234b2043a6a1..d1758ecbd79f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -32,7 +32,7 @@
#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "stackslotcoloring"
+#define DEBUG_TYPE "stack-slot-coloring"
static cl::opt<bool>
DisableSharing("no-stack-slot-sharing",
@@ -116,12 +116,12 @@ namespace {
char StackSlotColoring::ID = 0;
char &llvm::StackSlotColoringID = StackSlotColoring::ID;
-INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
namespace {
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index e2377d89497d..ad0b04373656 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -40,8 +40,7 @@ char TailDuplicatePass::ID = 0;
char &llvm::TailDuplicateID = TailDuplicatePass::ID;
-INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false,
- false)
+INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index d2414200e9d5..d40f7af431a9 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -749,7 +749,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
if (PredBB->succ_size() > 1)
return false;
- MachineBasicBlock *PredTBB, *PredFBB;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;
@@ -832,7 +832,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
appendCopies(PredBB, CopyInfos, Copies);
// Simplify
- MachineBasicBlock *PredTBB, *PredFBB;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 7392c8327148..552a89f76ca2 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -52,7 +52,7 @@
using namespace llvm;
-#define DEBUG_TYPE "twoaddrinstr"
+#define DEBUG_TYPE "twoaddressinstruction"
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
@@ -171,10 +171,10 @@ public:
} // end anonymous namespace
char TwoAddressInstructionPass::ID = 0;
-INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index a632b40c20f5..4e7542bf31e0 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -94,7 +94,7 @@ private:
} // end anonymous namespace
char WinEHPrepare::ID = 0;
-INITIALIZE_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
+INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
false, false)
FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); }
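The INITIALIZE_PASS hunks above all apply one pattern: the pass's command-line argument is now spelled with DEBUG_TYPE instead of a second hard-coded string, and where the two had drifted apart (e.g. "spillplacement" vs. "spill-code-placement") DEBUG_TYPE itself is renamed to the dashed form. A minimal sketch of the resulting shape, using a hypothetical pass name:

    #define DEBUG_TYPE "example-pass"  // hypothetical; now doubles as the pass argument

    char ExamplePass::ID = 0;
    INITIALIZE_PASS_BEGIN(ExamplePass, DEBUG_TYPE, "Example Pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    INITIALIZE_PASS_END(ExamplePass, DEBUG_TYPE, "Example Pass", false, false)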
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 556ebf78622f..90193d07b95d 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMDebugInfoCodeView
TypeDatabaseVisitor.cpp
TypeDumpVisitor.cpp
TypeIndex.cpp
+ TypeIndexDiscovery.cpp
TypeRecordMapping.cpp
TypeSerializer.cpp
TypeStreamMerger.cpp
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index f95c3e79388e..705b548141b0 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -45,24 +45,9 @@ static Error visitKnownMember(CVMemberRecord &Record,
}
static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
- class StealTypeServerVisitor : public TypeVisitorCallbacks {
- public:
- explicit StealTypeServerVisitor(TypeServer2Record &TR) : TR(TR) {}
-
- Error visitKnownRecord(CVType &CVR, TypeServer2Record &Record) override {
- TR = Record;
- return Error::success();
- }
-
- private:
- TypeServer2Record &TR;
- };
-
TypeServer2Record R(TypeRecordKind::TypeServer2);
- StealTypeServerVisitor Thief(R);
- if (auto EC = visitTypeRecord(Record, Thief))
+ if (auto EC = TypeDeserializer::deserializeAs(Record, R))
return std::move(EC);
-
return R;
}
@@ -308,8 +293,9 @@ Error llvm::codeview::visitTypeRecord(CVType &Record,
Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
TypeVisitorCallbacks &Callbacks,
+ VisitorDataSource Source,
TypeServerHandler *TS) {
- VisitHelper V(Callbacks, VDS_BytesPresent);
+ VisitHelper V(Callbacks, Source);
if (TS)
V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeStream(Types);
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
new file mode 100644
index 000000000000..11e2e215303c
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -0,0 +1,371 @@
+//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+static inline MethodKind getMethodKind(uint16_t Attrs) {
+ Attrs &= uint16_t(MethodOptions::MethodKindMask);
+ Attrs >>= 2;
+ return MethodKind(Attrs);
+}
+
+static inline bool isIntroVirtual(uint16_t Attrs) {
+ MethodKind MK = getMethodKind(Attrs);
+ return MK == MethodKind::IntroducingVirtual ||
+ MK == MethodKind::PureIntroducingVirtual;
+}
+
+static inline PointerMode getPointerMode(uint32_t Attrs) {
+ return static_cast<PointerMode>((Attrs >> PointerRecord::PointerModeShift) &
+ PointerRecord::PointerModeMask);
+}
+
+static inline bool isMemberPointer(uint32_t Attrs) {
+ PointerMode Mode = getPointerMode(Attrs);
+ return Mode == PointerMode::PointerToDataMember ||
+ Mode == PointerMode::PointerToMemberFunction;
+}
+
+static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) {
+ uint16_t N = support::endian::read16le(Data.data());
+ if (N < LF_NUMERIC)
+ return 2;
+
+ assert(N <= LF_UQUADWORD);
+
+ constexpr uint32_t Sizes[] = {
+ 1, // LF_CHAR
+ 2, // LF_SHORT
+ 2, // LF_USHORT
+ 4, // LF_LONG
+ 4, // LF_ULONG
+ 4, // LF_REAL32
+ 8, // LF_REAL64
+ 10, // LF_REAL80
+ 16, // LF_REAL128
+ 8, // LF_QUADWORD
+ 8, // LF_UQUADWORD
+ };
+
+ return Sizes[N - LF_NUMERIC];
+}
+
+static inline uint32_t getCStringLength(ArrayRef<uint8_t> Data) {
+ const char *S = reinterpret_cast<const char *>(Data.data());
+ return strlen(S) + 1;
+}
+
+static void handleMethodOverloadList(ArrayRef<uint8_t> Content,
+ SmallVectorImpl<TiReference> &Refs) {
+ uint32_t Offset = 0;
+
+ while (!Content.empty()) {
+ // Array of:
+ // 0: Attrs
+ // 2: Padding
+ // 4: TypeIndex
+ // if (isIntroVirtual())
+ // 8: VFTableOffset
+
+ // At least 8 bytes are guaranteed. 4 extra bytes come iff the function is an
+ // intro virtual.
+ uint32_t Len = 8;
+
+ uint16_t Attrs = support::endian::read16le(Content.data());
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+
+ if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
+ Len += 4;
+ Offset += Len;
+ Content = Content.drop_front(Len);
+ }
+}
+
+static uint32_t handleBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ // 8: Encoded Integer
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8 + getEncodedIntegerLength(Data.drop_front(8));
+}
+
+static uint32_t handleEnumerator(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: Encoded Integer
+ // <next>: Name
+ uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4));
+ return Size + getCStringLength(Data.drop_front(Size));
+}
+
+static uint32_t handleDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ // 8: Encoded Integer
+ // <next>: Name
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8));
+ return Size + getCStringLength(Data.drop_front(Size));
+}
+
+static uint32_t handleOverloadedMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ // 8: Name
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8 + getCStringLength(Data.drop_front(8));
+}
+
+static uint32_t handleOneMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Attributes
+ // 4: Type
+ // if (isIntroVirtual)
+ // 8: VFTableOffset
+ // <next>: Name
+ uint32_t Size = 8;
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+
+ uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data());
+ if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
+ Size += 4;
+
+ return Size + getCStringLength(Data.drop_front(Size));
+}
+
+static uint32_t handleNestedType(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ // 8: Name
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8 + getCStringLength(Data.drop_front(8));
+}
+
+static uint32_t handleStaticDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ // 8: Name
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8 + getCStringLength(Data.drop_front(8));
+}
+
+static uint32_t handleVirtualBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
+ bool IsIndirect,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Attrs
+ // 4: TypeIndex
+ // 8: TypeIndex
+ // 12: Encoded Integer
+ // <next>: Encoded Integer
+ uint32_t Size = 12;
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2});
+ Size += getEncodedIntegerLength(Data.drop_front(Size));
+ Size += getEncodedIntegerLength(Data.drop_front(Size));
+ return Size;
+}
+
+static uint32_t handleVFPtr(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8;
+}
+
+static uint32_t handleListContinuation(ArrayRef<uint8_t> Data, uint32_t Offset,
+ SmallVectorImpl<TiReference> &Refs) {
+ // 0: Kind
+ // 2: Padding
+ // 4: TypeIndex
+ Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
+ return 8;
+}
+
+static void handleFieldList(ArrayRef<uint8_t> Content,
+ SmallVectorImpl<TiReference> &Refs) {
+ uint32_t Offset = 0;
+ uint32_t ThisLen = 0;
+ while (!Content.empty()) {
+ TypeLeafKind Kind =
+ static_cast<TypeLeafKind>(support::endian::read16le(Content.data()));
+ switch (Kind) {
+ case LF_BCLASS:
+ ThisLen = handleBaseClass(Content, Offset, Refs);
+ break;
+ case LF_ENUMERATE:
+ ThisLen = handleEnumerator(Content, Offset, Refs);
+ break;
+ case LF_MEMBER:
+ ThisLen = handleDataMember(Content, Offset, Refs);
+ break;
+ case LF_METHOD:
+ ThisLen = handleOverloadedMethod(Content, Offset, Refs);
+ break;
+ case LF_ONEMETHOD:
+ ThisLen = handleOneMethod(Content, Offset, Refs);
+ break;
+ case LF_NESTTYPE:
+ ThisLen = handleNestedType(Content, Offset, Refs);
+ break;
+ case LF_STMEMBER:
+ ThisLen = handleStaticDataMember(Content, Offset, Refs);
+ break;
+ case LF_VBCLASS:
+ case LF_IVBCLASS:
+ ThisLen =
+ handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs);
+ break;
+ case LF_VFUNCTAB:
+ ThisLen = handleVFPtr(Content, Offset, Refs);
+ break;
+ case LF_INDEX:
+ ThisLen = handleListContinuation(Content, Offset, Refs);
+ break;
+ default:
+ return;
+ }
+ Content = Content.drop_front(ThisLen);
+ Offset += ThisLen;
+ if (!Content.empty()) {
+ uint8_t Pad = Content.front();
+ if (Pad >= LF_PAD0) {
+ uint32_t Skip = Pad & 0x0F;
+ Content = Content.drop_front(Skip);
+ Offset += Skip;
+ }
+ }
+ }
+}
+
+static void handlePointer(ArrayRef<uint8_t> Content,
+ SmallVectorImpl<TiReference> &Refs) {
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+
+ uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data());
+ if (isMemberPointer(Attrs))
+ Refs.push_back({TiRefKind::TypeRef, 8, 1});
+}
+
+static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind,
+ SmallVectorImpl<TiReference> &Refs) {
+ uint32_t Count;
+ // FIXME: In the future it would be nice if we could avoid hardcoding these
+ // values. One idea is to define some structures representing these types
+ // that would allow the use of offsetof().
+ switch (Kind) {
+ case TypeLeafKind::LF_FUNC_ID:
+ Refs.push_back({TiRefKind::IndexRef, 0, 1});
+ Refs.push_back({TiRefKind::TypeRef, 4, 1});
+ break;
+ case TypeLeafKind::LF_MFUNC_ID:
+ Refs.push_back({TiRefKind::TypeRef, 0, 2});
+ break;
+ case TypeLeafKind::LF_STRING_ID:
+ Refs.push_back({TiRefKind::IndexRef, 0, 1});
+ break;
+ case TypeLeafKind::LF_SUBSTR_LIST:
+ Count = support::endian::read32le(Content.data());
+ if (Count > 0)
+ Refs.push_back({TiRefKind::IndexRef, 4, Count});
+ break;
+ case TypeLeafKind::LF_BUILDINFO:
+ Count = support::endian::read16le(Content.data());
+ if (Count > 0)
+ Refs.push_back({TiRefKind::IndexRef, 2, Count});
+ break;
+ case TypeLeafKind::LF_UDT_SRC_LINE:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+ Refs.push_back({TiRefKind::IndexRef, 4, 1});
+ break;
+ case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+ break;
+ case TypeLeafKind::LF_MODIFIER:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+ break;
+ case TypeLeafKind::LF_PROCEDURE:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+ Refs.push_back({TiRefKind::TypeRef, 8, 1});
+ break;
+ case TypeLeafKind::LF_MFUNCTION:
+ Refs.push_back({TiRefKind::TypeRef, 0, 3});
+ Refs.push_back({TiRefKind::TypeRef, 16, 1});
+ break;
+ case TypeLeafKind::LF_ARGLIST:
+ Count = support::endian::read32le(Content.data());
+ if (Count > 0)
+ Refs.push_back({TiRefKind::TypeRef, 4, Count});
+ break;
+ case TypeLeafKind::LF_ARRAY:
+ Refs.push_back({TiRefKind::TypeRef, 0, 2});
+ break;
+ case TypeLeafKind::LF_CLASS:
+ case TypeLeafKind::LF_STRUCTURE:
+ case TypeLeafKind::LF_INTERFACE:
+ Refs.push_back({TiRefKind::TypeRef, 4, 3});
+ break;
+ case TypeLeafKind::LF_UNION:
+ Refs.push_back({TiRefKind::TypeRef, 4, 1});
+ break;
+ case TypeLeafKind::LF_ENUM:
+ Refs.push_back({TiRefKind::TypeRef, 4, 2});
+ break;
+ case TypeLeafKind::LF_BITFIELD:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1});
+ break;
+ case TypeLeafKind::LF_VFTABLE:
+ Refs.push_back({TiRefKind::TypeRef, 0, 2});
+ break;
+ case TypeLeafKind::LF_VTSHAPE:
+ break;
+ case TypeLeafKind::LF_METHODLIST:
+ handleMethodOverloadList(Content, Refs);
+ break;
+ case TypeLeafKind::LF_FIELDLIST:
+ handleFieldList(Content, Refs);
+ break;
+ case TypeLeafKind::LF_POINTER:
+ handlePointer(Content, Refs);
+ break;
+ default:
+ break;
+ }
+}
+
+void llvm::codeview::discoverTypeIndices(const CVType &Type,
+ SmallVectorImpl<TiReference> &Refs) {
+ ::discoverTypeIndices(Type.content(), Type.kind(), Refs);
+}
+
+void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
+ SmallVectorImpl<TiReference> &Refs) {
+ const RecordPrefix *P =
+ reinterpret_cast<const RecordPrefix *>(RecordData.data());
+ TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
+ ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
+}
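TypeIndexDiscovery reports where TypeIndex values live inside a record: each TiReference gives a kind (type vs. id stream), an offset relative to the record content (the bytes after the RecordPrefix), and a count of consecutive indices. A rough usage sketch, not part of this import; the variable names are illustrative:

    SmallVector<TiReference, 16> Refs;
    discoverTypeIndices(Type, Refs);               // Type is a CVType
    ArrayRef<uint8_t> Content = Type.content();    // bytes after the RecordPrefix
    for (const TiReference &Ref : Refs) {
      for (uint32_t I = 0; I < Ref.Count; ++I) {
        uint32_t Off = Ref.Offset + I * sizeof(TypeIndex);
        TypeIndex TI(support::endian::read32le(Content.data() + Off));
        // Ref.Kind says whether TI points into the type (TPI) or id (IPI) stream.
      }
    }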
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
index 3b061e67e05e..93c1198e36ce 100644
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include <string.h>
@@ -16,21 +17,109 @@
using namespace llvm;
using namespace llvm::codeview;
-bool TypeSerializer::isInFieldList() const {
- return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
+namespace {
+struct HashedType {
+ uint64_t Hash;
+ const uint8_t *Data;
+ unsigned Size; // FIXME: Go to uint16_t?
+ TypeIndex Index;
+};
+
+/// Wrapper around a pointer to a HashedType. Hash and equality operations are
+/// based on data in the pointee.
+struct HashedTypePtr {
+ HashedTypePtr() = default;
+ HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
+ HashedType *Ptr = nullptr;
+};
+} // namespace
+
+namespace llvm {
+template <> struct DenseMapInfo<HashedTypePtr> {
+ static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
+ static inline HashedTypePtr getTombstoneKey() {
+ return HashedTypePtr(reinterpret_cast<HashedType *>(1));
+ }
+ static unsigned getHashValue(HashedTypePtr Val) {
+ assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
+ return Val.Ptr->Hash;
+ }
+ static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
+ HashedType *LHS = LHSP.Ptr;
+ HashedType *RHS = RHSP.Ptr;
+ if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr)
+ return LHS == RHS;
+ if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size)
+ return false;
+ return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
+ }
+};
}
-TypeIndex TypeSerializer::calcNextTypeIndex() const {
- if (LastTypeIndex.isNoneType())
- return TypeIndex(TypeIndex::FirstNonSimpleIndex);
- else
- return TypeIndex(LastTypeIndex.getIndex() + 1);
+/// Private implementation so that we don't leak our DenseMap instantiations to
+/// users.
+class llvm::codeview::TypeHasher {
+private:
+ /// Storage for type record provided by the caller. Records will outlive the
+ /// hasher object, so they should be allocated here.
+ BumpPtrAllocator &RecordStorage;
+
+ /// Storage for hash keys. These only need to live as long as the hashing
+ /// operation.
+ BumpPtrAllocator KeyStorage;
+
+ /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here,
+ /// but DenseMap is inefficient when the keys are long (like type records)
+ /// because it recomputes the hash value of every key when it grows. This
+ /// value type stores the hash out of line in KeyStorage, so that table
+ /// entries are small and easy to rehash.
+ DenseSet<HashedTypePtr> HashedRecords;
+
+public:
+ TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {}
+
+ void reset() { HashedRecords.clear(); }
+
+ /// Takes the bytes of a type record, inserts them into the hash table, saves
+ /// them, and returns a pointer to an identical stable type record along with
+ /// its type index in the destination stream.
+ TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record, TypeIndex TI);
+};
+
+TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record,
+ TypeIndex TI) {
+ assert(Record.size() < UINT32_MAX && "Record too big");
+ assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+
+ // Compute the hash up front so we can store it in the key.
+ HashedType TempHashedType = {hash_value(Record), Record.data(),
+ unsigned(Record.size()), TI};
+ auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType));
+ HashedType *&Hashed = Result.first->Ptr;
+
+ if (Result.second) {
+ // This was a new type record. We need stable storage for both the key and
+ // the record. The record should outlive the hashing operation.
+ Hashed = KeyStorage.Allocate<HashedType>();
+ *Hashed = TempHashedType;
+
+ uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
+ memcpy(Stable, Record.data(), Record.size());
+ Hashed->Data = Stable;
+ assert(Hashed->Size == Record.size());
+ }
+
+ // Update the caller's copy of Record to point to the stable copy.
+ Record = ArrayRef<uint8_t>(Hashed->Data, Hashed->Size);
+ return Hashed->Index;
}
-TypeIndex TypeSerializer::incrementTypeIndex() {
- TypeIndex Previous = LastTypeIndex;
- LastTypeIndex = calcNextTypeIndex();
- return Previous;
+TypeIndex TypeSerializer::nextTypeIndex() const {
+ return TypeIndex::fromArrayIndex(SeenRecords.size());
+}
+
+bool TypeSerializer::isInFieldList() const {
+ return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
}
MutableArrayRef<uint8_t> TypeSerializer::getCurrentSubRecordData() {
@@ -51,46 +140,6 @@ Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) {
return Error::success();
}
-TypeIndex
-TypeSerializer::insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record) {
- assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
-
- StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
-
- TypeIndex NextTypeIndex = calcNextTypeIndex();
- auto Result = HashedRecords.try_emplace(S, NextTypeIndex);
- if (Result.second) {
- LastTypeIndex = NextTypeIndex;
- SeenRecords.push_back(Record);
- }
- return Result.first->getValue();
-}
-
-TypeIndex
-TypeSerializer::insertRecordBytesWithCopy(CVType &Record,
- MutableArrayRef<uint8_t> Data) {
- assert(Data.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
-
- StringRef S(reinterpret_cast<const char *>(Data.data()), Data.size());
-
- // Do a two state lookup / insert so that we don't have to allocate unless
- // we're going
- // to do an insert. This is a big memory savings.
- auto Iter = HashedRecords.find(S);
- if (Iter != HashedRecords.end())
- return Iter->second;
-
- LastTypeIndex = calcNextTypeIndex();
- uint8_t *Copy = RecordStorage.Allocate<uint8_t>(Data.size());
- ::memcpy(Copy, Data.data(), Data.size());
- Data = MutableArrayRef<uint8_t>(Copy, Data.size());
- S = StringRef(reinterpret_cast<const char *>(Data.data()), Data.size());
- HashedRecords.insert(std::make_pair(S, LastTypeIndex));
- SeenRecords.push_back(Data);
- Record.RecordData = Data;
- return LastTypeIndex;
-}
-
Expected<MutableArrayRef<uint8_t>>
TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
uint32_t Align = Record.size() % 4;
@@ -108,27 +157,79 @@ TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
return MutableArrayRef<uint8_t>(Record.data(), Record.size() + N);
}
-TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage)
- : RecordStorage(Storage), LastTypeIndex(),
- RecordBuffer(MaxRecordLength * 2),
+TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash)
+ : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2),
Stream(RecordBuffer, llvm::support::little), Writer(Stream),
Mapping(Writer) {
// RecordBuffer needs to be able to hold enough data so that if we are 1
// byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
// we won't overflow.
+ if (Hash)
+ Hasher = make_unique<TypeHasher>(Storage);
}
-ArrayRef<MutableArrayRef<uint8_t>> TypeSerializer::records() const {
+TypeSerializer::~TypeSerializer() = default;
+
+ArrayRef<ArrayRef<uint8_t>> TypeSerializer::records() const {
return SeenRecords;
}
-TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; }
+void TypeSerializer::reset() {
+ if (Hasher)
+ Hasher->reset();
+ Writer.setOffset(0);
+ CurrentSegment = RecordSegment();
+ FieldListSegments.clear();
+ TypeKind.reset();
+ MemberKind.reset();
+ SeenRecords.clear();
+}
-TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef<uint8_t> Record) {
+TypeIndex TypeSerializer::insertRecordBytes(ArrayRef<uint8_t> &Record) {
assert(!TypeKind.hasValue() && "Already in a type mapping!");
assert(Writer.getOffset() == 0 && "Stream has data already!");
- return insertRecordBytesPrivate(Record);
+ if (Hasher) {
+ TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex());
+ if (nextTypeIndex() == ActualTI)
+ SeenRecords.push_back(Record);
+ return ActualTI;
+ }
+
+ TypeIndex NewTI = nextTypeIndex();
+ uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
+ memcpy(Stable, Record.data(), Record.size());
+ Record = ArrayRef<uint8_t>(Stable, Record.size());
+ SeenRecords.push_back(Record);
+ return NewTI;
+}
+
+TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) {
+ assert(!TypeKind.hasValue() && "Already in a type mapping!");
+ assert(Writer.getOffset() == 0 && "Stream has data already!");
+
+ TypeIndex TI;
+ ArrayRef<uint8_t> OriginalData = Record.OriginalRecord.RecordData;
+ if (Record.Mappings.empty()) {
+ // This record did not remap any type indices. Just write it.
+ return insertRecordBytes(OriginalData);
+ }
+
+ // At least one type index was remapped. Before we can hash it we have to
+ // copy the full record bytes, re-write each type index, then hash the copy.
+ // We do this in temporary storage since only the DenseMap can decide whether
+ // this record already exists, and if it does we don't want the memory to
+ // stick around.
+ RemapStorage.resize(OriginalData.size());
+ ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size());
+ uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix);
+ for (const auto &M : Record.Mappings) {
+ // First 4 bytes of every record are the record prefix, but the mapping
+ // offset is relative to the content which starts after.
+ *(TypeIndex *)(ContentBegin + M.first) = M.second;
+ }
+ auto RemapRef = makeArrayRef(RemapStorage);
+ return insertRecordBytes(RemapRef);
}
Error TypeSerializer::visitTypeBegin(CVType &Record) {
@@ -163,8 +264,13 @@ Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t);
Record.Type = *TypeKind;
- TypeIndex InsertedTypeIndex =
- insertRecordBytesWithCopy(Record, ThisRecordData);
+ Record.RecordData = ThisRecordData;
+
+ // insertRecordBytes assumes we're not in a mapping, so do this first.
+ TypeKind.reset();
+ Writer.setOffset(0);
+
+ TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData);
// Write out each additional segment in reverse order, and update each
// record's continuation index to point to the previous one.
@@ -174,11 +280,9 @@ Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
reinterpret_cast<support::ulittle32_t *>(CIBytes.data());
assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder");
*CI = InsertedTypeIndex.getIndex();
- InsertedTypeIndex = insertRecordBytesPrivate(X);
+ InsertedTypeIndex = insertRecordBytes(X);
}
- TypeKind.reset();
- Writer.setOffset(0);
FieldListSegments.clear();
CurrentSegment.SubRecords.clear();
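With hashing enabled, the serializer now deduplicates byte-identical records: the hash is computed once and stored out of line next to a pointer to the bytes, and equality only falls back to memcmp when hash and size already match. A small sketch of the observable effect, not part of this import; the record bytes are placeholders:

    BumpPtrAllocator Alloc;
    TypeSerializer Serializer(Alloc, /*Hash=*/true);

    const uint8_t Bytes[8] = {0};                  // placeholder, 4-byte aligned
    ArrayRef<uint8_t> RecA(Bytes, sizeof(Bytes));
    ArrayRef<uint8_t> RecB = RecA;                 // identical bytes, second reference
    TypeIndex A = Serializer.insertRecordBytes(RecA);  // copied to stable storage
    TypeIndex B = Serializer.insertRecordBytes(RecB);  // hash hit, nothing new stored
    assert(A == B && Serializer.records().size() == 1);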
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 46747f8eab99..71a0966df036 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -11,7 +11,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
@@ -57,37 +59,56 @@ namespace {
/// looking at the record kind.
class TypeStreamMerger : public TypeVisitorCallbacks {
public:
- TypeStreamMerger(TypeTableBuilder &DestIdStream,
- TypeTableBuilder &DestTypeStream,
- SmallVectorImpl<TypeIndex> &SourceToDest,
- TypeServerHandler *Handler)
- : DestIdStream(DestIdStream), DestTypeStream(DestTypeStream),
- FieldListBuilder(DestTypeStream), Handler(Handler),
- IndexMap(SourceToDest) {}
+ explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest,
+ TypeServerHandler *Handler)
+ : Handler(Handler), IndexMap(SourceToDest) {
+ SourceToDest.clear();
+ }
static const TypeIndex Untranslated;
-/// TypeVisitorCallbacks overrides.
-#define TYPE_RECORD(EnumName, EnumVal, Name) \
- Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
-#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override;
-#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#include "llvm/DebugInfo/CodeView/TypeRecords.def"
-
- Error visitUnknownType(CVType &Record) override;
-
Error visitTypeBegin(CVType &Record) override;
Error visitTypeEnd(CVType &Record) override;
- Error visitMemberEnd(CVMemberRecord &Record) override;
- Error mergeStream(const CVTypeArray &Types);
+ Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes,
+ const CVTypeArray &IdsAndTypes);
+ Error mergeIdRecords(TypeTableBuilder &Dest,
+ ArrayRef<TypeIndex> TypeSourceToDest,
+ const CVTypeArray &Ids);
+ Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types);
private:
+ Error doit(const CVTypeArray &Types);
+
void addMapping(TypeIndex Idx);
- bool remapIndex(TypeIndex &Idx);
+ bool remapTypeIndex(TypeIndex &Idx);
+ bool remapItemIndex(TypeIndex &Idx);
+
+ bool remapIndices(RemappedType &Record, ArrayRef<TiReference> Refs) {
+ auto OriginalData = Record.OriginalRecord.content();
+ bool Success = true;
+ for (auto &Ref : Refs) {
+ uint32_t Offset = Ref.Offset;
+ ArrayRef<uint8_t> Bytes =
+ OriginalData.slice(Ref.Offset, sizeof(TypeIndex));
+ ArrayRef<TypeIndex> TIs(reinterpret_cast<const TypeIndex *>(Bytes.data()),
+ Ref.Count);
+ for (auto TI : TIs) {
+ TypeIndex NewTI = TI;
+ bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef)
+ ? remapItemIndex(NewTI)
+ : remapTypeIndex(NewTI);
+ if (ThisSuccess && NewTI != TI)
+ Record.Mappings.emplace_back(Offset, NewTI);
+ Offset += sizeof(TypeIndex);
+ Success &= ThisSuccess;
+ }
+ }
+ return Success;
+ }
+
+ bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
size_t slotForIndex(TypeIndex Idx) const {
assert(!Idx.isSimple() && "simple type indices have no slots");
@@ -98,49 +119,45 @@ private:
return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
}
- template <typename RecordType>
- Error writeRecord(RecordType &R, bool RemapSuccess) {
+ Error writeRecord(TypeTableBuilder &Dest, const RemappedType &Record,
+ bool RemapSuccess) {
TypeIndex DestIdx = Untranslated;
if (RemapSuccess)
- DestIdx = DestTypeStream.writeKnownType(R);
+ DestIdx = Dest.writeSerializedRecord(Record);
addMapping(DestIdx);
return Error::success();
}
- template <typename RecordType>
- Error writeIdRecord(RecordType &R, bool RemapSuccess) {
- TypeIndex DestIdx = Untranslated;
- if (RemapSuccess)
- DestIdx = DestIdStream.writeKnownType(R);
+ Error writeTypeRecord(const CVType &Record) {
+ TypeIndex DestIdx =
+ DestTypeStream->writeSerializedRecord(Record.RecordData);
addMapping(DestIdx);
return Error::success();
}
- template <typename RecordType>
- Error writeMember(RecordType &R, bool RemapSuccess) {
- if (RemapSuccess)
- FieldListBuilder.writeMemberType(R);
- else
- HadUntranslatedMember = true;
- return Error::success();
+ Error writeTypeRecord(const RemappedType &Record, bool RemapSuccess) {
+ return writeRecord(*DestTypeStream, Record, RemapSuccess);
+ }
+
+ Error writeIdRecord(const RemappedType &Record, bool RemapSuccess) {
+ return writeRecord(*DestIdStream, Record, RemapSuccess);
}
Optional<Error> LastError;
bool IsSecondPass = false;
- bool HadUntranslatedMember = false;
-
unsigned NumBadIndices = 0;
- BumpPtrAllocator Allocator;
+ TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
- TypeTableBuilder &DestIdStream;
- TypeTableBuilder &DestTypeStream;
- FieldListRecordBuilder FieldListBuilder;
- TypeServerHandler *Handler;
+ TypeTableBuilder *DestIdStream = nullptr;
+ TypeTableBuilder *DestTypeStream = nullptr;
+ TypeServerHandler *Handler = nullptr;
- TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
+ // If we're only mapping id records, this array contains the mapping for
+ // type records.
+ ArrayRef<TypeIndex> TypeLookup;
/// Map from source type index to destination type index. Indexed by source
/// type index minus 0x1000.
@@ -151,22 +168,34 @@ private:
const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
-Error TypeStreamMerger::visitTypeBegin(CVRecord<TypeLeafKind> &Rec) {
+Error TypeStreamMerger::visitTypeBegin(CVType &Rec) {
+ RemappedType R(Rec);
+ SmallVector<TiReference, 32> Refs;
+ discoverTypeIndices(Rec.RecordData, Refs);
+ bool Success = remapIndices(R, Refs);
+ switch (Rec.kind()) {
+ case TypeLeafKind::LF_FUNC_ID:
+ case TypeLeafKind::LF_MFUNC_ID:
+ case TypeLeafKind::LF_STRING_ID:
+ case TypeLeafKind::LF_SUBSTR_LIST:
+ case TypeLeafKind::LF_BUILDINFO:
+ case TypeLeafKind::LF_UDT_SRC_LINE:
+ case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
+ return writeIdRecord(R, Success);
+ default:
+ return writeTypeRecord(R, Success);
+ }
return Error::success();
}
-Error TypeStreamMerger::visitTypeEnd(CVRecord<TypeLeafKind> &Rec) {
- CurIndex = TypeIndex(CurIndex.getIndex() + 1);
+Error TypeStreamMerger::visitTypeEnd(CVType &Rec) {
+ ++CurIndex;
if (!IsSecondPass)
assert(IndexMap.size() == slotForIndex(CurIndex) &&
"visitKnownRecord should add one index map entry");
return Error::success();
}
-Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) {
- return Error::success();
-}
-
void TypeStreamMerger::addMapping(TypeIndex Idx) {
if (!IsSecondPass) {
assert(IndexMap.size() == slotForIndex(CurIndex) &&
@@ -178,7 +207,7 @@ void TypeStreamMerger::addMapping(TypeIndex Idx) {
}
}
-bool TypeStreamMerger::remapIndex(TypeIndex &Idx) {
+bool TypeStreamMerger::remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
// Simple types are unchanged.
if (Idx.isSimple())
return true;
@@ -187,14 +216,14 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) {
// successfully. If it refers to a type later in the stream or a record we
// had to defer, defer it until later pass.
unsigned MapPos = slotForIndex(Idx);
- if (MapPos < IndexMap.size() && IndexMap[MapPos] != Untranslated) {
- Idx = IndexMap[MapPos];
+ if (MapPos < Map.size() && Map[MapPos] != Untranslated) {
+ Idx = Map[MapPos];
return true;
}
// If this is the second pass and this index isn't in the map, then it points
// outside the current type stream, and this is a corrupt record.
- if (IsSecondPass && MapPos >= IndexMap.size()) {
+ if (IsSecondPass && MapPos >= Map.size()) {
// FIXME: Print a more useful error. We can give the current record and the
// index that we think its pointing to.
LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
@@ -208,241 +237,61 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) {
return false;
}
-//----------------------------------------------------------------------------//
-// Item records
-//----------------------------------------------------------------------------//
+bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) {
+ // If we're mapping a pure index stream, then IndexMap only contains mappings
+ // from OldIdStream -> NewIdStream, in which case we will need to use the
+ // special mapping from OldTypeStream -> NewTypeStream which was computed
+ // externally. Regardless, we use this special map if and only if we are
+ // doing an id-only mapping.
+ if (DestTypeStream == nullptr)
+ return remapIndex(Idx, TypeLookup);
-Error TypeStreamMerger::visitKnownRecord(CVType &, FuncIdRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ParentScope);
- Success &= remapIndex(R.FunctionType);
- return writeIdRecord(R, Success);
+ assert(TypeLookup.empty());
+ return remapIndex(Idx, IndexMap);
}
-Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFuncIdRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ClassType);
- Success &= remapIndex(R.FunctionType);
- return writeIdRecord(R, Success);
+bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) {
+ assert(DestIdStream);
+ return remapIndex(Idx, IndexMap);
}
-Error TypeStreamMerger::visitKnownRecord(CVType &, StringIdRecord &R) {
- return writeIdRecord(R, remapIndex(R.Id));
-}
+Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest,
+ const CVTypeArray &Types) {
+ DestTypeStream = &Dest;
-Error TypeStreamMerger::visitKnownRecord(CVType &, StringListRecord &R) {
- bool Success = true;
- for (TypeIndex &Str : R.StringIndices)
- Success &= remapIndex(Str);
- return writeIdRecord(R, Success);
+ return doit(Types);
}
-Error TypeStreamMerger::visitKnownRecord(CVType &, BuildInfoRecord &R) {
- bool Success = true;
- for (TypeIndex &Arg : R.ArgIndices)
- Success &= remapIndex(Arg);
- return writeIdRecord(R, Success);
-}
+Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest,
+ ArrayRef<TypeIndex> TypeSourceToDest,
+ const CVTypeArray &Ids) {
+ DestIdStream = &Dest;
+ TypeLookup = TypeSourceToDest;
-Error TypeStreamMerger::visitKnownRecord(CVType &, UdtSourceLineRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.UDT);
- Success &= remapIndex(R.SourceFile);
- // FIXME: Translate UdtSourceLineRecord into UdtModSourceLineRecords in the
- // IPI stream.
- return writeIdRecord(R, Success);
+ return doit(Ids);
}
-Error TypeStreamMerger::visitKnownRecord(CVType &, UdtModSourceLineRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.UDT);
- Success &= remapIndex(R.SourceFile);
- return writeIdRecord(R, Success);
-}
+Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds,
+ TypeTableBuilder &DestTypes,
+ const CVTypeArray &IdsAndTypes) {
+ DestIdStream = &DestIds;
+ DestTypeStream = &DestTypes;
-//----------------------------------------------------------------------------//
-// Type records
-//----------------------------------------------------------------------------//
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, ModifierRecord &R) {
- return writeRecord(R, remapIndex(R.ModifiedType));
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, ProcedureRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ReturnType);
- Success &= remapIndex(R.ArgumentList);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFunctionRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ReturnType);
- Success &= remapIndex(R.ClassType);
- Success &= remapIndex(R.ThisType);
- Success &= remapIndex(R.ArgumentList);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &Type, ArgListRecord &R) {
- bool Success = true;
- for (TypeIndex &Arg : R.ArgIndices)
- Success &= remapIndex(Arg);
- if (auto EC = writeRecord(R, Success))
- return EC;
- return Error::success();
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, PointerRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ReferentType);
- if (R.isPointerToMember())
- Success &= remapIndex(R.MemberInfo->ContainingType);
- return writeRecord(R, Success);
+ return doit(IdsAndTypes);
}
-Error TypeStreamMerger::visitKnownRecord(CVType &, ArrayRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.ElementType);
- Success &= remapIndex(R.IndexType);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, ClassRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.FieldList);
- Success &= remapIndex(R.DerivationList);
- Success &= remapIndex(R.VTableShape);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, UnionRecord &R) {
- return writeRecord(R, remapIndex(R.FieldList));
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, EnumRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.FieldList);
- Success &= remapIndex(R.UnderlyingType);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, BitFieldRecord &R) {
- return writeRecord(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableShapeRecord &R) {
- return writeRecord(R, true);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, TypeServer2Record &R) {
- return writeRecord(R, true);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, LabelRecord &R) {
- return writeRecord(R, true);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.CompleteClass);
- Success &= remapIndex(R.OverriddenVFTable);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &,
- MethodOverloadListRecord &R) {
- bool Success = true;
- for (OneMethodRecord &Meth : R.Methods)
- Success &= remapIndex(Meth.Type);
- return writeRecord(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) {
- // Visit the members inside the field list.
- HadUntranslatedMember = false;
- FieldListBuilder.begin();
- if (auto EC = codeview::visitMemberRecordStream(R.Data, *this))
- return EC;
-
- // Write the record if we translated all field list members.
- TypeIndex DestIdx = Untranslated;
- if (!HadUntranslatedMember)
- DestIdx = FieldListBuilder.end();
- else
- FieldListBuilder.reset();
- addMapping(DestIdx);
-
- return Error::success();
-}
-
-//----------------------------------------------------------------------------//
-// Member records
-//----------------------------------------------------------------------------//
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- NestedTypeRecord &R) {
- return writeMember(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, OneMethodRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.Type);
- return writeMember(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- OverloadedMethodRecord &R) {
- return writeMember(R, remapIndex(R.MethodList));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- DataMemberRecord &R) {
- return writeMember(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- StaticDataMemberRecord &R) {
- return writeMember(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- EnumeratorRecord &R) {
- return writeMember(R, true);
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, VFPtrRecord &R) {
- return writeMember(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, BaseClassRecord &R) {
- return writeMember(R, remapIndex(R.Type));
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- VirtualBaseClassRecord &R) {
- bool Success = true;
- Success &= remapIndex(R.BaseType);
- Success &= remapIndex(R.VBPtrType);
- return writeMember(R, Success);
-}
-
-Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
- ListContinuationRecord &R) {
- return writeMember(R, remapIndex(R.ContinuationIndex));
-}
-
-Error TypeStreamMerger::visitUnknownType(CVType &Rec) {
- // We failed to translate a type. Translate this index as "not translated".
- addMapping(TypeIndex(SimpleTypeKind::NotTranslated));
- return errorCorruptRecord();
-}
-
-Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
- assert(IndexMap.empty());
+Error TypeStreamMerger::doit(const CVTypeArray &Types) {
LastError = Error::success();
- if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
+ // We don't want to deserialize records. I guess this flag is poorly named,
+ // but it really means "Don't deserialize records before switching on the
+ // concrete type."
+ // FIXME: We can probably get even more speed here if we don't use the visitor
+ // pipeline here, but instead write the switch ourselves. I don't think it
+ // would buy us much since it's already pretty fast, but it's probably worth
+ // a few cycles.
+ if (auto EC =
+ codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler))
return EC;
// If we found bad indices but no other errors, try doing another pass and see
@@ -458,7 +307,8 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
NumBadIndices = 0;
CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
- if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
+ if (auto EC =
+ codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler))
return EC;
assert(NumBadIndices <= BadIndicesRemaining &&
@@ -469,18 +319,32 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
}
}
- IndexMap.clear();
-
Error Ret = std::move(*LastError);
LastError.reset();
return Ret;
}
-Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream,
- TypeTableBuilder &DestTypeStream,
+Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
TypeServerHandler *Handler,
- const CVTypeArray &Types) {
- return TypeStreamMerger(DestIdStream, DestTypeStream, SourceToDest, Handler)
- .mergeStream(Types);
+ const CVTypeArray &Types) {
+ TypeStreamMerger M(SourceToDest, Handler);
+ return M.mergeTypeRecords(Dest, Types);
+}
+
+Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest,
+ ArrayRef<TypeIndex> TypeSourceToDest,
+ SmallVectorImpl<TypeIndex> &SourceToDest,
+ const CVTypeArray &Ids) {
+ TypeStreamMerger M(SourceToDest, nullptr);
+ return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
+}
+
+Error llvm::codeview::mergeTypeAndIdRecords(
+ TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes,
+ SmallVectorImpl<TypeIndex> &SourceToDest, TypeServerHandler *Handler,
+ const CVTypeArray &IdsAndTypes) {
+
+ TypeStreamMerger M(SourceToDest, Handler);
+ return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes);
}
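The merger now exposes three entry points instead of one mergeStream: type records only, id records only (against a previously computed type mapping), or a combined stream split into separate TPI and IPI builders. A hedged sketch of the two-stage flow; the builder and map names are illustrative, only the signatures come from this change:

    SmallVector<TypeIndex, 128> TypeMap, IdMap;
    // Stage 1: types into the destination TPI table. TypeMap ends up holding
    // source index -> destination index for every translated type record.
    if (auto EC = mergeTypeRecords(DestTypes, TypeMap, /*Handler=*/nullptr, SrcTypes))
      return EC;
    // Stage 2: id records resolve their type references through TypeMap.
    if (auto EC = mergeIdRecords(DestIds, TypeMap, IdMap, SrcIds))
      return EC;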
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index a18710d6ab52..699694fde928 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -24,8 +24,7 @@ static void error(Error &&EC) {
consumeError(std::move(EC));
}
-TypeTableCollection::TypeTableCollection(
- ArrayRef<MutableArrayRef<uint8_t>> Records)
+TypeTableCollection::TypeTableCollection(ArrayRef<ArrayRef<uint8_t>> Records)
: Records(Records), Database(Records.size()) {}
Optional<TypeIndex> TypeTableCollection::getFirst() {
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 8e7c6c43d1a2..5ed55ce4c0dc 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -60,12 +60,15 @@ typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size,
- uint32_t *Off, const RelocAddrMap *Relocs) {
+ uint32_t *Off, const RelocAddrMap *Relocs,
+ uint64_t *SectionIndex) {
if (!Relocs)
return Data.getUnsigned(Off, Size);
RelocAddrMap::const_iterator AI = Relocs->find(*Off);
if (AI == Relocs->end())
return Data.getUnsigned(Off, Size);
+ if (SectionIndex)
+ *SectionIndex = AI->second.SectionIndex;
return Data.getUnsigned(Off, Size) + AI->second.Value;
}
@@ -287,6 +290,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
getStringSection(), isLittleEndian());
}
+DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
+ // FIXME: Improve this for the case where this DWO file is really a DWP file
+ // with an index - use the index for lookup instead of a linear search.
+ for (const auto &DWOCU : dwo_compile_units())
+ if (DWOCU->getDWOId() == Hash)
+ return DWOCU.get();
+ return nullptr;
+}
+
DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
parseCompileUnits();
if (auto *CU = CUs.getUnitForOffset(Offset))
@@ -897,28 +909,81 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
+std::shared_ptr<DWARFContext>
+DWARFContext::getDWOContext(StringRef AbsolutePath) {
+ if (auto S = DWP.lock()) {
+ DWARFContext *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ }
+
+ std::weak_ptr<DWOFile> *Entry = &DWOFiles[AbsolutePath];
+
+ if (auto S = Entry->lock()) {
+ DWARFContext *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+ }
+
+ SmallString<128> DWPName;
+ Expected<OwningBinary<ObjectFile>> Obj = [&] {
+ if (!CheckedForDWP) {
+ (getFileName() + ".dwp").toVector(DWPName);
+ auto Obj = object::ObjectFile::createObjectFile(DWPName);
+ if (Obj) {
+ Entry = &DWP;
+ return Obj;
+ } else {
+ CheckedForDWP = true;
+ // TODO: Should this error be handled (maybe in a high verbosity mode)
+ // before falling back to .dwo files?
+ consumeError(Obj.takeError());
+ }
+ }
+
+ return object::ObjectFile::createObjectFile(AbsolutePath);
+ }();
+
+ if (!Obj) {
+ // TODO: Actually report errors helpfully.
+ consumeError(Obj.takeError());
+ return nullptr;
+ }
+
+ auto S = std::make_shared<DWOFile>();
+ S->File = std::move(Obj.get());
+ S->Context = llvm::make_unique<DWARFContextInMemory>(*S->File.getBinary());
+ *Entry = S;
+ auto *Ctxt = S->Context.get();
+ return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
+}
+
static Error createError(const Twine &Reason, llvm::Error E) {
return make_error<StringError>(Reason + toString(std::move(E)),
inconvertibleErrorCode());
}
-/// Returns the address of symbol relocation used against. Used for futher
-/// relocations computation. Symbol's section load address is taken in account if
-/// LoadedObjectInfo interface is provided.
-static Expected<uint64_t>
-getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc,
- const LoadedObjectInfo *L,
- std::map<SymbolRef, uint64_t> &Cache) {
- uint64_t Ret = 0;
+/// SymInfo contains information about a symbol: its address
+/// and section index, which is -1LL for absolute symbols.
+struct SymInfo {
+ uint64_t Address;
+ uint64_t SectionIndex;
+};
+
+/// Returns the address of the symbol the relocation is applied against, plus a
+/// section index. Used for further relocation computation. The symbol's section
+/// load address is taken into account if a LoadedObjectInfo interface is provided.
+static Expected<SymInfo> getSymbolInfo(const object::ObjectFile &Obj,
+ const RelocationRef &Reloc,
+ const LoadedObjectInfo *L,
+ std::map<SymbolRef, SymInfo> &Cache) {
+ SymInfo Ret = {0, (uint64_t)-1LL};
object::section_iterator RSec = Obj.section_end();
object::symbol_iterator Sym = Reloc.getSymbol();
- std::map<SymbolRef, uint64_t>::iterator CacheIt = Cache.end();
+ std::map<SymbolRef, SymInfo>::iterator CacheIt = Cache.end();
// First calculate the address of the symbol or section as it appears
// in the object file
if (Sym != Obj.symbol_end()) {
bool New;
- std::tie(CacheIt, New) = Cache.insert({*Sym, 0});
+ std::tie(CacheIt, New) = Cache.insert({*Sym, {0, 0}});
if (!New)
return CacheIt->second;
@@ -934,12 +999,15 @@ getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc,
SectOrErr.takeError());
RSec = *SectOrErr;
- Ret = *SymAddrOrErr;
+ Ret.Address = *SymAddrOrErr;
} else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
- Ret = RSec->getAddress();
+ Ret.Address = RSec->getAddress();
}
+ if (RSec != Obj.section_end())
+ Ret.SectionIndex = RSec->getIndex();
+
// If we are given load addresses for the sections, we need to adjust:
// SymAddr = (Address of Symbol Or Section in File) -
// (Address of Section in File) +
@@ -949,7 +1017,7 @@ getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc,
// we need to perform the same computation.
if (L && RSec != Obj.section_end())
if (uint64_t SectionLoadAddress = L->getSectionLoadAddress(*RSec))
- Ret += SectionLoadAddress - RSec->getAddress();
+ Ret.Address += SectionLoadAddress - RSec->getAddress();
if (CacheIt != Cache.end())
CacheIt->second = Ret;
@@ -989,8 +1057,8 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec,
}
DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
- const LoadedObjectInfo *L)
- : IsLittleEndian(Obj.isLittleEndian()),
+ const LoadedObjectInfo *L)
+ : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()) {
for (const SectionRef &Section : Obj.sections()) {
StringRef name;
@@ -1008,7 +1076,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// Try to obtain an already relocated version of this section.
// Else use the unrelocated section from the object file. We'll have to
// apply relocations ourselves later.
- if (!L || !L->getLoadedSectionContents(*RelocatedSection,data))
+ if (!L || !L->getLoadedSectionContents(*RelocatedSection, data))
Section.getContents(data);
if (auto Err = maybeDecompress(Section, name, data)) {
@@ -1047,7 +1115,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// If the section we're relocating was relocated already by the JIT,
// then we used the relocated version above, so we do not need to process
// relocations for it now.
- if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData))
+ if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData))
continue;
// In Mach-o files, the relocations do not need to be applied if
@@ -1091,29 +1159,30 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
if (Section.relocation_begin() == Section.relocation_end())
continue;
- std::map<SymbolRef, uint64_t> AddrCache;
+ // Symbol to [address, section index] cache mapping.
+ std::map<SymbolRef, SymInfo> AddrCache;
for (const RelocationRef &Reloc : Section.relocations()) {
// FIXME: it's not clear how to correctly handle scattered
// relocations.
if (isRelocScattered(Obj, Reloc))
continue;
- Expected<uint64_t> SymAddrOrErr =
- getSymbolAddress(Obj, Reloc, L, AddrCache);
- if (!SymAddrOrErr) {
- errs() << toString(SymAddrOrErr.takeError()) << '\n';
+ Expected<SymInfo> SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache);
+ if (!SymInfoOrErr) {
+ errs() << toString(SymInfoOrErr.takeError()) << '\n';
continue;
}
object::RelocVisitor V(Obj);
- uint64_t Val = V.visit(Reloc.getType(), Reloc, *SymAddrOrErr);
+ uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address);
if (V.error()) {
SmallString<32> Name;
Reloc.getTypeName(Name);
errs() << "error: failed to compute relocation: " << Name << "\n";
continue;
}
- Map->insert({Reloc.getOffset(), {Val}});
+ llvm::RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
+ Map->insert({Reloc.getOffset(), Rel});
}
}
}
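getDWOContext caches split-DWARF files behind weak_ptrs (one per .dwo path, plus a single slot once a .dwp is found) and hands out an aliasing shared_ptr: the DWOFile holder owns both the object file and its DWARFContext, while callers only see the context pointer. A stripped-down sketch of the idiom, assuming a hypothetical Path key:

    std::weak_ptr<DWOFile> &Slot = DWOFiles[Path];
    std::shared_ptr<DWOFile> Holder = Slot.lock();
    if (!Holder) {
      Holder = std::make_shared<DWOFile>();   // load the object file, build the context
      Slot = Holder;                          // later lookups share this holder
    }
    // Aliasing constructor: shares ownership of Holder but points at the context.
    return std::shared_ptr<DWARFContext>(Holder, Holder->Context.get());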
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index 8da797750abd..6b5e1d3c931b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -35,8 +35,8 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
while (true) {
RangeListEntry entry;
uint32_t prev_offset = *offset_ptr;
- entry.StartAddress =
- getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
+ entry.StartAddress = getRelocatedValue(data, AddressSize, offset_ptr,
+ &Relocs, &entry.SectionIndex);
entry.EndAddress =
getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
@@ -69,8 +69,8 @@ DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const {
if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
BaseAddress = RLE.EndAddress;
} else {
- Res.push_back(
- {BaseAddress + RLE.StartAddress, BaseAddress + RLE.EndAddress});
+ Res.push_back({BaseAddress + RLE.StartAddress,
+ BaseAddress + RLE.EndAddress, RLE.SectionIndex});
}
}
return Res;
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index e3bd759ba94b..fd45c77d3745 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -211,13 +211,16 @@ Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
return None;
}
-bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const {
- auto LowPcAddr = toAddress(find(DW_AT_low_pc));
+bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC,
+ uint64_t &SectionIndex) const {
+ auto F = find(DW_AT_low_pc);
+ auto LowPcAddr = toAddress(F);
if (!LowPcAddr)
return false;
if (auto HighPcAddr = getHighPC(*LowPcAddr)) {
LowPC = *LowPcAddr;
HighPC = *HighPcAddr;
+ SectionIndex = F->getSectionIndex();
return true;
}
return false;
@@ -228,9 +231,9 @@ DWARFDie::getAddressRanges() const {
if (isNULL())
return DWARFAddressRangesVector();
// Single range specified by low/high PC.
- uint64_t LowPC, HighPC;
- if (getLowAndHighPC(LowPC, HighPC))
- return {{LowPC, HighPC}};
+ uint64_t LowPC, HighPC, Index;
+ if (getLowAndHighPC(LowPC, HighPC, Index))
+ return {{LowPC, HighPC, Index}};
// Multiple ranges from .debug_ranges section.
auto RangesOffset = toSectionOffset(find(DW_AT_ranges));
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 1cbd3ea2c869..0963d7bfd713 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -333,8 +333,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
return false;
uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize()
: U->getRefAddrByteSize();
- Value.uval =
- getRelocatedValue(Data, AddrSize, OffsetPtr, U->getRelocMap());
+ Value.uval = getRelocatedValue(Data, AddrSize, OffsetPtr,
+ U->getRelocMap(), &Value.SectionIndex);
break;
}
case DW_FORM_exprloc:
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c268afc222c3..c5add6a478b3 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -249,23 +249,6 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
return DieArray.size();
}
-DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath, uint64_t DWOId) {
- auto Obj = object::ObjectFile::createObjectFile(DWOPath);
- if (!Obj) {
- // TODO: Actually report errors helpfully.
- consumeError(Obj.takeError());
- return;
- }
- DWOFile = std::move(Obj.get());
- DWOContext.reset(
- cast<DWARFContext>(new DWARFContextInMemory(*DWOFile.getBinary())));
- for (const auto &DWOCU : DWOContext->dwo_compile_units())
- if (DWOCU->getDWOId() == DWOId) {
- DWOU = DWOCU.get();
- return;
- }
-}
-
bool DWARFUnit::parseDWO() {
if (isDWO)
return false;
@@ -287,16 +270,18 @@ bool DWARFUnit::parseDWO() {
auto DWOId = getDWOId();
if (!DWOId)
return false;
- DWO = llvm::make_unique<DWOHolder>(AbsolutePath, *DWOId);
- DWARFUnit *DWOCU = DWO->getUnit();
- if (!DWOCU) {
- DWO.reset();
+ auto DWOContext = Context.getDWOContext(AbsolutePath);
+ if (!DWOContext)
return false;
- }
+
+ DWARFCompileUnit *DWOCU = DWOContext->getDWOCompileUnitForHash(*DWOId);
+ if (!DWOCU)
+ return false;
+ DWO = std::shared_ptr<DWARFCompileUnit>(std::move(DWOContext), DWOCU);
// Share .debug_addr and .debug_ranges section with compile unit in .dwo
- DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
+ DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
auto DWORangesBase = UnitDie.getRangesBaseAttribute();
- DWOCU->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
+ DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
return true;
}
@@ -339,8 +324,8 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
// Collect address ranges from DIEs in .dwo if necessary.
bool DWOCreated = parseDWO();
- if (DWO.get())
- DWO->getUnit()->collectAddressRanges(CURanges);
+ if (DWO)
+ DWO->collectAddressRanges(CURanges);
if (DWOCreated)
DWO.reset();
@@ -400,7 +385,7 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address,
// First, find the subroutine that contains the given address (the leaf
// of inlined chain).
DWARFDie SubroutineDIE =
- (DWO ? DWO->getUnit() : this)->getSubroutineForAddress(Address);
+ (DWO ? DWO.get() : this)->getSubroutineForAddress(Address);
while (SubroutineDIE) {
if (SubroutineDIE.isSubroutineDIE())
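
The DWO member above is now built with std::shared_ptr's aliasing constructor: it shares ownership of the whole split-DWARF context returned by Context.getDWOContext() while pointing directly at the matching compile unit inside it, so the context cannot be destroyed while any such unit is still referenced. A small illustration of the idiom with stand-in types (Context and Unit here are placeholders, not the DWARF classes):

    #include <memory>
    #include <vector>

    struct Unit { int Id; };
    struct Context { std::vector<Unit> Units; };

    // Return a pointer to the unit with the given id that also keeps the whole
    // context alive (aliasing constructor: owns Ctx, points at a Unit inside it).
    std::shared_ptr<Unit> getUnit(std::shared_ptr<Context> Ctx, int Id) {
      for (Unit &U : Ctx->Units)
        if (U.Id == Id)
          return std::shared_ptr<Unit>(std::move(Ctx), &U);
      return nullptr;
    }

This is why parseDWO() can pass setAddrOffsetSection/setRangesSection straight to DWO without worrying about the containing context's lifetime.
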
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index 57953cfa338e..dfdeb8414212 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -45,18 +45,17 @@ static Interval intersect(const Interval &I1, const Interval &I2) {
std::min(I1.second, I2.second));
}
-MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks,
+MappedBlockStream::MappedBlockStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
BinaryStreamRef MsfData)
- : BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout),
- MsfData(MsfData) {}
+ : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData) {}
std::unique_ptr<MappedBlockStream>
-MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
+MappedBlockStream::createStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
BinaryStreamRef MsfData) {
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- BlockSize, NumBlocks, Layout, MsfData);
+ BlockSize, Layout, MsfData);
}
std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
@@ -66,7 +65,7 @@ std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ Layout.SB->BlockSize, SL, MsfData);
}
std::unique_ptr<MappedBlockStream>
@@ -75,7 +74,7 @@ MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData);
}
std::unique_ptr<MappedBlockStream>
@@ -83,7 +82,7 @@ MappedBlockStream::createFpmStream(const MSFLayout &Layout,
BinaryStreamRef MsfData) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData);
}
Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
@@ -173,7 +172,7 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
uint32_t First = Offset / BlockSize;
uint32_t Last = First;
- while (Last < NumBlocks - 1) {
+ while (Last < getNumBlocks() - 1) {
if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1)
break;
++Last;
@@ -313,17 +312,16 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
}
WritableMappedBlockStream::WritableMappedBlockStream(
- uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout,
+ uint32_t BlockSize, const MSFStreamLayout &Layout,
WritableBinaryStreamRef MsfData)
- : ReadInterface(BlockSize, NumBlocks, Layout, MsfData),
- WriteInterface(MsfData) {}
+ : ReadInterface(BlockSize, Layout, MsfData), WriteInterface(MsfData) {}
std::unique_ptr<WritableMappedBlockStream>
-WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
+WritableMappedBlockStream::createStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
WritableBinaryStreamRef MsfData) {
return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
- BlockSize, NumBlocks, Layout, MsfData);
+ BlockSize, Layout, MsfData);
}
std::unique_ptr<WritableMappedBlockStream>
@@ -334,7 +332,7 @@ WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
- return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData);
}
std::unique_ptr<WritableMappedBlockStream>
@@ -343,7 +341,7 @@ WritableMappedBlockStream::createDirectoryStream(
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData);
}
std::unique_ptr<WritableMappedBlockStream>
@@ -351,7 +349,7 @@ WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
WritableBinaryStreamRef MsfData) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData);
}
Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index c19a2f0d3110..23c7456d7772 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -129,16 +129,21 @@ uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const {
return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size();
}
-uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const {
- uint32_t Size = 0;
- Size += sizeof(ulittle16_t); // NumModules
- Size += sizeof(ulittle16_t); // NumSourceFiles
- Size += ModiList.size() * sizeof(ulittle16_t); // ModIndices
- Size += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts
+uint32_t DbiStreamBuilder::calculateNamesOffset() const {
+ uint32_t Offset = 0;
+ Offset += sizeof(ulittle16_t); // NumModules
+ Offset += sizeof(ulittle16_t); // NumSourceFiles
+ Offset += ModiList.size() * sizeof(ulittle16_t); // ModIndices
+ Offset += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts
uint32_t NumFileInfos = 0;
for (const auto &M : ModiList)
NumFileInfos += M->source_files().size();
- Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets
+ Offset += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets
+ return Offset;
+}
+
+uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const {
+ uint32_t Size = calculateNamesOffset();
Size += calculateNamesBufferSize();
return alignTo(Size, sizeof(uint32_t));
}
@@ -157,9 +162,8 @@ uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const {
Error DbiStreamBuilder::generateFileInfoSubstream() {
uint32_t Size = calculateFileInfoSubstreamSize();
- uint32_t NameSize = calculateNamesBufferSize();
auto Data = Allocator.Allocate<uint8_t>(Size);
- uint32_t NamesOffset = Size - NameSize;
+ uint32_t NamesOffset = calculateNamesOffset();
FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef<uint8_t>(Data, Size),
llvm::support::little);
@@ -207,6 +211,9 @@ Error DbiStreamBuilder::generateFileInfoSubstream() {
}
}
+ if (auto EC = NameBufferWriter.padToAlignment(sizeof(uint32_t)))
+ return EC;
+
if (NameBufferWriter.bytesRemaining() > 0)
return make_error<RawError>(raw_error_code::invalid_format,
"The names buffer contained unexpected data.");
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
index f00567db743e..9fd90102f72c 100644
--- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
@@ -47,7 +47,7 @@ void PDBTypeServerHandler::addSearchPath(StringRef Path) {
if (Path.empty() || !sys::fs::is_directory(Path))
return;
- SearchPaths.push_back(Path);
+ SearchPaths.insert(Path);
}
Expected<bool>
@@ -57,7 +57,13 @@ PDBTypeServerHandler::handleInternal(PDBFile &File,
if (!ExpectedTpi)
return ExpectedTpi.takeError();
- if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks))
+ // For handling a type server, we should use whatever callback array is being
+ // used for the original file. We shouldn't allow the visitor to arbitrarily
+ // stick a deserializer in there.
+ if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks,
+ VDS_BytesExternal))
return std::move(EC);
return true;
@@ -80,13 +86,14 @@ Expected<bool> PDBTypeServerHandler::handle(TypeServer2Record &TS,
cv_error_code::corrupt_record,
"TypeServer2Record does not contain filename!");
- for (auto Path : SearchPaths) {
- sys::path::append(Path, File);
- if (!sys::fs::exists(Path))
+ for (auto &Path : SearchPaths) {
+ SmallString<64> PathStr = Path.getKey();
+ sys::path::append(PathStr, File);
+ if (!sys::fs::exists(PathStr))
continue;
std::unique_ptr<IPDBSession> ThisSession;
- if (auto EC = loadDataForPDB(PDB_ReaderType::Native, Path, ThisSession)) {
+ if (auto EC = loadDataForPDB(PDB_ReaderType::Native, PathStr, ThisSession)) {
// It is not an error if this PDB fails to load, it just means that it
// doesn't match and we should continue searching.
ignoreErrors(std::move(EC));
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 8e0065873892..623afb371b50 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -8,7 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+
#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
@@ -104,6 +106,8 @@ Error TpiStream::reload() {
HashStream = std::move(HS);
}
+ Types = llvm::make_unique<LazyRandomTypeCollection>(
+ TypeRecords, getNumTypeRecords(), getTypeIndexOffsets());
return Error::success();
}
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index f454ae61d965..34f4017d9828 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -2525,6 +2525,9 @@ static std::string base_name(std::string &s) {
++p0;
break;
}
+ if (!isalpha(*p0) && !isdigit(*p0) && *p0 != '_') {
+ return std::string();
+ }
}
return std::string(p0, pe);
}
@@ -2612,39 +2615,45 @@ static const char *parse_unnamed_type_name(const char *first, const char *last,
first = t0 + 1;
} break;
case 'l': {
+ size_t lambda_pos = db.names.size();
db.names.push_back(std::string("'lambda'("));
const char *t0 = first + 2;
if (first[2] == 'v') {
db.names.back().first += ')';
++t0;
} else {
- const char *t1 = parse_type(t0, last, db);
- if (t1 == t0) {
- if (!db.names.empty())
- db.names.pop_back();
- return first;
- }
- if (db.names.size() < 2)
- return first;
- auto tmp = db.names.back().move_full();
- db.names.pop_back();
- db.names.back().first.append(tmp);
- t0 = t1;
+ bool is_first_it = true;
while (true) {
- t1 = parse_type(t0, last, db);
+ long k0 = static_cast<long>(db.names.size());
+ const char *t1 = parse_type(t0, last, db);
+ long k1 = static_cast<long>(db.names.size());
if (t1 == t0)
break;
- if (db.names.size() < 2)
+ if (k0 >= k1)
return first;
- tmp = db.names.back().move_full();
- db.names.pop_back();
- if (!tmp.empty()) {
- db.names.back().first.append(", ");
- db.names.back().first.append(tmp);
- }
+ // If the call to parse_type above found a pack expansion
+ // substitution, then multiple names could have been
+ // inserted into the name table. Walk through the names,
+ // appending each onto the lambda's parameter list.
+ std::for_each(db.names.begin() + k0, db.names.begin() + k1,
+ [&](typename C::sub_type::value_type &pair) {
+ if (pair.empty())
+ return;
+ auto &lambda = db.names[lambda_pos].first;
+ if (!is_first_it)
+ lambda.append(", ");
+ is_first_it = false;
+ lambda.append(pair.move_full());
+ });
+ db.names.erase(db.names.begin() + k0, db.names.end());
t0 = t1;
}
- if (db.names.empty())
+ if (is_first_it) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (db.names.empty() || db.names.size() - 1 != lambda_pos)
return first;
db.names.back().first.append(")");
}
@@ -4030,6 +4039,8 @@ static const char *parse_encoding(const char *first, const char *last, C &db) {
save_value<decltype(db.tag_templates)> sb(db.tag_templates);
if (db.encoding_depth > 1)
db.tag_templates = true;
+ save_value<decltype(db.parsed_ctor_dtor_cv)> sp(db.parsed_ctor_dtor_cv);
+ db.parsed_ctor_dtor_cv = false;
switch (*first) {
case 'G':
case 'T':
@@ -4229,6 +4240,7 @@ template <class StrT> struct string_pair {
template <size_t N> string_pair(const char (&s)[N]) : first(s, N - 1) {}
size_t size() const { return first.size() + second.size(); }
+ bool empty() const { return first.empty() && second.empty(); }
StrT full() const { return first + second; }
StrT move_full() { return std::move(first) + std::move(second); }
};
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 660843765b3f..9ce3974529bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -737,23 +737,23 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
writeInt16BE(LocalAddress, applyPPCha(Delta));
} break;
case ELF::R_PPC64_ADDR32: {
- int32_t Result = static_cast<int32_t>(Value + Addend);
- if (SignExtend32<32>(Result) != Result)
+ int64_t Result = static_cast<int64_t>(Value + Addend);
+ if (SignExtend64<32>(Result) != Result)
llvm_unreachable("Relocation R_PPC64_ADDR32 overflow");
writeInt32BE(LocalAddress, Result);
} break;
case ELF::R_PPC64_REL24: {
uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
- if (SignExtend32<26>(delta) != delta)
+ int64_t delta = static_cast<int64_t>(Value - FinalAddress + Addend);
+ if (SignExtend64<26>(delta) != delta)
llvm_unreachable("Relocation R_PPC64_REL24 overflow");
// Generates a 'bl <address>' instruction
writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
} break;
case ELF::R_PPC64_REL32: {
uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
- if (SignExtend32<32>(delta) != delta)
+ int64_t delta = static_cast<int64_t>(Value - FinalAddress + Addend);
+ if (SignExtend64<32>(delta) != delta)
llvm_unreachable("Relocation R_PPC64_REL32 overflow");
writeInt32BE(LocalAddress, delta);
} break;
@@ -1324,12 +1324,13 @@ RuntimeDyldELF::processRelocationRef(
Obj.getPlatformFlags(AbiVariant);
AbiVariant &= ELF::EF_PPC64_ABI;
// A PPC branch relocation will need a stub function if the target is
- // an external symbol (Symbol::ST_Unknown) or if the target address
- // is not within the signed 24-bits branch address.
+ // an external symbol (either Value.SymbolName is set, or SymType is
+ // SymbolRef::ST_Unknown) or if the target address is not within the
+ // signed 24-bit branch range.
SectionEntry &Section = Sections[SectionID];
uint8_t *Target = Section.getAddressWithOffset(Offset);
bool RangeOverflow = false;
- if (SymType != SymbolRef::ST_Unknown) {
+ if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) {
if (AbiVariant != 2) {
// In the ELFv1 ABI, a function call may point to the .opd entry,
// so the final symbol value is calculated based on the relocation
@@ -1344,21 +1345,19 @@ RuntimeDyldELF::processRelocationRef(
}
uint8_t *RelocTarget =
Sections[Value.SectionID].getAddressWithOffset(Value.Addend);
- int32_t delta = static_cast<int32_t>(Target - RelocTarget);
+ int64_t delta = static_cast<int64_t>(Target - RelocTarget);
// If it is within the 26-bit branch range, just set the branch target
- if (SignExtend32<26>(delta) == delta) {
+ if (SignExtend64<26>(delta) == delta) {
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
- if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
- else
- addRelocationForSection(RE, Value.SectionID);
+ addRelocationForSection(RE, Value.SectionID);
} else {
RangeOverflow = true;
}
}
- if (SymType == SymbolRef::ST_Unknown || RangeOverflow) {
- // It is an external symbol (SymbolRef::ST_Unknown) or within a range
- // larger than 24-bits.
+ if (Value.SymbolName || SymType == SymbolRef::ST_Unknown ||
+ RangeOverflow) {
+ // It is an external symbol (either Value.SymbolName is set, or
+ // SymType is SymbolRef::ST_Unknown) or out of range.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
// Symbol function stub already created, just relocate to it
@@ -1412,7 +1411,7 @@ RuntimeDyldELF::processRelocationRef(
RelType, 0);
Section.advanceStubOffset(getMaxStubSize());
}
- if (SymType == SymbolRef::ST_Unknown) {
+ if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) {
// Restore the TOC for external calls
if (AbiVariant == 2)
writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1)
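
The widened checks above use the usual trick for testing whether a value fits an N-bit signed field: sign-extend its low N bits and compare with the original. Doing the comparison in 64 bits avoids the silent truncation that the old int32_t casts introduced for deltas that legitimately exceed 32 bits. A minimal sketch (assuming llvm::SignExtend64 from Support/MathExtras.h):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // True if Delta is representable in an N-bit signed displacement field.
    template <unsigned N> bool fitsSigned(int64_t Delta) {
      return llvm::SignExtend64<N>(Delta) == Delta;
    }
    // fitsSigned<26>(0x01FFFFFC) -> true   (within the 26-bit branch range)
    // fitsSigned<26>(0x02000000) -> false  (out of range, needs a stub)
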
diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp
index 0161309fbf86..bc85264ac187 100644
--- a/lib/Fuzzer/FuzzerUtilPosix.cpp
+++ b/lib/Fuzzer/FuzzerUtilPosix.cpp
@@ -47,8 +47,21 @@ static void FileSizeExceedHandler(int, siginfo_t *, void *) {
static void SetSigaction(int signum,
void (*callback)(int, siginfo_t *, void *)) {
- struct sigaction sigact;
- memset(&sigact, 0, sizeof(sigact));
+ struct sigaction sigact = {};
+ if (sigaction(signum, nullptr, &sigact)) {
+ Printf("libFuzzer: sigaction failed with %d\n", errno);
+ exit(1);
+ }
+ if (sigact.sa_flags & SA_SIGINFO) {
+ if (sigact.sa_sigaction)
+ return;
+ } else {
+ if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN &&
+ sigact.sa_handler != SIG_ERR)
+ return;
+ }
+
+ sigact = {};
sigact.sa_sigaction = callback;
if (sigaction(signum, &sigact, 0)) {
Printf("libFuzzer: sigaction failed with %d\n", errno);
diff --git a/lib/Fuzzer/test/fuzzer-segv.test b/lib/Fuzzer/test/fuzzer-segv.test
index b9a6a5ce44ca..90f01932f652 100644
--- a/lib/Fuzzer/test/fuzzer-segv.test
+++ b/lib/Fuzzer/test/fuzzer-segv.test
@@ -3,3 +3,5 @@ LIBFUZZER_OWN_SEGV_HANDLER: == ERROR: libFuzzer: deadly signal
LIBFUZZER_OWN_SEGV_HANDLER: SUMMARY: libFuzzer: deadly signal
LIBFUZZER_OWN_SEGV_HANDLER: Test unit written to ./crash-
+RUN: env ASAN_OPTIONS=handle_segv=1 not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_ASAN_SEGV_HANDLER
+LIBFUZZER_ASAN_SEGV_HANDLER: ERROR: AddressSanitizer: {{SEGV|access-violation}} on unknown address
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index acfac316e91e..4ed7b021883d 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -212,27 +212,21 @@ using IndexAttrPair = std::pair<unsigned, AttributeSet>;
/// return type, and parameters.
class AttributeListImpl final
: public FoldingSetNode,
- private TrailingObjects<AttributeListImpl, IndexAttrPair> {
+ private TrailingObjects<AttributeListImpl, AttributeSet> {
friend class AttributeList;
friend TrailingObjects;
private:
- LLVMContext &Context;
- unsigned NumSlots; ///< Number of entries in this set.
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint64_t AvailableFunctionAttrs;
+ LLVMContext &Context;
+ unsigned NumAttrSets; ///< Number of entries in this set.
// Helper fn for TrailingObjects class.
- size_t numTrailingObjects(OverloadToken<IndexAttrPair>) { return NumSlots; }
-
- /// \brief Return a pointer to the IndexAttrPair for the specified slot.
- const IndexAttrPair *getSlotPair(unsigned Slot) const {
- return getTrailingObjects<IndexAttrPair>() + Slot;
- }
+ size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
public:
- AttributeListImpl(LLVMContext &C,
- ArrayRef<std::pair<unsigned, AttributeSet>> Slots);
+ AttributeListImpl(LLVMContext &C, ArrayRef<AttributeSet> Sets);
// AttributeListImpl is uniqued; these should not be available.
AttributeListImpl(const AttributeListImpl &) = delete;
@@ -243,41 +237,18 @@ public:
/// \brief Get the context that created this AttributeListImpl.
LLVMContext &getContext() { return Context; }
- /// \brief Return the number of slots used in this attribute list. This is
- /// the number of arguments that have an attribute set on them (including the
- /// function itself).
- unsigned getNumSlots() const { return NumSlots; }
-
- /// \brief Get the index of the given "slot" in the AttrNodes list. This index
- /// is the index of the return, parameter, or function object that the
- /// attributes are applied to, not the index into the AttrNodes list where the
- /// attributes reside.
- unsigned getSlotIndex(unsigned Slot) const {
- return getSlotPair(Slot)->first;
- }
-
- /// \brief Retrieve the attribute set node for the given "slot" in the
- /// AttrNode list.
- AttributeSet getSlotAttributes(unsigned Slot) const {
- return getSlotPair(Slot)->second;
- }
-
/// \brief Return true if the AttributeSet or the FunctionIndex has an
/// enum attribute of the given kind.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
}
- using iterator = AttributeSet::iterator;
-
- iterator begin(unsigned Slot) const {
- return getSlotAttributes(Slot).begin();
- }
- iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); }
+ typedef const AttributeSet *iterator;
+ iterator begin() const { return getTrailingObjects<AttributeSet>(); }
+ iterator end() const { return begin() + NumAttrSets; }
void Profile(FoldingSetNodeID &ID) const;
- static void Profile(FoldingSetNodeID &ID,
- ArrayRef<std::pair<unsigned, AttributeSet>> Nodes);
+ static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeSet> Nodes);
void dump() const;
};
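
AttributeListImpl now keeps its AttributeSets as a TrailingObjects array co-allocated directly after the node, indexed implicitly by position (function, return, then arguments) instead of explicit (index, set) pairs. A hedged sketch of the same TrailingObjects pattern on a toy node (assuming LLVM's Support/TrailingObjects.h; IntListNode is illustrative only):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/TrailingObjects.h"
    #include <algorithm>
    #include <new>

    // A node that stores N ints directly after itself, mirroring how
    // AttributeListImpl stores its AttributeSet array.
    class IntListNode final : private llvm::TrailingObjects<IntListNode, int> {
      friend TrailingObjects;
      unsigned NumInts;
      size_t numTrailingObjects(OverloadToken<int>) { return NumInts; }

      explicit IntListNode(llvm::ArrayRef<int> Ints) : NumInts(Ints.size()) {
        std::copy(Ints.begin(), Ints.end(), getTrailingObjects<int>());
      }

    public:
      static IntListNode *create(llvm::ArrayRef<int> Ints) {
        void *Mem = ::operator new(totalSizeToAlloc<int>(Ints.size()));
        return new (Mem) IntListNode(Ints);
      }
      const int *begin() const { return getTrailingObjects<int>(); }
      const int *end() const { return begin() + NumInts; }
    };
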
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index adb31d127a2e..19b7c3027232 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -507,7 +507,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<Attribute> Attrs) {
}
AttributeSet AttributeSet::addAttribute(LLVMContext &C,
- Attribute::AttrKind Kind) const {
+ Attribute::AttrKind Kind) const {
if (hasAttribute(Kind)) return *this;
AttrBuilder B;
B.addAttribute(Kind);
@@ -515,7 +515,7 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C,
}
AttributeSet AttributeSet::addAttribute(LLVMContext &C, StringRef Kind,
- StringRef Value) const {
+ StringRef Value) const {
AttrBuilder B;
B.addAttribute(Kind, Value);
return addAttributes(C, AttributeSet::get(C, B));
@@ -788,48 +788,44 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
-AttributeListImpl::AttributeListImpl(
- LLVMContext &C, ArrayRef<std::pair<unsigned, AttributeSet>> Slots)
- : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) {
-#ifndef NDEBUG
- assert(!Slots.empty() && "pointless AttributeListImpl");
- if (Slots.size() >= 2) {
- auto &PrevPair = Slots.front();
- for (auto &CurPair : Slots.drop_front()) {
- assert(PrevPair.first <= CurPair.first && "Attribute set not ordered!");
- }
- }
-#endif
+/// Map from AttributeList index to the internal array index. Adding one works:
+/// FunctionIndex: ~0U -> 0
+/// ReturnIndex: 0 -> 1
+/// FirstArgIndex: 1.. -> 2..
+static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
+ // MSVC warns about '~0U + 1' wrapping around when this is called on
+ // FunctionIndex, so cast to int first.
+ return static_cast<int>(Index) + 1;
+}
+
+AttributeListImpl::AttributeListImpl(LLVMContext &C,
+ ArrayRef<AttributeSet> Sets)
+ : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) {
+ assert(!Sets.empty() && "pointless AttributeListImpl");
// There's memory after the node where we can store the entries.
- std::copy(Slots.begin(), Slots.end(), getTrailingObjects<IndexAttrPair>());
+ std::copy(Sets.begin(), Sets.end(), getTrailingObjects<AttributeSet>());
// Initialize AvailableFunctionAttrs summary bitset.
static_assert(Attribute::EndAttrKinds <=
sizeof(AvailableFunctionAttrs) * CHAR_BIT,
"Too many attributes");
- static_assert(AttributeList::FunctionIndex == ~0u,
- "FunctionIndex should be biggest possible index");
- const auto &Last = Slots.back();
- if (Last.first == AttributeList::FunctionIndex) {
- AttributeSet Node = Last.second;
- for (Attribute I : Node) {
- if (!I.isStringAttribute())
- AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum();
- }
+ static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
+ "function should be stored in slot 0");
+ for (Attribute I : Sets[0]) {
+ if (!I.isStringAttribute())
+ AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum();
}
}
void AttributeListImpl::Profile(FoldingSetNodeID &ID) const {
- Profile(ID, makeArrayRef(getSlotPair(0), getNumSlots()));
+ Profile(ID, makeArrayRef(begin(), end()));
}
-void AttributeListImpl::Profile(
- FoldingSetNodeID &ID, ArrayRef<std::pair<unsigned, AttributeSet>> Nodes) {
- for (const auto &Node : Nodes) {
- ID.AddInteger(Node.first);
- ID.AddPointer(Node.second.SetNode);
- }
+void AttributeListImpl::Profile(FoldingSetNodeID &ID,
+ ArrayRef<AttributeSet> Sets) {
+ for (const auto &Set : Sets)
+ ID.AddPointer(Set.SetNode);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -842,24 +838,13 @@ LLVM_DUMP_METHOD void AttributeListImpl::dump() const {
// AttributeList Construction and Mutation Methods
//===----------------------------------------------------------------------===//
-AttributeList AttributeList::getImpl(
- LLVMContext &C, ArrayRef<std::pair<unsigned, AttributeSet>> Attrs) {
- assert(!Attrs.empty() && "creating pointless AttributeList");
-#ifndef NDEBUG
- unsigned LastIndex = 0;
- bool IsFirst = true;
- for (auto &&AttrPair : Attrs) {
- assert((IsFirst || LastIndex < AttrPair.first) &&
- "unsorted or duplicate AttributeList indices");
- assert(AttrPair.second.hasAttributes() && "pointless AttributeList slot");
- LastIndex = AttrPair.first;
- IsFirst = false;
- }
-#endif
+AttributeList AttributeList::getImpl(LLVMContext &C,
+ ArrayRef<AttributeSet> AttrSets) {
+ assert(!AttrSets.empty() && "pointless AttributeListImpl");
LLVMContextImpl *pImpl = C.pImpl;
FoldingSetNodeID ID;
- AttributeListImpl::Profile(ID, Attrs);
+ AttributeListImpl::Profile(ID, AttrSets);
void *InsertPoint;
AttributeListImpl *PA =
@@ -870,8 +855,8 @@ AttributeList AttributeList::getImpl(
if (!PA) {
// Co-allocate entries after the AttributeListImpl itself.
void *Mem = ::operator new(
- AttributeListImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
- PA = new (Mem) AttributeListImpl(C, Attrs);
+ AttributeListImpl::totalSizeToAlloc<AttributeSet>(AttrSets.size()));
+ PA = new (Mem) AttributeListImpl(C, AttrSets);
pImpl->AttrsLists.InsertNode(PA, InsertPoint);
}
@@ -912,7 +897,7 @@ AttributeList::get(LLVMContext &C,
AttrPairVec.emplace_back(Index, AttributeSet::get(C, AttrVec));
}
- return getImpl(C, AttrPairVec);
+ return get(C, AttrPairVec);
}
AttributeList
@@ -922,35 +907,76 @@ AttributeList::get(LLVMContext &C,
if (Attrs.empty())
return AttributeList();
- return getImpl(C, Attrs);
+ assert(std::is_sorted(Attrs.begin(), Attrs.end(),
+ [](const std::pair<unsigned, AttributeSet> &LHS,
+ const std::pair<unsigned, AttributeSet> &RHS) {
+ return LHS.first < RHS.first;
+ }) &&
+ "Misordered Attributes list!");
+ assert(none_of(Attrs,
+ [](const std::pair<unsigned, AttributeSet> &Pair) {
+ return !Pair.second.hasAttributes();
+ }) &&
+ "Pointless attribute!");
+
+ unsigned MaxIndex = Attrs.back().first;
+
+ SmallVector<AttributeSet, 4> AttrVec(attrIdxToArrayIdx(MaxIndex) + 1);
+ for (auto Pair : Attrs)
+ AttrVec[attrIdxToArrayIdx(Pair.first)] = Pair.second;
+
+ return getImpl(C, AttrVec);
}
AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs,
AttributeSet RetAttrs,
ArrayRef<AttributeSet> ArgAttrs) {
- SmallVector<std::pair<unsigned, AttributeSet>, 8> AttrPairs;
- if (RetAttrs.hasAttributes())
- AttrPairs.emplace_back(ReturnIndex, RetAttrs);
- size_t Index = 1;
- for (AttributeSet AS : ArgAttrs) {
- if (AS.hasAttributes())
- AttrPairs.emplace_back(Index, AS);
- ++Index;
+ // Scan from the end to find the last argument with attributes. Most
+ // arguments don't have attributes, so it's nice if we can have fewer unique
+ // AttributeListImpls by dropping empty attribute sets at the end of the list.
+ unsigned NumSets = 0;
+ for (size_t I = ArgAttrs.size(); I != 0; --I) {
+ if (ArgAttrs[I - 1].hasAttributes()) {
+ NumSets = I + 2;
+ break;
+ }
}
- if (FnAttrs.hasAttributes())
- AttrPairs.emplace_back(FunctionIndex, FnAttrs);
- if (AttrPairs.empty())
+ if (NumSets == 0) {
+ // Check function and return attributes if we didn't have argument
+ // attributes.
+ if (RetAttrs.hasAttributes())
+ NumSets = 2;
+ else if (FnAttrs.hasAttributes())
+ NumSets = 1;
+ }
+
+ // If all attribute sets were empty, we can use the empty attribute list.
+ if (NumSets == 0)
return AttributeList();
- return getImpl(C, AttrPairs);
+
+ SmallVector<AttributeSet, 8> AttrSets;
+ AttrSets.reserve(NumSets);
+ // If we have any attributes, we always have function attributes.
+ AttrSets.push_back(FnAttrs);
+ if (NumSets > 1)
+ AttrSets.push_back(RetAttrs);
+ if (NumSets > 2) {
+ // Drop the empty argument attribute sets at the end.
+ ArgAttrs = ArgAttrs.take_front(NumSets - 2);
+ AttrSets.insert(AttrSets.end(), ArgAttrs.begin(), ArgAttrs.end());
+ }
+
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
const AttrBuilder &B) {
if (!B.hasAttributes())
return AttributeList();
- AttributeSet AS = AttributeSet::get(C, B);
- std::pair<unsigned, AttributeSet> Arr[1] = {{Index, AS}};
- return getImpl(C, Arr);
+ Index = attrIdxToArrayIdx(Index);
+ SmallVector<AttributeSet, 8> AttrSets(Index + 1);
+ AttrSets[Index] = AttributeSet::get(C, B);
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
@@ -973,32 +999,22 @@ AttributeList AttributeList::get(LLVMContext &C,
ArrayRef<AttributeList> Attrs) {
if (Attrs.empty())
return AttributeList();
- if (Attrs.size() == 1) return Attrs[0];
-
- SmallVector<std::pair<unsigned, AttributeSet>, 8> AttrNodeVec;
- AttributeListImpl *A0 = Attrs[0].pImpl;
- if (A0)
- AttrNodeVec.append(A0->getSlotPair(0), A0->getSlotPair(A0->getNumSlots()));
- // Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec
- // ordered by index. Because we know that each list in Attrs is ordered by
- // index we only need to merge each successive list in rather than doing a
- // full sort.
- for (unsigned I = 1, E = Attrs.size(); I != E; ++I) {
- AttributeListImpl *ALI = Attrs[I].pImpl;
- if (!ALI) continue;
- SmallVector<std::pair<unsigned, AttributeSet>, 8>::iterator
- ANVI = AttrNodeVec.begin(), ANVE;
- for (const IndexAttrPair *AI = ALI->getSlotPair(0),
- *AE = ALI->getSlotPair(ALI->getNumSlots());
- AI != AE; ++AI) {
- ANVE = AttrNodeVec.end();
- while (ANVI != ANVE && ANVI->first <= AI->first)
- ++ANVI;
- ANVI = AttrNodeVec.insert(ANVI, *AI) + 1;
- }
+ if (Attrs.size() == 1)
+ return Attrs[0];
+
+ unsigned MaxSize = 0;
+ for (AttributeList List : Attrs)
+ MaxSize = std::max(MaxSize, List.getNumAttrSets());
+
+ SmallVector<AttributeSet, 8> NewAttrSets(MaxSize);
+ for (unsigned I = 0; I < MaxSize; ++I) {
+ AttrBuilder CurBuilder;
+ for (AttributeList List : Attrs)
+ CurBuilder.merge(List.getAttributes(I - 1));
+ NewAttrSets[I] = AttributeSet::get(C, CurBuilder);
}
- return getImpl(C, AttrNodeVec);
+ return getImpl(C, NewAttrSets);
}
AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
@@ -1022,29 +1038,19 @@ AttributeList AttributeList::addAttribute(LLVMContext &C,
Attribute A) const {
assert(std::is_sorted(Indices.begin(), Indices.end()));
- unsigned I = 0, E = pImpl ? pImpl->getNumSlots() : 0;
- SmallVector<IndexAttrPair, 4> AttrVec;
+ SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
+ unsigned MaxIndex = attrIdxToArrayIdx(Indices.back());
+ if (MaxIndex >= AttrSets.size())
+ AttrSets.resize(MaxIndex + 1);
+
for (unsigned Index : Indices) {
- // Add all attribute slots before the current index.
- for (; I < E && getSlotIndex(I) < Index; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
-
- // Add the attribute at this index. If we already have attributes at this
- // index, merge them into a new set.
- AttrBuilder B;
- if (I < E && getSlotIndex(I) == Index) {
- B.merge(AttrBuilder(pImpl->getSlotAttributes(I)));
- ++I;
- }
+ Index = attrIdxToArrayIdx(Index);
+ AttrBuilder B(AttrSets[Index]);
B.addAttribute(A);
- AttrVec.emplace_back(Index, AttributeSet::get(C, B));
+ AttrSets[Index] = AttributeSet::get(C, B);
}
- // Add remaining attributes.
- for (; I < E; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
-
- return get(C, AttrVec);
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
@@ -1064,33 +1070,16 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
"Attempt to change alignment!");
#endif
- SmallVector<IndexAttrPair, 4> AttrVec;
- uint64_t NumAttrs = pImpl->getNumSlots();
- unsigned I;
+ Index = attrIdxToArrayIdx(Index);
+ SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
+ if (Index >= AttrSets.size())
+ AttrSets.resize(Index + 1);
- // Add all the attribute slots before the one we need to merge.
- for (I = 0; I < NumAttrs; ++I) {
- if (getSlotIndex(I) >= Index)
- break;
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
- }
-
- AttrBuilder NewAttrs;
- if (I < NumAttrs && getSlotIndex(I) == Index) {
- // We need to merge the attribute sets.
- NewAttrs.merge(pImpl->getSlotAttributes(I));
- ++I;
- }
- NewAttrs.merge(B);
+ AttrBuilder Merged(AttrSets[Index]);
+ Merged.merge(B);
+ AttrSets[Index] = AttributeSet::get(C, Merged);
- // Add the new or merged attribute set at this index.
- AttrVec.emplace_back(Index, AttributeSet::get(C, NewAttrs));
-
- // Add the remaining entries.
- for (; I < NumAttrs; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
-
- return get(C, AttrVec);
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
@@ -1109,54 +1098,38 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
return removeAttributes(C, Index, B);
}
-AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &Attrs) const {
+AttributeList
+AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &AttrsToRemove) const {
if (!pImpl)
return AttributeList();
// FIXME it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
- assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!");
+ assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!");
- // Add the attribute slots before the one we're trying to add.
- SmallVector<IndexAttrPair, 4> AttrSets;
- uint64_t NumAttrs = pImpl->getNumSlots();
- AttrBuilder B;
- uint64_t LastIndex = 0;
- for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
- if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index)
- B = AttrBuilder(getSlotAttributes(LastIndex++));
- break;
- }
- LastIndex = I + 1;
- AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)});
- }
+ Index = attrIdxToArrayIdx(Index);
+ SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
+ if (Index >= AttrSets.size())
+ AttrSets.resize(Index + 1);
- // Remove the attributes from the existing set and add them.
- B.remove(Attrs);
- if (B.hasAttributes())
- AttrSets.push_back({Index, AttributeSet::get(C, B)});
-
- // Add the remaining attribute slots.
- for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
- AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)});
+ AttrBuilder B(AttrSets[Index]);
+ B.remove(AttrsToRemove);
+ AttrSets[Index] = AttributeSet::get(C, B);
- return get(C, AttrSets);
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::removeAttributes(LLVMContext &C,
unsigned WithoutIndex) const {
if (!pImpl)
return AttributeList();
-
- SmallVector<std::pair<unsigned, AttributeSet>, 4> AttrSet;
- for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) {
- unsigned Index = getSlotIndex(I);
- if (Index != WithoutIndex)
- AttrSet.push_back({Index, pImpl->getSlotAttributes(I)});
- }
- return get(C, AttrSet);
+ WithoutIndex = attrIdxToArrayIdx(WithoutIndex);
+ if (WithoutIndex >= getNumAttrSets())
+ return *this;
+ SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
+ AttrSets[WithoutIndex] = AttributeSet();
+ return getImpl(C, AttrSets);
}
AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C,
@@ -1225,20 +1198,20 @@ bool AttributeList::hasFnAttribute(StringRef Kind) const {
bool AttributeList::hasParamAttribute(unsigned ArgNo,
Attribute::AttrKind Kind) const {
- return hasAttribute(ArgNo + 1, Kind);
+ return hasAttribute(ArgNo + FirstArgIndex, Kind);
}
bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
unsigned *Index) const {
if (!pImpl) return false;
- for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I)
- for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I);
- II != IE; ++II)
- if (II->hasAttribute(Attr)) {
- if (Index) *Index = pImpl->getSlotIndex(I);
- return true;
- }
+ for (unsigned I = index_begin(), E = index_end(); I != E; ++I) {
+ if (hasAttribute(I, Attr)) {
+ if (Index)
+ *Index = I;
+ return true;
+ }
+ }
return false;
}
@@ -1282,60 +1255,35 @@ std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const {
}
AttributeSet AttributeList::getAttributes(unsigned Index) const {
- if (!pImpl) return AttributeSet();
-
- // Loop through to find the attribute node we want.
- for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I)
- if (pImpl->getSlotIndex(I) == Index)
- return pImpl->getSlotAttributes(I);
-
- return AttributeSet();
+ Index = attrIdxToArrayIdx(Index);
+ if (!pImpl || Index >= getNumAttrSets())
+ return AttributeSet();
+ return pImpl->begin()[Index];
}
-AttributeList::iterator AttributeList::begin(unsigned Slot) const {
- if (!pImpl)
- return ArrayRef<Attribute>().begin();
- return pImpl->begin(Slot);
+AttributeList::iterator AttributeList::begin() const {
+ return pImpl ? pImpl->begin() : nullptr;
}
-AttributeList::iterator AttributeList::end(unsigned Slot) const {
- if (!pImpl)
- return ArrayRef<Attribute>().end();
- return pImpl->end(Slot);
+AttributeList::iterator AttributeList::end() const {
+ return pImpl ? pImpl->end() : nullptr;
}
//===----------------------------------------------------------------------===//
// AttributeList Introspection Methods
//===----------------------------------------------------------------------===//
-unsigned AttributeList::getNumSlots() const {
- return pImpl ? pImpl->getNumSlots() : 0;
-}
-
-unsigned AttributeList::getSlotIndex(unsigned Slot) const {
- assert(pImpl && Slot < pImpl->getNumSlots() &&
- "Slot # out of range!");
- return pImpl->getSlotIndex(Slot);
-}
-
-AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const {
- assert(pImpl && Slot < pImpl->getNumSlots() &&
- "Slot # out of range!");
- return pImpl->getSlotAttributes(Slot);
+unsigned AttributeList::getNumAttrSets() const {
+ return pImpl ? pImpl->NumAttrSets : 0;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void AttributeList::dump() const {
dbgs() << "PAL[\n";
- for (unsigned i = 0, e = getNumSlots(); i < e; ++i) {
- uint64_t Index = getSlotIndex(i);
- dbgs() << " { ";
- if (Index == ~0U)
- dbgs() << "~0U";
- else
- dbgs() << Index;
- dbgs() << " => " << getAsString(Index) << " }\n";
+ for (unsigned i = index_begin(), e = index_end(); i != e; ++i) {
+ if (getAttributes(i).hasAttributes())
+ dbgs() << " { " << i << " => " << getAsString(i) << " }\n";
}
dbgs() << "]\n";
@@ -1346,26 +1294,16 @@ LLVM_DUMP_METHOD void AttributeList::dump() const {
// AttrBuilder Method Implementations
//===----------------------------------------------------------------------===//
+// FIXME: Remove this ctor, use AttributeSet.
AttrBuilder::AttrBuilder(AttributeList AL, unsigned Index) {
- AttributeListImpl *pImpl = AL.pImpl;
- if (!pImpl) return;
-
- for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) {
- if (pImpl->getSlotIndex(I) != Index) continue;
-
- for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I);
- II != IE; ++II)
- addAttribute(*II);
-
- break;
- }
+ AttributeSet AS = AL.getAttributes(Index);
+ for (const Attribute &A : AS)
+ addAttribute(A);
}
AttrBuilder::AttrBuilder(AttributeSet AS) {
- if (AS.hasAttributes()) {
- for (const Attribute &A : AS)
- addAttribute(A);
- }
+ for (const Attribute &A : AS)
+ addAttribute(A);
}
void AttrBuilder::clear() {
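
All of the rewritten AttributeList methods lean on the attrIdxToArrayIdx translation introduced at the top of this file: unsigned wrap-around maps FunctionIndex (~0U) to array slot 0, the return value to slot 1, and argument i to slot i + 2. A compact worked example (the index constants are restated here for illustration, mirroring the enumerators the patch names):

    #include <cstdint>

    static constexpr unsigned FunctionIndex = ~0U; // AttributeList::FunctionIndex
    static constexpr unsigned ReturnIndex = 0U;    // AttributeList::ReturnIndex
    static constexpr unsigned FirstArgIndex = 1U;  // AttributeList::FirstArgIndex

    static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
      return static_cast<int>(Index) + 1; // cast first so MSVC doesn't warn on ~0U + 1
    }

    static_assert(attrIdxToArrayIdx(FunctionIndex) == 0, "function attrs -> slot 0");
    static_assert(attrIdxToArrayIdx(ReturnIndex) == 1, "return attrs -> slot 1");
    static_assert(attrIdxToArrayIdx(FirstArgIndex) == 2, "first arg attrs -> slot 2");
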
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 90ca21ab91f8..1f8659d4e2ca 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -263,6 +263,10 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const {
return SuccBB;
}
+iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
+ return make_range<phi_iterator>(dyn_cast<PHINode>(&front()), nullptr);
+}
+
/// This method is used to notify a BasicBlock that the
/// specified Predecessor of the block is no longer able to reach it. This is
/// actually not used to update the Predecessor list, but is actually used to
@@ -389,13 +393,11 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
// Loop over any phi nodes in the basic block, updating the BB field of
// incoming values...
BasicBlock *Successor = *I;
- PHINode *PN;
- for (BasicBlock::iterator II = Successor->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- int IDX = PN->getBasicBlockIndex(this);
- while (IDX != -1) {
- PN->setIncomingBlock((unsigned)IDX, New);
- IDX = PN->getBasicBlockIndex(this);
+ for (auto &PN : Successor->phis()) {
+ int Idx = PN.getBasicBlockIndex(this);
+ while (Idx != -1) {
+ PN.setIncomingBlock((unsigned)Idx, New);
+ Idx = PN.getBasicBlockIndex(this);
}
}
}
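
The new BasicBlock::phis() accessor yields a half-open range over the block's leading PHI nodes (empty when the first instruction is not a PHI), which is what lets splitBasicBlock drop the manual dyn_cast loop. A small usage sketch (countIncomingFrom is a hypothetical helper, not part of the patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Count the PHI nodes in BB that have an incoming value from Pred.
    unsigned countIncomingFrom(llvm::BasicBlock &BB, llvm::BasicBlock *Pred) {
      unsigned Count = 0;
      for (llvm::PHINode &PN : BB.phis())
        if (PN.getBasicBlockIndex(Pred) != -1)
          ++Count;
      return Count;
    }
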
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 3168ec6944a3..b7e3f0c6779e 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -163,7 +163,7 @@ void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP,
// Fix up debug variables to point to NewSP.
auto reparentVar = [&](DILocalVariable *Var) {
- return DILocalVariable::getDistinct(
+ return DILocalVariable::get(
Ctx,
cast<DILocalScope>(
reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)),
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 01d4ed6c8eef..d7baa9ebc223 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -454,6 +454,9 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i,
// question is a call argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
+ if (i == AttributeList::ReturnIndex)
+ return hasRetAttr(Kind);
+
// FIXME: Avoid these i - 1 calculations and update the API to use zero-based
// indices.
if (i < (getNumArgOperands() + 1))
@@ -779,6 +782,9 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i,
// question is an invoke argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
+ if (i == AttributeList::ReturnIndex)
+ return hasRetAttr(Kind);
+
// FIXME: Avoid these i - 1 calculations and update the API to use zero-based
// indices.
if (i < (getNumArgOperands() + 1))
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index c9814a96bea6..94e115a6a78d 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -97,7 +97,9 @@ Value *InstrProfIncrementInst::getStep() const {
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
- Metadata *MD = dyn_cast<MetadataAsValue>(getOperand(2))->getMetadata();
+ unsigned NumOperands = getNumArgOperands();
+ Metadata *MD =
+ dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return rmInvalid;
StringRef RoundingArg = cast<MDString>(MD)->getString();
@@ -115,7 +117,9 @@ ConstrainedFPIntrinsic::getRoundingMode() const {
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
- Metadata *MD = dyn_cast<MetadataAsValue>(getOperand(3))->getMetadata();
+ unsigned NumOperands = getNumArgOperands();
+ Metadata *MD =
+ dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return ebInvalid;
StringRef ExceptionArg = cast<MDString>(MD)->getString();
@@ -125,3 +129,21 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
.Case("fpexcept.strict", ebStrict)
.Default(ebInvalid);
}
+
+bool ConstrainedFPIntrinsic::isUnaryOp() const {
+ switch (getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
+ return true;
+ }
+}
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 12c258d95f52..95673e515a55 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -481,7 +481,7 @@ PICLevel::Level Module::getPICLevel() const {
}
void Module::setPICLevel(PICLevel::Level PL) {
- addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL);
+ addModuleFlag(ModFlagBehavior::Max, "PIC Level", PL);
}
PIELevel::Level Module::getPIELevel() const {
@@ -495,7 +495,7 @@ PIELevel::Level Module::getPIELevel() const {
}
void Module::setPIELevel(PIELevel::Level PL) {
- addModuleFlag(ModFlagBehavior::Error, "PIE Level", PL);
+ addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL);
}
void Module::setProfileSummary(Metadata *M) {
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 21e8048442be..a8523236ac9f 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1282,6 +1282,13 @@ Verifier::visitModuleFlag(const MDNode *Op,
// These behavior types accept any value.
break;
+ case Module::Max: {
+ Assert(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
+ "invalid value for 'max' module flag (expected constant integer)",
+ Op->getOperand(2));
+ break;
+ }
+
case Module::Require: {
// The value should itself be an MDNode with two operands, a flag ID (an
// MDString), and a value.
@@ -1729,17 +1736,9 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) {
}
bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
- if (Attrs.getNumSlots() == 0)
- return true;
-
- unsigned LastSlot = Attrs.getNumSlots() - 1;
- unsigned LastIndex = Attrs.getSlotIndex(LastSlot);
- if (LastIndex <= Params ||
- (LastIndex == AttributeList::FunctionIndex &&
- (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params)))
- return true;
-
- return false;
+ // There shouldn't be more attribute sets than there are parameters plus the
+ // function and return value.
+ return Attrs.getNumAttrSets() <= Params + 2;
}
/// Verify that statepoint intrinsic is well formed.
@@ -3967,6 +3966,18 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
visitConstrainedFPIntrinsic(
cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
break;
@@ -4336,7 +4347,12 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
}
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
- Assert(isa<MetadataAsValue>(FPI.getOperand(2)),
+ unsigned NumOperands = FPI.getNumArgOperands();
+ Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+ "invalid arguments for constrained FP intrinsic", &FPI);
+ Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
+ "invalid exception behavior argument", &FPI);
+ Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
"invalid rounding mode argument", &FPI);
Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
"invalid rounding mode argument", &FPI);
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index c73b6b6b15c1..9efc095f9fcf 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -114,7 +114,10 @@ static void computeCacheKey(
AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
for (auto &A : Conf.MAttrs)
AddString(A);
- AddUnsigned(Conf.RelocModel);
+ if (Conf.RelocModel)
+ AddUnsigned(*Conf.RelocModel);
+ else
+ AddUnsigned(-1);
AddUnsigned(Conf.CodeModel);
AddUnsigned(Conf.CGOptLevel);
AddUnsigned(Conf.CGFileType);
@@ -539,16 +542,10 @@ Error LTO::addRegularLTO(BitcodeModule BM,
if (Sym.isUndefined())
continue;
Keep.push_back(GV);
- switch (GV->getLinkage()) {
- default:
- break;
- case GlobalValue::LinkOnceAnyLinkage:
- GV->setLinkage(GlobalValue::WeakAnyLinkage);
- break;
- case GlobalValue::LinkOnceODRLinkage:
- GV->setLinkage(GlobalValue::WeakODRLinkage);
- break;
- }
+ GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
+ if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
+ GV->setLinkage(GlobalValue::getWeakLinkage(
+ GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
} else if (isa<GlobalObject>(GV) &&
(GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
GV->hasAvailableExternallyLinkage()) &&
@@ -999,10 +996,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ExportedGUIDs.insert(GUID);
}
- auto isPrevailing = [&](GlobalValue::GUID GUID,
- const GlobalValueSummary *S) {
- return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
- };
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
const auto &ExportList = ExportLists.find(ModuleIdentifier);
return (ExportList != ExportLists.end() &&
@@ -1010,17 +1003,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ExportedGUIDs.count(GUID);
};
thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
-
- auto recordNewLinkage = [&](StringRef ModuleIdentifier,
- GlobalValue::GUID GUID,
- GlobalValue::LinkageTypes NewLinkage) {
- ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
- };
-
- thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
- recordNewLinkage);
}
+ auto isPrevailing = [&](GlobalValue::GUID GUID,
+ const GlobalValueSummary *S) {
+ return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+ };
+ auto recordNewLinkage = [&](StringRef ModuleIdentifier,
+ GlobalValue::GUID GUID,
+ GlobalValue::LinkageTypes NewLinkage) {
+ ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
+ };
+ thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
+ recordNewLinkage);
+
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 30447c528af1..668667a53562 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -117,15 +117,22 @@ Error Config::addSaveTemps(std::string OutputFileName,
namespace {
std::unique_ptr<TargetMachine>
-createTargetMachine(Config &Conf, StringRef TheTriple,
- const Target *TheTarget) {
+createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
+ StringRef TheTriple = M.getTargetTriple();
SubtargetFeatures Features;
Features.getDefaultSubtargetFeatures(Triple(TheTriple));
for (const std::string &A : Conf.MAttrs)
Features.AddFeature(A);
+ Reloc::Model RelocModel;
+ if (Conf.RelocModel)
+ RelocModel = *Conf.RelocModel;
+ else
+ RelocModel =
+ M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
- TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel,
+ TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
Conf.CodeModel, Conf.CGOptLevel));
}
@@ -311,7 +318,7 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
std::unique_ptr<TargetMachine> TM =
- createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
+ createTargetMachine(C, T, *MPartInCtx);
codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
},
@@ -360,8 +367,7 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
if (!TOrErr)
return TOrErr.takeError();
- std::unique_ptr<TargetMachine> TM =
- createTargetMachine(C, Mod->getTargetTriple(), *TOrErr);
+ std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupOptimizationRemarks(
@@ -397,8 +403,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
if (!TOrErr)
return TOrErr.takeError();
- std::unique_ptr<TargetMachine> TM =
- createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr);
+ std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
if (Conf.CodeGenOnly) {
codegen(Conf, TM.get(), AddStream, Task, Mod);
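
A minimal standalone sketch of the defaulting rule the new createTargetMachine applies: an explicitly configured relocation model wins, otherwise one is derived from the module's PIC level. The names chooseRelocModel and the small enums below are illustrative stand-ins, not LLVM's types.

// Sketch only (assumes C++17 for std::optional); chooseRelocModel, RelocModel
// and PICLevel are illustrative names, not the LLVM API.
#include <cassert>
#include <optional>

enum class RelocModel { Static, PIC_ };
enum class PICLevel { NotPIC, SmallPIC, BigPIC };

static RelocModel chooseRelocModel(std::optional<RelocModel> Configured,
                                   PICLevel ModulePICLevel) {
  // An explicit configuration always takes precedence.
  if (Configured)
    return *Configured;
  // Otherwise fall back to what the module's PIC level implies.
  return ModulePICLevel == PICLevel::NotPIC ? RelocModel::Static
                                            : RelocModel::PIC_;
}

int main() {
  assert(chooseRelocModel(std::nullopt, PICLevel::BigPIC) == RelocModel::PIC_);
  assert(chooseRelocModel(RelocModel::Static, PICLevel::BigPIC) ==
         RelocModel::Static);
}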
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index c0af21aa148c..defad1904989 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -1157,6 +1157,11 @@ Error IRLinker::linkModuleFlagsMetadata() {
mdconst::extract<ConstantInt>(DstOp->getOperand(0));
unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+ auto overrideDstValue = [&]() {
+ DstModFlags->setOperand(DstIndex, SrcOp);
+ Flags[ID].first = SrcOp;
+ };
+
// If either flag has override behavior, handle it first.
if (DstBehaviorValue == Module::Override) {
// Diagnose inconsistent flags which both have override behavior.
@@ -1167,8 +1172,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
continue;
} else if (SrcBehaviorValue == Module::Override) {
// Update the destination flag to that of the source.
- DstModFlags->setOperand(DstIndex, SrcOp);
- Flags[ID].first = SrcOp;
+ overrideDstValue();
continue;
}
@@ -1204,6 +1208,15 @@ Error IRLinker::linkModuleFlagsMetadata() {
}
continue;
}
+ case Module::Max: {
+ ConstantInt *DstValue =
+ mdconst::extract<ConstantInt>(DstOp->getOperand(2));
+ ConstantInt *SrcValue =
+ mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
+ if (SrcValue->getZExtValue() > DstValue->getZExtValue())
+ overrideDstValue();
+ break;
+ }
case Module::Append: {
MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 0540c4c47a3f..8c3df36cfb48 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -422,6 +422,7 @@ static void ApplyRelocations(
RelEntry.Offset;
switch (RelEntry.Type) {
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: {
+ assert(SymbolIndices.count(RelEntry.Symbol));
uint32_t Index = SymbolIndices[RelEntry.Symbol];
assert(RelEntry.Addend == 0);
@@ -429,6 +430,7 @@ static void ApplyRelocations(
break;
}
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
+ assert(SymbolIndices.count(RelEntry.Symbol));
uint32_t Index = SymbolIndices[RelEntry.Symbol];
assert(RelEntry.Addend == 0);
@@ -448,6 +450,7 @@ static void ApplyRelocations(
break;
}
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
+ assert(SymbolIndices.count(RelEntry.Symbol));
uint32_t Index = SymbolIndices[RelEntry.Symbol];
assert(RelEntry.Addend == 0);
@@ -478,6 +481,7 @@ WriteRelocations(ArrayRef<WasmRelocationEntry> Relocations,
uint64_t Offset = RelEntry.Offset +
RelEntry.FixupSection->getSectionOffset() + HeaderSize;
+ assert(SymbolIndices.count(RelEntry.Symbol));
uint32_t Index = SymbolIndices[RelEntry.Symbol];
int64_t Addend = RelEntry.Addend;
@@ -726,10 +730,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (IsAddressTaken.count(&WS))
TableElems.push_back(Index);
} else {
- // For now, ignore temporary non-function symbols.
- if (S.isTemporary())
- continue;
-
if (WS.getOffset() != 0)
report_fatal_error("data sections must contain one variable each");
if (!WS.getSize())
@@ -777,20 +777,18 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
}
}
- // For each external global, prepare a corresponding wasm global
- // holding its address.
- if (WS.isExternal()) {
- Index = NumGlobalImports + Globals.size();
-
- WasmGlobal Global;
- Global.Type = PtrType;
- Global.IsMutable = false;
- Global.HasImport = false;
- Global.InitialValue = DataSection.getSectionOffset();
- Global.ImportIndex = 0;
- SymbolIndices[&WS] = Index;
- Globals.push_back(Global);
- }
+ // For each global, prepare a corresponding wasm global holding its
+ // address. For externals these will also be named exports.
+ Index = NumGlobalImports + Globals.size();
+
+ WasmGlobal Global;
+ Global.Type = PtrType;
+ Global.IsMutable = false;
+ Global.HasImport = false;
+ Global.InitialValue = DataSection.getSectionOffset();
+ Global.ImportIndex = 0;
+ SymbolIndices[&WS] = Index;
+ Globals.push_back(Global);
}
}
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 28531feccfe1..7372f24cb9a8 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -293,6 +293,10 @@ uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
return Result;
}
+uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ return toSec(Sec) - SectionTable;
+}
+
uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
return getSectionSize(toSec(Ref));
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 3d3fa07db3f4..bfb8875f47d4 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1820,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
return getSection(Sec).addr;
}
+uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ return Sec.d.a;
+}
+
uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
// In the case of a malformed Mach-O file where the section offset is past
// the end of the file or some part of the section size is past the end of
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index 058686e4db9e..f565d7a33e55 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -743,6 +743,10 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
+uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ return Sec.d.a;
+}
+
uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
const WasmSection &S = Sections[Sec.d.a];
return S.Content.size();
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 7eafb00855d7..b00d21ec8f67 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -186,6 +186,20 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
return 0;
}
+std::vector<std::string> OptTable::findByPrefix(StringRef Cur) const {
+ std::vector<std::string> Ret;
+ for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) {
+ if (!In.Prefixes)
+ continue;
+ for (int I = 0; In.Prefixes[I]; I++) {
+ std::string S = std::string(In.Prefixes[I]) + std::string(In.Name);
+ if (StringRef(S).startswith(Cur))
+ Ret.push_back(S);
+ }
+ }
+ return Ret;
+}
+
Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
unsigned FlagsToInclude,
unsigned FlagsToExclude) const {
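
The findByPrefix addition above walks every option spelling and keeps the ones that start with the current token. A hedged standalone sketch of that prefix-completion idea, independent of llvm::opt, is below; OptionInfo, kOptions and completeByPrefix are illustrative names.

// Sketch of prefix-based option completion; not the llvm::opt implementation.
#include <cstdio>
#include <string>
#include <vector>

struct OptionInfo {
  std::vector<std::string> Prefixes; // e.g. "-", "--"
  std::string Name;                  // e.g. "help"
};

static const std::vector<OptionInfo> kOptions = {
    {{"-", "--"}, "help"},
    {{"--"}, "help-hidden"},
    {{"-"}, "o"},
};

// Return every "prefix + name" spelling that starts with Cur.
static std::vector<std::string> completeByPrefix(const std::string &Cur) {
  std::vector<std::string> Ret;
  for (const OptionInfo &In : kOptions)
    for (const std::string &Prefix : In.Prefixes) {
      std::string S = Prefix + In.Name;
      if (S.compare(0, Cur.size(), Cur) == 0) // startswith(Cur)
        Ret.push_back(S);
    }
  return Ret;
}

int main() {
  for (const std::string &S : completeByPrefix("--he"))
    std::printf("%s\n", S.c_str()); // prints --help and --help-hidden
}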
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 6ece7965ce64..abc53e97aa72 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -155,6 +155,11 @@ static cl::opt<bool>
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
+static cl::opt<bool>
+ RunNewGVN("enable-npm-newgvn", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run NewGVN instead of GVN"));
+
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
@@ -336,10 +341,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
LPM1.addPass(LICMPass());
-#if 0
- // The LoopUnswitch pass isn't yet ported to the new pass manager.
- LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3));
-#endif
+ LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(LoopDeletionPass());
@@ -357,7 +359,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (Level != O1) {
// These passes add substantial compile time so skip them at O1.
FPM.addPass(MergedLoadStoreMotionPass());
- FPM.addPass(GVN());
+ if (RunNewGVN)
+ FPM.addPass(NewGVNPass());
+ else
+ FPM.addPass(GVN());
}
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
@@ -429,6 +434,11 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
}
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
+
if (RunProfileGen) {
MPM.addPass(PGOInstrumentationGen());
@@ -774,7 +784,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// FIXME: once we fix LoopPass Manager, add LICM here.
// FIXME: once we provide support for enabling MLSM, add it here.
// FIXME: once we provide support for enabling NewGVN, add it here.
- MainFPM.addPass(GVN());
+ if (RunNewGVN)
+ MainFPM.addPass(NewGVNPass());
+ else
+ MainFPM.addPass(GVN());
// Remove dead memcpy()'s.
MainFPM.addPass(MemCpyOptPass());
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 64a65ccc11a1..a2b7c94f9dec 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -355,7 +355,7 @@ void InstrProfSymtab::create(Module &M, bool InLTO) {
finalizeSymtab();
}
-Error collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
bool doCompression, std::string &Result) {
assert(!NameStrs.empty() && "No name data to emit");
@@ -403,7 +403,7 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) {
return NameStr;
}
-Error collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
std::string &Result, bool doCompression) {
std::vector<std::string> NameStrs;
for (auto *NameVar : NameVars) {
@@ -978,22 +978,22 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
}
// Parse the value profile options.
-void getMemOPSizeRangeFromOption(std::string MemOPSizeRange,
- int64_t &RangeStart, int64_t &RangeLast) {
+void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
+ int64_t &RangeLast) {
static const int64_t DefaultMemOPSizeRangeStart = 0;
static const int64_t DefaultMemOPSizeRangeLast = 8;
RangeStart = DefaultMemOPSizeRangeStart;
RangeLast = DefaultMemOPSizeRangeLast;
if (!MemOPSizeRange.empty()) {
- auto Pos = MemOPSizeRange.find(":");
+ auto Pos = MemOPSizeRange.find(':');
if (Pos != std::string::npos) {
if (Pos > 0)
- RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str());
+ MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart);
if (Pos < MemOPSizeRange.size() - 1)
- RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str());
+ MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast);
} else
- RangeLast = atoi(MemOPSizeRange.c_str());
+ MemOPSizeRange.getAsInteger(10, RangeLast);
}
assert(RangeLast >= RangeStart);
}
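
The getMemOPSizeRangeFromOption change above keeps the same "start:last" option format while switching from atoi to StringRef::getAsInteger. A self-contained sketch of that parsing rule, using plain std::string, is below; parseRange is an illustrative helper, not LLVM code.

// Sketch of the "start:last" range-option format; defaults mirror the patch.
#include <cassert>
#include <cstdint>
#include <string>

static void parseRange(const std::string &Opt, int64_t &RangeStart,
                       int64_t &RangeLast) {
  RangeStart = 0;
  RangeLast = 8;
  if (Opt.empty())
    return;
  std::string::size_type Pos = Opt.find(':');
  if (Pos != std::string::npos) {
    if (Pos > 0)
      RangeStart = std::stoll(Opt.substr(0, Pos));
    if (Pos < Opt.size() - 1)
      RangeLast = std::stoll(Opt.substr(Pos + 1));
  } else {
    // A bare number only sets the upper end of the range.
    RangeLast = std::stoll(Opt);
  }
  assert(RangeLast >= RangeStart);
}

int main() {
  int64_t Start, Last;
  parseRange("2:16", Start, Last); // Start == 2, Last == 16
  parseRange(":5", Start, Last);   // Start == 0 (default), Last == 5
  parseRange("7", Start, Last);    // Start == 0, Last == 7
}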
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 2a916b14bc22..e9716e3b1e87 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2045,7 +2045,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
if (isSingleWord()) {
char Buffer[65];
- char *BufPtr = Buffer+65;
+ char *BufPtr = std::end(Buffer);
uint64_t N;
if (!Signed) {
@@ -2069,7 +2069,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
*--BufPtr = Digits[N % Radix];
N /= Radix;
}
- Str.append(BufPtr, Buffer+65);
+ Str.append(BufPtr, std::end(Buffer));
return;
}
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index 5c277448a765..862232971162 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -42,29 +42,30 @@ Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
}
Error BinaryStreamReader::readCString(StringRef &Dest) {
- // TODO: This could be made more efficient by using readLongestContiguousChunk
- // and searching for null terminators in the resulting buffer.
-
- uint32_t Length = 0;
- // First compute the length of the string by reading 1 byte at a time.
uint32_t OriginalOffset = getOffset();
- const char *C;
+ uint32_t FoundOffset = 0;
while (true) {
- if (auto EC = readObject(C))
+ uint32_t ThisOffset = getOffset();
+ ArrayRef<uint8_t> Buffer;
+ if (auto EC = readLongestContiguousChunk(Buffer))
return EC;
- if (*C == '\0')
+ StringRef S(reinterpret_cast<const char *>(Buffer.begin()), Buffer.size());
+ size_t Pos = S.find_first_of('\0');
+ if (LLVM_LIKELY(Pos != StringRef::npos)) {
+ FoundOffset = Pos + ThisOffset;
break;
- ++Length;
+ }
}
- // Now go back and request a reference for that many bytes.
- uint32_t NewOffset = getOffset();
+ assert(FoundOffset >= OriginalOffset);
+
setOffset(OriginalOffset);
+ size_t Length = FoundOffset - OriginalOffset;
if (auto EC = readFixedString(Dest, Length))
return EC;
- // Now set the offset back to where it was after we calculated the length.
- setOffset(NewOffset);
+ // Now set the offset back to after the null terminator.
+ setOffset(FoundOffset + 1);
return Error::success();
}
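
The readCString rewrite above scans the longest contiguous chunk for the terminator instead of reading one byte at a time. A hedged standalone sketch of that chunk-based scan is below; ChunkedReader is an illustrative stand-in for BinaryStreamReader and does not reproduce its offset bookkeeping.

// Sketch of chunked C-string scanning; not the BinaryStreamReader API.
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

class ChunkedReader {
  std::vector<std::string> Chunks; // contiguous pieces of the stream
  size_t ChunkIdx = 0;

public:
  explicit ChunkedReader(std::vector<std::string> C) : Chunks(std::move(C)) {}

  // Returns false at end of stream; otherwise hands out the next chunk.
  bool readLongestContiguousChunk(std::string &Out) {
    if (ChunkIdx == Chunks.size())
      return false;
    Out = Chunks[ChunkIdx++];
    return true;
  }

  // Scan chunk by chunk until a '\0' is found; return the bytes before it.
  bool readCString(std::string &Dest) {
    Dest.clear();
    std::string Chunk;
    while (readLongestContiguousChunk(Chunk)) {
      size_t Pos = Chunk.find('\0');
      if (Pos != std::string::npos) {
        Dest.append(Chunk, 0, Pos);
        return true; // a real reader would also rewind past the terminator
      }
      Dest += Chunk;
    }
    return false; // ran off the end without finding a terminator
  }
};

int main() {
  ChunkedReader R({std::string("he"), std::string("llo\0world", 9)});
  std::string S;
  assert(R.readCString(S) && S == "hello");
  std::printf("%s\n", S.c_str());
}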
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index 39fd218d3f07..aa9507c189ed 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -53,6 +53,35 @@
#endif
#include <assert.h>
+
+/*
+ * This code extensively uses fall-through switches.
+ * Keep the compiler from warning about that.
+ */
+#if defined(__clang__) && defined(__has_warning)
+# if __has_warning("-Wimplicit-fallthrough")
+# define ConvertUTF_DISABLE_WARNINGS \
+ _Pragma("clang diagnostic push") \
+ _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"")
+# define ConvertUTF_RESTORE_WARNINGS \
+ _Pragma("clang diagnostic pop")
+# endif
+#elif defined(__GNUC__) && __GNUC__ > 6
+# define ConvertUTF_DISABLE_WARNINGS \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+# define ConvertUTF_RESTORE_WARNINGS \
+ _Pragma("GCC diagnostic pop")
+#endif
+#ifndef ConvertUTF_DISABLE_WARNINGS
+# define ConvertUTF_DISABLE_WARNINGS
+#endif
+#ifndef ConvertUTF_RESTORE_WARNINGS
+# define ConvertUTF_RESTORE_WARNINGS
+#endif
+
+ConvertUTF_DISABLE_WARNINGS
+
namespace llvm {
static const int halfShift = 10; /* used for shifting by 10 bits */
@@ -708,3 +737,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
--------------------------------------------------------------------- */
} // namespace llvm
+
+ConvertUTF_RESTORE_WARNINGS
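
The ConvertUTF_DISABLE_WARNINGS / ConvertUTF_RESTORE_WARNINGS macros added above wrap the whole file in a diagnostic push/ignore/pop so deliberate switch fall-through does not trip -Wimplicit-fallthrough. A minimal standalone example of the same pragma pattern follows; FALLTHROUGH_OK_BEGIN/END and extraBytes are illustrative names, not part of the patch.

// Sketch of the push/ignore/pop pragma pattern around intentional fall-through.
#include <cstdio>

#if defined(__clang__)
#define FALLTHROUGH_OK_BEGIN                                                   \
  _Pragma("clang diagnostic push")                                             \
  _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"")
#define FALLTHROUGH_OK_END _Pragma("clang diagnostic pop")
#elif defined(__GNUC__) && __GNUC__ > 6
#define FALLTHROUGH_OK_BEGIN                                                   \
  _Pragma("GCC diagnostic push")                                               \
  _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
#define FALLTHROUGH_OK_END _Pragma("GCC diagnostic pop")
#else
#define FALLTHROUGH_OK_BEGIN
#define FALLTHROUGH_OK_END
#endif

FALLTHROUGH_OK_BEGIN

// Deliberate fall-through: counts the continuation bytes implied by a UTF-8
// lead byte, accumulating as control falls through the cases.
static int extraBytes(unsigned char Lead) {
  int N = 0;
  switch (Lead >> 4) {
  case 0xF: ++N;            // 4-byte sequence
  case 0xE: ++N;            // 3-byte sequence
  case 0xC: case 0xD: ++N;  // 2-byte sequence
  default: break;           // ASCII or continuation byte
  }
  return N;
}

FALLTHROUGH_OK_END

int main() {
  std::printf("%d %d\n", extraBytes(0xF0), extraBytes(0x41)); // prints "3 0"
}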
diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp
index 29dae8a20f00..a10ac8e85396 100644
--- a/lib/Support/DebugCounter.cpp
+++ b/lib/Support/DebugCounter.cpp
@@ -6,6 +6,7 @@
using namespace llvm;
+namespace {
// This class overrides the default list implementation of printing so we
// can pretty print the list of debug counter options. This type of
// dynamic option is pretty rare (basically this and pass lists).
@@ -40,6 +41,7 @@ private:
}
}
};
+} // namespace
// Create our command line option.
static DebugCounterList DebugCounterOption(
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 1541a5726302..9398789cea87 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -127,10 +127,15 @@ void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) {
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName,
std::string *Err) {
- SmartScopedLock<true> Lock(*SymbolsMutex);
+ // Force OpenedHandles to be added into the ManagedStatic list before any
+ // ManagedStatic can be added from static constructors in HandleSet::DLOpen.
+ HandleSet& HS = *OpenedHandles;
+
void *Handle = HandleSet::DLOpen(FileName, Err);
- if (Handle != &Invalid)
- OpenedHandles->AddLibrary(Handle, /*IsProcess*/ FileName == nullptr);
+ if (Handle != &Invalid) {
+ SmartScopedLock<true> Lock(*SymbolsMutex);
+ HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr);
+ }
return DynamicLibrary(Handle);
}
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index d0e1d50e8ccb..f70b77da8de4 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -43,6 +43,7 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
Str.erase(Str.begin()+i); continue;
default: break;
}
+ LLVM_FALLTHROUGH;
case '{': case '}':
case '<': case '>':
case '|': case '"':
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 6a0b64fb884d..234f7439a546 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1401,6 +1401,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
+ Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
// Enable protection keys
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 9fd6652ce4b8..80bef558258d 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -1156,6 +1156,7 @@ file_magic identify_magic(StringRef Magic) {
case 0xc4: // ARMNT Windows
if (Magic[1] == 0x01)
return file_magic::coff_object;
+ LLVM_FALLTHROUGH;
case 0x90: // PA-RISC Windows
case 0x68: // mc68K Windows
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index b0e3d6898cae..318e21da999d 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -34,6 +34,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case mips64: return "mips64";
case mips64el: return "mips64el";
case msp430: return "msp430";
+ case nios2: return "nios2";
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
@@ -98,6 +99,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case mips64:
case mips64el: return "mips";
+ case nios2: return "nios2";
+
case hexagon: return "hexagon";
case amdgcn: return "amdgcn";
@@ -262,6 +265,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("mips64", mips64)
.Case("mips64el", mips64el)
.Case("msp430", msp430)
+ .Case("nios2", nios2)
.Case("ppc64", ppc64)
.Case("ppc32", ppc)
.Case("ppc", ppc)
@@ -384,6 +388,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
.Cases("mips64", "mips64eb", Triple::mips64)
.Case("mips64el", Triple::mips64el)
+ .Case("nios2", Triple::nios2)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
.Case("riscv32", Triple::riscv32)
@@ -625,6 +630,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
+ case Triple::nios2:
case Triple::nvptx:
case Triple::nvptx64:
case Triple::ppc64le:
@@ -643,11 +649,13 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::tce:
case Triple::tcele:
case Triple::thumbeb:
- case Triple::wasm32:
- case Triple::wasm64:
case Triple::xcore:
return Triple::ELF;
+ case Triple::wasm32:
+ case Triple::wasm64:
+ return Triple::Wasm;
+
case Triple::ppc:
case Triple::ppc64:
if (T.isOSDarwin())
@@ -1160,6 +1168,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::le32:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
+ case llvm::Triple::nios2:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::r600:
@@ -1243,6 +1252,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::le32:
case Triple::mips:
case Triple::mipsel:
+ case Triple::nios2:
case Triple::nvptx:
case Triple::ppc:
case Triple::r600:
@@ -1290,6 +1300,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::kalimba:
case Triple::lanai:
case Triple::msp430:
+ case Triple::nios2:
case Triple::r600:
case Triple::tce:
case Triple::tcele:
@@ -1361,6 +1372,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::le32:
case Triple::le64:
case Triple::msp430:
+ case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::r600:
@@ -1447,6 +1459,7 @@ bool Triple::isLittleEndian() const {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
+ case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::ppc64le:
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index c17a6f6e1ea6..f1496393e55e 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -2116,6 +2116,7 @@ void MappingNode::increment() {
break;
default:
setError("Unexpected token. Expected Key or Block End", T);
+ LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
@@ -2128,6 +2129,7 @@ void MappingNode::increment() {
return increment();
case Token::TK_FlowMappingEnd:
getNext();
+ LLVM_FALLTHROUGH;
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
@@ -2170,6 +2172,7 @@ void SequenceNode::increment() {
default:
setError( "Unexpected token. Expected Block Entry or Block End."
, T);
+ LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
@@ -2198,6 +2201,7 @@ void SequenceNode::increment() {
return increment();
case Token::TK_FlowSequenceEnd:
getNext();
+ LLVM_FALLTHROUGH;
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 33d3de5daf33..09f9759ce7da 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -219,7 +219,6 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
static FoldingSet<BitsInit> ThePool;
- static std::vector<BitsInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
@@ -234,7 +233,6 @@ BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -456,7 +454,6 @@ static void ProfileListInit(FoldingSetNodeID &ID,
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
static FoldingSet<ListInit> ThePool;
- static std::vector<ListInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
@@ -471,7 +468,6 @@ ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -606,7 +602,6 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) {
UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
static FoldingSet<UnOpInit> ThePool;
- static std::vector<UnOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileUnOpInit(ID, Opc, LHS, Type);
@@ -617,7 +612,6 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -752,7 +746,6 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS,
BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
Init *RHS, RecTy *Type) {
static FoldingSet<BinOpInit> ThePool;
- static std::vector<BinOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileBinOpInit(ID, Opc, LHS, RHS, Type);
@@ -763,7 +756,6 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -910,7 +902,6 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS,
TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
RecTy *Type) {
static FoldingSet<TernOpInit> ThePool;
- static std::vector<TernOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type);
@@ -921,7 +912,6 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -1503,7 +1493,6 @@ DagInit *
DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
ArrayRef<StringInit *> NameRange) {
static FoldingSet<DagInit> ThePool;
- static std::vector<DagInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
@@ -1512,9 +1501,13 @@ DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- DagInit *I = new(Allocator) DagInit(V, VN, ArgRange, NameRange);
+ void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()), alignof(BitsInit));
+ DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
+ std::uninitialized_copy(ArgRange.begin(), ArgRange.end(),
+ I->getTrailingObjects<Init *>());
+ std::uninitialized_copy(NameRange.begin(), NameRange.end(),
+ I->getTrailingObjects<StringInit *>());
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(I);
return I;
}
@@ -1533,7 +1526,7 @@ DagInit::get(Init *V, StringInit *VN,
}
void DagInit::Profile(FoldingSetNodeID &ID) const {
- ProfileDagInit(ID, Val, ValName, Args, ArgNames);
+ ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects<Init *>(), NumArgs), makeArrayRef(getTrailingObjects<StringInit *>(), NumArgNames));
}
Init *DagInit::convertInitializerTo(RecTy *Ty) const {
@@ -1545,9 +1538,9 @@ Init *DagInit::convertInitializerTo(RecTy *Ty) const {
Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
SmallVector<Init*, 8> NewArgs;
- NewArgs.reserve(Args.size());
+ NewArgs.reserve(arg_size());
bool ArgsChanged = false;
- for (const Init *Arg : Args) {
+ for (const Init *Arg : args()) {
Init *NewArg = Arg->resolveReferences(R, RV);
NewArgs.push_back(NewArg);
ArgsChanged |= NewArg != Arg;
@@ -1555,7 +1548,7 @@ Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *Op = Val->resolveReferences(R, RV);
if (Op != Val || ArgsChanged)
- return DagInit::get(Op, ValName, NewArgs, ArgNames);
+ return DagInit::get(Op, ValName, NewArgs, getArgNames());
return const_cast<DagInit *>(this);
}
@@ -1564,12 +1557,12 @@ std::string DagInit::getAsString() const {
std::string Result = "(" + Val->getAsString();
if (ValName)
Result += ":" + ValName->getAsUnquotedString();
- if (!Args.empty()) {
- Result += " " + Args[0]->getAsString();
- if (ArgNames[0]) Result += ":$" + ArgNames[0]->getAsUnquotedString();
- for (unsigned i = 1, e = Args.size(); i != e; ++i) {
- Result += ", " + Args[i]->getAsString();
- if (ArgNames[i]) Result += ":$" + ArgNames[i]->getAsUnquotedString();
+ if (!arg_empty()) {
+ Result += " " + getArg(0)->getAsString();
+ if (getArgName(0)) Result += ":$" + getArgName(0)->getAsUnquotedString();
+ for (unsigned i = 1, e = getNumArgs(); i != e; ++i) {
+ Result += ", " + getArg(i)->getAsString();
+ if (getArgName(i)) Result += ":$" + getArgName(i)->getAsUnquotedString();
}
}
return Result + ")";
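
The DagInit change above stops storing separate vectors and instead allocates the argument and name arrays immediately after the node in a single allocation (via llvm::TrailingObjects). A hedged standalone sketch of that "header plus trailing array" idiom, without the LLVM helper class, is below; Node and Node::create are illustrative names.

// Sketch of a single allocation holding an object and its trailing array.
#include <cassert>
#include <memory>
#include <new>

class Node {
  unsigned NumArgs;

  explicit Node(unsigned N) : NumArgs(N) {}

  // The argument array lives immediately after the Node object itself.
  // (sizeof(Node) is a multiple of alignof(int) here, so this stays aligned.)
  int *getTrailingArgs() { return reinterpret_cast<int *>(this + 1); }

public:
  static Node *create(const int *Args, unsigned N) {
    // One allocation holds both the header and the trailing array.
    void *Mem = ::operator new(sizeof(Node) + N * sizeof(int));
    Node *Result = new (Mem) Node(N);
    std::uninitialized_copy(Args, Args + N, Result->getTrailingArgs());
    return Result;
  }

  unsigned getNumArgs() const { return NumArgs; }
  int getArg(unsigned I) { return getTrailingArgs()[I]; }

  static void destroy(Node *N) {
    N->~Node();
    ::operator delete(N);
  }
};

int main() {
  int Vals[] = {1, 2, 3};
  Node *N = Node::create(Vals, 3);
  assert(N->getNumArgs() == 3 && N->getArg(2) == 3);
  Node::destroy(N);
}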
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 056ffd58b521..981fd22c213c 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -320,6 +320,9 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
switch (ExtraCode[0]) {
default:
return true; // Unknown modifier.
+ case 'a': // Print 'a' modifier
+ PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ return false;
case 'w': // Print W register
case 'x': // Print X register
if (MO.isReg())
@@ -388,7 +391,7 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
+ if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a')
return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNum);
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 629ad5c61b78..33fec74998d6 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
return true;
}
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MBB->addLiveIn(*I);
-}
-
bool AArch64ExpandPseudo::expandCMP_SWAP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
-
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(1).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ unsigned NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
MF->insert(++StoreBB->getIterator(), DoneBB);
// .Lloadcmp:
+ // mov wStatus, 0
// ldaxr xDest, [xAddr]
// cmp xDest, xDesired
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
+ if (!StatusDead)
+ BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
+ .addImm(0).addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .add(Desired)
+ .addReg(DesiredReg)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
@@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
// .Lstore:
// stlxr wStatus, xNew, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
+ .addReg(NewReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
@@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
MachineOperand &DestLo = MI.getOperand(0);
MachineOperand &DestHi = MI.getOperand(1);
unsigned StatusReg = MI.getOperand(2).getReg();
- MachineOperand &Addr = MI.getOperand(3);
- MachineOperand &DesiredLo = MI.getOperand(4);
- MachineOperand &DesiredHi = MI.getOperand(5);
- MachineOperand &NewLo = MI.getOperand(6);
- MachineOperand &NewHi = MI.getOperand(7);
-
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(2).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned DesiredLoReg = MI.getOperand(4).getReg();
+ unsigned DesiredHiReg = MI.getOperand(5).getReg();
+ unsigned NewLoReg = MI.getOperand(6).getReg();
+ unsigned NewHiReg = MI.getOperand(7).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
// cmp xDestLo, xDesiredLo
// sbcs xDestHi, xDesiredHi
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(DestLo.getReg());
- LoadCmpBB->addLiveIn(DestHi.getReg());
- LoadCmpBB->addLiveIn(DesiredLo.getReg());
- LoadCmpBB->addLiveIn(DesiredHi.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
.addReg(DestLo.getReg(), RegState::Define)
.addReg(DestHi.getReg(), RegState::Define)
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .add(DesiredLo)
+ .addReg(DesiredLoReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
@@ -717,14 +712,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .add(DesiredHi)
+ .addReg(DesiredHiReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
.addUse(StatusReg, RegState::Kill)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
- .addUse(StatusReg, RegState::Kill)
+ .addUse(StatusReg, getKillRegState(StatusDead))
.addMBB(DoneBB);
LoadCmpBB->addSuccessor(DoneBB);
LoadCmpBB->addSuccessor(StoreBB);
@@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
// .Lstore:
// stlxp wStatus, xNewLo, xNewHi, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(NewLo.getReg());
- StoreBB->addLiveIn(NewHi.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .add(NewLo)
- .add(NewHi)
- .add(Addr);
+ .addReg(NewLoReg)
+ .addReg(NewHiReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute liveness bottom up.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass in the loop to get the loop carried dependencies right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 1aec602a2a36..0b92249580c8 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -267,12 +267,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
return AArch64::X9;
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -991,6 +991,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
SmallVector<RegPairInfo, 8> RegPairs;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
@@ -1022,9 +1023,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
dbgs() << ")\n");
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
- MBB.addLiveIn(Reg1);
+ if (!MRI.isReserved(Reg1))
+ MBB.addLiveIn(Reg1);
if (RPI.isPaired()) {
- MBB.addLiveIn(Reg2);
+ if (!MRI.isReserved(Reg2))
+ MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1af36086ad90..62f4c953830b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -886,18 +886,21 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
+ SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
- if (NewImm == 0 || NewImm == OrigMask)
- return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT));
-
+ if (NewImm == 0 || NewImm == OrigMask) {
+ New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+ TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
- Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
- SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
- SDValue New(
- TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
+ } else {
+ Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
+ SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
+ New = SDValue(
+ TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
+ }
return TLO.CombineTo(Op, New);
}
@@ -9219,16 +9222,26 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
+ uint64_t BaseOffset = 0;
+
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
+ // As this is in ISel, we will not merge this add, which may degrade results.
+ if (BasePtr->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(BasePtr->getOperand(1))) {
+ BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
+ BasePtr = BasePtr->getOperand(0);
+ }
+
unsigned Offset = EltOffset;
while (--NumVecElts) {
unsigned Alignment = MinAlign(OrigAlignment, Offset);
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, DL, MVT::i64));
+ SDValue OffsetPtr =
+ DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index c42738da7ab0..faf39be9b41e 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -763,15 +763,126 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
-bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
- if (MI.getNumOperands() < 4)
+bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
return false;
- unsigned ShOpVal = MI.getOperand(3).getImm();
- unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
- if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
- ShImm < 4)
- return true;
- return false;
+
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs: {
+ unsigned Imm = MI.getOperand(3).getImm();
+ unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
+ if (ShiftVal == 0)
+ return true;
+ return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
+ }
+
+ case AArch64::ADDWrx:
+ case AArch64::ADDXrx:
+ case AArch64::ADDXrx64:
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrx:
+ case AArch64::ADDSXrx64: {
+ unsigned Imm = MI.getOperand(3).getImm();
+ switch (AArch64_AM::getArithExtendType(Imm)) {
+ default:
+ return false;
+ case AArch64_AM::UXTB:
+ case AArch64_AM::UXTH:
+ case AArch64_AM::UXTW:
+ case AArch64_AM::UXTX:
+ return AArch64_AM::getArithShiftValue(Imm) <= 4;
+ }
+ }
+
+ case AArch64::SUBWrs:
+ case AArch64::SUBSWrs: {
+ unsigned Imm = MI.getOperand(3).getImm();
+ unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
+ return ShiftVal == 0 ||
+ (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
+ }
+
+ case AArch64::SUBXrs:
+ case AArch64::SUBSXrs: {
+ unsigned Imm = MI.getOperand(3).getImm();
+ unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
+ return ShiftVal == 0 ||
+ (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
+ }
+
+ case AArch64::SUBWrx:
+ case AArch64::SUBXrx:
+ case AArch64::SUBXrx64:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64: {
+ unsigned Imm = MI.getOperand(3).getImm();
+ switch (AArch64_AM::getArithExtendType(Imm)) {
+ default:
+ return false;
+ case AArch64_AM::UXTB:
+ case AArch64_AM::UXTH:
+ case AArch64_AM::UXTW:
+ case AArch64_AM::UXTX:
+ return AArch64_AM::getArithShiftValue(Imm) == 0;
+ }
+ }
+
+ case AArch64::LDRBBroW:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRBroW:
+ case AArch64::LDRBroX:
+ case AArch64::LDRDroW:
+ case AArch64::LDRDroX:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRHroW:
+ case AArch64::LDRHroX:
+ case AArch64::LDRQroW:
+ case AArch64::LDRQroX:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroW:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSroW:
+ case AArch64::LDRSroX:
+ case AArch64::LDRWroW:
+ case AArch64::LDRWroX:
+ case AArch64::LDRXroW:
+ case AArch64::LDRXroX:
+ case AArch64::PRFMroW:
+ case AArch64::PRFMroX:
+ case AArch64::STRBBroW:
+ case AArch64::STRBBroX:
+ case AArch64::STRBroW:
+ case AArch64::STRBroX:
+ case AArch64::STRDroW:
+ case AArch64::STRDroX:
+ case AArch64::STRHHroW:
+ case AArch64::STRHHroX:
+ case AArch64::STRHroW:
+ case AArch64::STRHroX:
+ case AArch64::STRQroW:
+ case AArch64::STRQroX:
+ case AArch64::STRSroW:
+ case AArch64::STRSroX:
+ case AArch64::STRWroW:
+ case AArch64::STRWroX:
+ case AArch64::STRXroW:
+ case AArch64::STRXroX: {
+ unsigned IsSigned = MI.getOperand(3).getImm();
+ return !IsSigned;
+ }
+ }
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 4cd14db633b9..59f3405fe439 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -270,7 +270,7 @@ public:
bool IsTailCall) const override;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
- bool isFalkorLSLFast(const MachineInstr &MI) const;
+ bool isFalkorShiftExtFast(const MachineInstr &MI) const;
private:
/// \brief Sets the offsets on outlined instructions in \p MBB which use SP
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index da68f3165c5e..ad24612239fa 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -442,7 +442,7 @@ def MSRpstateImm4 : MSRpstateImm0_15;
// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
let hasSideEffects = 0 in
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
- [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index a6926a6700e1..3b71d529db59 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -232,6 +232,19 @@ static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+ if (&SecondSU != &DAG->ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (SUnit::const_succ_iterator
+ SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end();
+ SI != SE; ++SI) {
+ if (!SI->getSUnit() || SI->getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';);
+ DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
++NumFused;
return true;
}
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index cf1c0b66db58..44fd94fc3d48 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -61,56 +61,42 @@ let SchedModel = FalkorModel in {
let SchedModel = FalkorModel in {
-def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 1; let NumMicroOps = 2; }
-def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
- { let Latency = 2; let NumMicroOps = 2; }
-def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
- { let Latency = 2; let NumMicroOps = 2; }
-def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
- { let Latency = 8; let NumMicroOps = 2; }
-def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
- { let Latency = 16; let NumMicroOps = 2; }
-def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
-def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
-def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
-def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
-def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteST, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
-def : WriteRes<WriteSTIdx, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 3; let NumMicroOps = 2; }
-def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
-def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 6; let NumMicroOps = 2; }
-def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
-def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
-def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-
-def : WriteRes<WriteSys, []> { let Latency = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint, []> { let Latency = 1; }
-
-def : WriteRes<WriteLDHi, []> { let Latency = 3; }
-
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-
-// No forwarding logic is modelled yet.
+// These WriteRes entries are not used in the Falkor sched model.
+def : WriteRes<WriteImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteI, []> { let Unsupported = 1; }
+def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
+def : WriteRes<WriteIS, []> { let Unsupported = 1; }
+def : WriteRes<WriteID32, []> { let Unsupported = 1; }
+def : WriteRes<WriteID64, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
+def : WriteRes<WriteBr, []> { let Unsupported = 1; }
+def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
+def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteF, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
+def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
+def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
+def : WriteRes<WriteV, []> { let Unsupported = 1; }
+def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteVST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSys, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
+def : WriteRes<WriteHint, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// These ReadAdvance entries are not used in the Falkor sched model.
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index a9b4d44a523e..d098cf7a5a37 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -12,7 +12,509 @@
//
//===----------------------------------------------------------------------===//
-include "AArch64SchedFalkorWriteRes.td"
+// Contains all of the Falkor specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: FalkorWr
+// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
+// Latency: #cyc
+//
+// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
+// down one Z pipe, six SD pipes, four VX pipes and the total latency is
+// six cycles.
+//
+// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
+//
+// Contains all of the Falkor specific WriteVariant types for immediate zero
+// and LSLFast.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define 0 micro-op types
+def FalkorWr_none_1cyc : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_3cyc : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_4cyc : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 1 micro-op types
+
+def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
+def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
+def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
+def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
+def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
+def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
+def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
+def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
+def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
+def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
+def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
+def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+
+def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
+def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
+def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define 2 micro-op types
+
+def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 8];
+}
+
+def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 16;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 16];
+}
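+
+// Note: ResourceCycles lists, per resource above, how many cycles that
+// resource stays busy, so these two write types occupy the X unit for 2
+// cycles and the Z unit for 8 (respectively 16) cycles, presumably modeling
+// a non-pipelined divider; they are used by (S|U)DIVWr and (S|U)DIVXr below.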
+
+def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 3 micro-op types
+
+def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+//===----------------------------------------------------------------------===//
+// Define 4 micro-op types
+
+def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
+ FalkorUnitSD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 5 micro-op types
+
+def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitST,
+ FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 5;
+}
+def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitST,
+ FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 5;
+}
+//===----------------------------------------------------------------------===//
+// Define 6 micro-op types
+
+def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitXYZ,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 8 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 9 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitXYZ,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitXYZ,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 10 micro-op types
+
+def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 12 micro-op types
+
+def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 12;
+}
+
+// Forwarding logic is modeled for multiply add/accumulate.
+// -----------------------------------------------------------------------------
+def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
+def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
+def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
+def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
+def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
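+// For example, MADDWrrr below is mapped to
+//   [FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
+// so when its accumulator operand is produced by a 32-bit multiply that
+// operand is read 3 cycles early: the effective latency on that edge is
+// 4 - 3 = 1 cycle instead of the nominal 4. Likewise FalkorReadFMA32 trims
+// the 5-cycle FMUL32 latency to 4 on the FMLA/FMLS accumulator operand.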
+
+// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
+// -----------------------------------------------------------------------------
+def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
+def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
+ MI->getOperand(1).getReg() == AArch64::XZR}]>;
+def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
+
+def FalkorWr_FMOV : SchedWriteVariant<[
+ SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
+
+def FalkorWr_MOVZ : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+
+def FalkorWr_ADDSUBsx : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
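+// For example, the ADD/SUB (shifted/extended register) mappings below use
+// FalkorWr_ADDSUBsx: when TII->isFalkorShiftExtFast classifies the shift or
+// extend as fast, the instruction resolves to the single-uop, 1-cycle XYZ
+// write; otherwise it falls back to the 2-uop, 2-cycle variant.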
+
+def FalkorWr_LDRro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
+
+def FalkorWr_LDRSro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
+
+def FalkorWr_PRFMro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
+
+def FalkorWr_STRVro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
+
+def FalkorWr_STRQro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
+
+def FalkorWr_STRro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
//===----------------------------------------------------------------------===//
// Specialize the coarse model by associating instruction groups with the
@@ -22,63 +524,76 @@ include "AArch64SchedFalkorWriteRes.td"
// Miscellaneous
// -----------------------------------------------------------------------------
-def : InstRW<[WriteI], (instrs COPY)>;
+// FIXME: This could be better modeled by looking at the regclasses of the operands.
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
// SIMD Floating-point Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FMULX32)>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FMULX64)>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FMULX64)>;
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>;
-def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
-def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
-def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
+def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
-
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)v1i64_indexed$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
// SIMD Integer Instructions
// -----------------------------------------------------------------------------
@@ -92,12 +607,14 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$"
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
@@ -110,6 +627,8 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
@@ -120,10 +639,14 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
@@ -154,7 +677,7 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
@@ -165,14 +688,18 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
-def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
@@ -186,99 +713,114 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
// SIMD Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
-def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD], (instrs LD2i64)>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc],
+ (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc],
+ (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc],
+ (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
+
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
-def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
+def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
+def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
// SIMD Miscellaneous Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
@@ -287,35 +829,42 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FRECPS64, FRSQRTS64)>;
-def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
+ (instregex "^INSv(i32|i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instrs FRECPSv4f32, FRSQRTSv4f32)>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instrs FRECPSv2f64, FRSQRTSv2f64)>;
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
@@ -328,50 +877,95 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
// SIMD Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>;
-def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>;
-
-def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
-
-def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
-def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>;
-
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>;
-
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>;
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
-
-def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>;
-def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^STPQ(post|pre)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^STP(D|S)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>;
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+ (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+ (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+ (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+ (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
// Branch Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>;
def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>;
def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
@@ -388,89 +982,103 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
// FP Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
-def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
-def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
-def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+ (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDPQ(pre|post)$")>;
// FP Data Processing Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^F(N)?MULSrr$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^F(N)?MULDrr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
+ (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
+ (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(H|S|D)i$")>;
-def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hr|Sr|Dr|v.*_ns)$")>;
-// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov 0.0
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>;
+// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>;
def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
// Load Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
-
-def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
-def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
-def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+ (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>;
-
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+ (instrs LDPSWi)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+ (instregex "^LDPSW(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc],
+ (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
-def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
-def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
-
-def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
-
-def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
-def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
-def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
-
// Miscellaneous Data-Processing Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
@@ -480,17 +1088,22 @@ def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
-def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], (instregex "^M(ADD|SUB)Wrrr$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
+ (instregex "^M(ADD|SUB)Wrrr$")>;
-def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
-def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^M(ADD|SUB)Xrrr$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^M(ADD|SUB)Xrrr$")>;
-def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
-def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(S|U)MULLv.*$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(S|U)MULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
// Move and Shift Instructions
// -----------------------------------------------------------------------------
@@ -498,6 +1111,11 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs MOVi32imm, MOVi64imm)>;
+def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
+def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs LOADgot)>;
// Other Instructions
// -----------------------------------------------------------------------------
@@ -507,13 +1125,12 @@ def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>;
def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
-def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
-def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>;
def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
@@ -523,20 +1140,16 @@ def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>;
def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>;
def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>;
-def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
// Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>;
-def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
-def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
-def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
-def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>;
-def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>;
-def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
-def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
-
-def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
-
-def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+ (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+ (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>;
+
diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
deleted file mode 100644
index 6526cc28e806..000000000000
--- a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
+++ /dev/null
@@ -1,403 +0,0 @@
-//=- AArch64SchedFalkorWrRes.td - Falkor Write Res ---*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Contains all of the Falkor specific SchedWriteRes types. The approach
-// below is to define a generic SchedWriteRes for every combination of
-// latency and microOps. The naming convention is to use a prefix, one field
-// for latency, and one or more microOp count/type designators.
-// Prefix: FalkorWr
-// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
-// Latency: #cyc
-//
-// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
-// down one Z pipe, six SD pipes, four VX pipes and the total latency is
-// six cycles.
-//
-// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
-//
-// Contains all of the Falkor specific WriteVariant types for immediate zero
-// and LSLFast.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Define 1 micro-op types
-
-def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
-def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
-def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
-def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
-def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
-def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
-def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
-def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
-def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
-def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
-def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
-def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
-def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
-
-def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
-def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
-def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
-def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
-def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
-def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
-def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
-def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
-
-def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
-def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
-def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
-
-def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
-def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
-def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
-
-//===----------------------------------------------------------------------===//
-// Define 2 micro-op types
-
-def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 1;
- let NumMicroOps = 2;
-}
-def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 3;
- let NumMicroOps = 2;
-}
-def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 6;
- let NumMicroOps = 2;
-}
-def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 6;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 10;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
- let Latency = 1;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
- let Latency = 0;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
- let Latency = 8;
- let ResourceCycles = [2, 8];
-}
-
-def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
- let Latency = 16;
- let ResourceCycles = [2, 16];
-}
-
-def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
- let Latency = 3;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 2;
-}
-
-def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
- let Latency = 0;
- let NumMicroOps = 2;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 3 micro-op types
-
-def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
- FalkorUnitLD]> {
- let Latency = 0;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
- FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 3;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 5;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 6;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 3;
-}
-
-def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitZ]> {
- let Latency = 3;
- let NumMicroOps = 3;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 4 micro-op types
-
-def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
- FalkorUnitVX, FalkorUnitVY]> {
- let Latency = 2;
- let NumMicroOps = 4;
-}
-
-def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 2;
- let NumMicroOps = 4;
-}
-def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 3;
- let NumMicroOps = 4;
-}
-def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 4;
-}
-def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 6;
- let NumMicroOps = 4;
-}
-
-def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitLD, FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 4;
-}
-
-def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 4;
-}
-
-def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 4;
-}
-
-def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
- FalkorUnitSD, FalkorUnitLD]> {
- let Latency = 3;
- let NumMicroOps = 4;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 5 micro-op types
-
-def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 5;
-}
-def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 5;
-}
-def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitVXVY]> {
- let Latency = 7;
- let NumMicroOps = 5;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 6 micro-op types
-
-def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 6;
-}
-
-def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
- FalkorUnitVSD, FalkorUnitXYZ,
- FalkorUnitST, FalkorUnitVSD]> {
- let Latency = 0;
- let NumMicroOps = 6;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 8 micro-op types
-
-def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
- FalkorUnitVXVY, FalkorUnitVXVY,
- FalkorUnitLD, FalkorUnitLD,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 8;
-}
-
-//===----------------------------------------------------------------------===//
-// Define 9 micro-op types
-
-def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
- FalkorUnitLD, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitLD,
- FalkorUnitLD, FalkorUnitXYZ,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 9;
-}
-
-def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
- FalkorUnitLD, FalkorUnitVXVY,
- FalkorUnitVXVY, FalkorUnitXYZ,
- FalkorUnitLD, FalkorUnitLD,
- FalkorUnitVXVY, FalkorUnitVXVY]> {
- let Latency = 4;
- let NumMicroOps = 9;
-}
-
-// Forwarding logic is modeled for multiply add/accumulate.
-// -----------------------------------------------------------------------------
-def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
-def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
-def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
-def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
-def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
-
-// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
-// -----------------------------------------------------------------------------
-def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
-def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
-
-def FalkorWr_FMOV : SchedWriteVariant<[
- SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
-
-def FalkorWr_MOVZ : SchedWriteVariant<[
- SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
-
-def FalkorWr_LDR : SchedWriteVariant<[
- SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
-
-def FalkorWr_ADD : SchedWriteVariant<[
- SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
-
-def FalkorWr_PRFM : SchedWriteVariant<[
- SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
-
-def FalkorWr_LDRS : SchedWriteVariant<[
- SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index b369ee7e4ba2..d3cab1ad3397 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -90,7 +90,6 @@ void AArch64Subtarget::initializeProperties() {
break;
case Falkor:
MaxInterleaveFactor = 4;
- VectorInsertExtractBaseCost = 2;
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 132f192f2a9a..cb3f72a524f5 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -10,10 +10,10 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
@@ -277,7 +278,7 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
- if (ST.hasFuseLiterals()) {
+ if (ST.hasFuseAES() || ST.hasFuseLiterals()) {
// Run the Macro Fusion after RA again since literals are expanded from
// pseudos then (v. addPreSched2()).
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
@@ -295,6 +296,7 @@ public:
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
+ void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
#endif
bool addILPOpts() override;
@@ -404,6 +406,12 @@ bool AArch64PassConfig::addRegBankSelect() {
return false;
}
+void AArch64PassConfig::addPreGlobalInstructionSelect() {
+ // Work around a deficiency of the fast register allocator.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ addPass(new Localizer());
+}
+
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index b279bd61e180..e7ebb37a9d62 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -425,7 +425,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
- FeatureFastFMAF32, FeatureDPP,
+ FeatureFastFMAF32, FeatureSDWA, FeatureDPP,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
]
>;
@@ -534,10 +534,12 @@ def AMDGPUAsmVariants {
int VOP3_ID = 1;
string SDWA = "SDWA";
int SDWA_ID = 2;
+ string SDWA9 = "SDWA9";
+ int SDWA9_ID = 3;
string DPP = "DPP";
- int DPP_ID = 3;
+ int DPP_ID = 4;
string Disable = "Disable";
- int Disable_ID = 4;
+ int Disable_ID = 5;
}
def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
@@ -555,6 +557,12 @@ def SDWAAsmParserVariant : AsmParserVariant {
let Name = AMDGPUAsmVariants.SDWA;
}
+def SDWA9AsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.SDWA9_ID;
+ let Name = AMDGPUAsmVariants.SDWA9;
+}
+
+
def DPPAsmParserVariant : AsmParserVariant {
let Variant = AMDGPUAsmVariants.DPP_ID;
let Name = AMDGPUAsmVariants.DPP;
@@ -567,6 +575,7 @@ def AMDGPU : Target {
let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
VOP3AsmParserVariant,
SDWAAsmParserVariant,
+ SDWA9AsmParserVariant,
DPPAsmParserVariant];
let AssemblyWriters = [AMDGPUAsmWriter];
}
@@ -607,7 +616,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<"FeatureVOP3P">;
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<"FeatureSDWA">;
+ AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
+
+def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5ec46a8294c0..723e8a7b54e2 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -127,6 +127,29 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
+bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op)
+{
+ assert(Op.getOpcode() == ISD::OR);
+
+ SDValue N0 = Op->getOperand(0);
+ SDValue N1 = Op->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ if (VT.isInteger() && !VT.isVector()) {
+ KnownBits LHSKnown, RHSKnown;
+ DAG.computeKnownBits(N0, LHSKnown);
+
+ if (LHSKnown.Zero.getBoolValue()) {
+ DAG.computeKnownBits(N1, RHSKnown);
+
+ if (!(~RHSKnown.Zero & ~LHSKnown.Zero))
+ return true;
+ }
+ }
+
+ return false;
+}
+
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -2596,8 +2619,6 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
- if (VT != MVT::i64)
- return SDValue();
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
@@ -2618,6 +2639,8 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
// shl (ext x) => zext (shl x), if shift does not overflow int
+ if (VT != MVT::i64)
+ break;
KnownBits Known;
SDValue X = LHS->getOperand(0);
DAG.computeKnownBits(X, Known);
@@ -2628,8 +2651,23 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
return DAG.getZExtOrTrunc(Shl, SL, VT);
}
+ case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break;
+ case ISD::ADD: { // Fall through from above
+ // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0),
+ SDValue(RHS, 0));
+ SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal,
+ SDLoc(C2), VT);
+ return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V);
+ }
+ break;
+ }
}
+ if (VT != MVT::i64)
+ return SDValue();
+
// i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
// On some subtargets, 64-bit shift is a quarter rate instruction. In the
@@ -3440,7 +3478,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DL);
}
- if ((OffsetVal + WidthVal) >= 32) {
+ if ((OffsetVal + WidthVal) >= 32 &&
+ !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index fb2f15022d25..0d066cdbdff4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -34,6 +34,9 @@ private:
/// compare.
SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const;
+public:
+ static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op);
+
protected:
const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 9de302994e68..57905be18813 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -36,6 +36,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
setAction({G_CONSTANT, S32}, Legal);
setAction({G_CONSTANT, S64}, Legal);
+ setAction({G_FCONSTANT, S32}, Legal);
+
setAction({G_GEP, P1}, Legal);
setAction({G_GEP, P2}, Legal);
setAction({G_GEP, 1, S64}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 85184b363905..07f92918a43f 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -97,6 +97,9 @@ private:
Instruction *UseInst,
int OpIdx0, int OpIdx1) const;
+ /// Check whether we have enough local memory for promotion.
+ bool hasSufficientLocalMem(const Function &F);
+
public:
static char ID;
@@ -107,7 +110,7 @@ public:
StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
- void handleAlloca(AllocaInst &I);
+ bool handleAlloca(AllocaInst &I, bool SufficientLDS);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -147,105 +150,21 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
- AS = AMDGPU::getAMDGPUAS(*F.getParent());
-
- FunctionType *FTy = F.getFunctionType();
-
- // If the function has any arguments in the local address space, then it's
- // possible these arguments require the entire local memory space, so
- // we cannot use local memory in the pass.
- for (Type *ParamTy : FTy->params()) {
- PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
- LocalMemLimit = 0;
- DEBUG(dbgs() << "Function has local memory argument. Promoting to "
- "local memory disabled.\n");
- return false;
- }
- }
-
- LocalMemLimit = ST.getLocalMemorySize();
- if (LocalMemLimit == 0)
- return false;
-
- const DataLayout &DL = Mod->getDataLayout();
-
- // Check how much local memory is being used by global objects
- CurrentLocalMemUsage = 0;
- for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
- continue;
-
- for (const User *U : GV.users()) {
- const Instruction *Use = dyn_cast<Instruction>(U);
- if (!Use)
- continue;
-
- if (Use->getParent()->getParent() == &F) {
- unsigned Align = GV.getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(GV.getValueType());
- // FIXME: Try to account for padding here. The padding is currently
- // determined from the inverse order of uses in the function. I'm not
- // sure if the use list order is in any way connected to this, so the
- // total reported size is likely incorrect.
- uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
- CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
- CurrentLocalMemUsage += AllocSize;
- break;
- }
- }
- }
-
- unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
- F);
-
- // Restrict local memory usage so that we don't drastically reduce occupancy,
- // unless it is already significantly reduced.
-
- // TODO: Have some sort of hint or other heuristics to guess occupancy based
- // on other factors..
- unsigned OccupancyHint = ST.getWavesPerEU(F).second;
- if (OccupancyHint == 0)
- OccupancyHint = 7;
-
- // Clamp to max value.
- OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
-
- // Check the hint but ignore it if it's obviously wrong from the existing LDS
- // usage.
- MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
-
-
- // Round up to the next tier of usage.
- unsigned MaxSizeWithWaveCount
- = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
-
- // Program is possibly broken by using more local mem than available.
- if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
- return false;
-
- LocalMemLimit = MaxSizeWithWaveCount;
-
- DEBUG(
- dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n"
- << " Rounding size to " << MaxSizeWithWaveCount
- << " with a maximum occupancy of " << MaxOccupancy << '\n'
- << " and " << (LocalMemLimit - CurrentLocalMemUsage)
- << " available for promotion\n"
- );
+ AS = AMDGPU::getAMDGPUAS(*F.getParent());
+ bool SufficientLDS = hasSufficientLocalMem(F);
+ bool Changed = false;
BasicBlock &EntryBB = *F.begin();
for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I);
++I;
if (AI)
- handleAlloca(*AI);
+ Changed |= handleAlloca(*AI, SufficientLDS);
}
- return true;
+ return Changed;
}
std::pair<Value *, Value *>
@@ -661,12 +580,105 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
return true;
}
+bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
+
+ FunctionType *FTy = F.getFunctionType();
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+
+ // If the function has any arguments in the local address space, then it's
+ // possible these arguments require the entire local memory space, so
+ // we cannot use local memory in the pass.
+ for (Type *ParamTy : FTy->params()) {
+ PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
+ if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+ LocalMemLimit = 0;
+ DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+ "local memory disabled.\n");
+ return false;
+ }
+ }
+
+ LocalMemLimit = ST.getLocalMemorySize();
+ if (LocalMemLimit == 0)
+ return false;
+
+ const DataLayout &DL = Mod->getDataLayout();
+
+ // Check how much local memory is being used by global objects
+ CurrentLocalMemUsage = 0;
+ for (GlobalVariable &GV : Mod->globals()) {
+ if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+ continue;
+
+ for (const User *U : GV.users()) {
+ const Instruction *Use = dyn_cast<Instruction>(U);
+ if (!Use)
+ continue;
+
+ if (Use->getParent()->getParent() == &F) {
+ unsigned Align = GV.getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(GV.getValueType());
+
+ // FIXME: Try to account for padding here. The padding is currently
+ // determined from the inverse order of uses in the function. I'm not
+ // sure if the use list order is in any way connected to this, so the
+ // total reported size is likely incorrect.
+ uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
+ CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
+ CurrentLocalMemUsage += AllocSize;
+ break;
+ }
+ }
+ }
+
+ unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
+ F);
+
+ // Restrict local memory usage so that we don't drastically reduce occupancy,
+ // unless it is already significantly reduced.
+
+ // TODO: Have some sort of hint or other heuristics to guess occupancy based
+ // on other factors..
+ unsigned OccupancyHint = ST.getWavesPerEU(F).second;
+ if (OccupancyHint == 0)
+ OccupancyHint = 7;
+
+ // Clamp to max value.
+ OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
+
+ // Check the hint but ignore it if it's obviously wrong from the existing LDS
+ // usage.
+ MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
+
+
+ // Round up to the next tier of usage.
+ unsigned MaxSizeWithWaveCount
+ = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
+
+ // Program is possibly broken by using more local mem than available.
+ if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
+ return false;
+
+ LocalMemLimit = MaxSizeWithWaveCount;
+
+ DEBUG(
+ dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n"
+ << " Rounding size to " << MaxSizeWithWaveCount
+ << " with a maximum occupancy of " << MaxOccupancy << '\n'
+ << " and " << (LocalMemLimit - CurrentLocalMemUsage)
+ << " available for promotion\n"
+ );
+
+ return true;
+}
+
// FIXME: Should try to pick the most likely to be profitable allocas first.
-void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
+bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// Array allocations are probably not worth handling, since an allocation of
// the array type is the canonical form.
if (!I.isStaticAlloca() || I.isArrayAllocation())
- return;
+ return false;
IRBuilder<> Builder(&I);
@@ -675,10 +687,8 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I, AS)) {
- DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
- return;
- }
+ if (tryPromoteAllocaToVector(&I, AS))
+ return true; // Promoted to vector.
const Function &ContainingFunction = *I.getParent()->getParent();
CallingConv::ID CC = ContainingFunction.getCallingConv();
@@ -692,9 +702,13 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
break;
default:
DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
- return;
+ return false;
}
+ // Not likely to have sufficient local memory for promotion.
+ if (!SufficientLDS)
+ return false;
+
const AMDGPUSubtarget &ST =
TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
@@ -718,7 +732,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (NewSize > LocalMemLimit) {
DEBUG(dbgs() << " " << AllocSize
<< " bytes of local memory not available to promote\n");
- return;
+ return false;
}
CurrentLocalMemUsage = NewSize;
@@ -727,7 +741,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
DEBUG(dbgs() << " Do not know how to convert all uses\n");
- return;
+ return false;
}
DEBUG(dbgs() << "Promoting alloca to local memory\n");
@@ -873,6 +887,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
llvm_unreachable("Don't know how to promote alloca intrinsic use.");
}
}
+ return true;
}
FunctionPass *llvm::createAMDGPUPromoteAlloca() {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index e543cae07ada..660879426810 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -416,6 +416,10 @@ public:
return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
@@ -670,10 +674,6 @@ public:
return HasInv2PiInlineImm;
}
- bool hasSDWA() const {
- return HasSDWA;
- }
-
bool hasDPP() const {
return HasDPP;
}
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b52ea2b3a2c6..f5541e08e1b7 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -881,6 +881,10 @@ public:
return AMDGPU::isVI(getSTI());
}
+ bool isGFX9() const {
+ return AMDGPU::isGFX9(getSTI());
+ }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -989,7 +993,6 @@ private:
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
- bool isSGPR(unsigned Reg);
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -1042,9 +1045,10 @@ public:
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
+ void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
- uint64_t BasicInstType);
+ uint64_t BasicInstType, bool skipVcc = false);
};
struct OptionalOperand {
@@ -1966,7 +1970,8 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
}
if (isForcedSDWA()) {
- static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
+ static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
+ AMDGPUAsmVariants::SDWA9};
return makeArrayRef(Variants);
}
@@ -1977,7 +1982,7 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
static const unsigned Variants[] = {
AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
- AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
+ AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
};
return makeArrayRef(Variants);
@@ -2000,14 +2005,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
return AMDGPU::NoRegister;
}
-bool AMDGPUAsmParser::isSGPR(unsigned Reg) {
- const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
- const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
- return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
- Reg == AMDGPU::SCC;
-}
-
// NB: This code is correct only when used to check constant
 // bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
@@ -2049,7 +2046,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
if (MO.isImm()) {
return !isInlineConstant(Inst, OpIdx);
}
- return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg()));
+ return !MO.isReg() ||
+ isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}
bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
@@ -2060,7 +2058,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
- SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) {
+ SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
+ SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
@@ -4151,14 +4150,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
+void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
+ cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
+}
+
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
- cvtSDWA(Inst, Operands, SIInstrFlags::VOPC);
+ cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
- uint64_t BasicInstType) {
+ uint64_t BasicInstType, bool skipVcc) {
using namespace llvm::AMDGPU::SDWA;
OptionalImmIndexMap OptionalIdx;
+ bool skippedVcc = false;
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -4168,15 +4172,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- // Add the register arguments
- if ((BasicInstType == SIInstrFlags::VOPC ||
- BasicInstType == SIInstrFlags::VOP2)&&
- Op.isReg() &&
- Op.Reg.RegNo == AMDGPU::VCC) {
- // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
- // Skip it.
- continue;
- } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+ // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
+ // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
+ // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
+ // Skip VCC only if we didn't skip it on previous iteration.
+ if (BasicInstType == SIInstrFlags::VOP2 &&
+ (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
+ skippedVcc = true;
+ continue;
+ } else if (BasicInstType == SIInstrFlags::VOPC &&
+ Inst.getNumOperands() == 0) {
+ skippedVcc = true;
+ continue;
+ }
+ }
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
// Handle optional arguments
@@ -4184,20 +4195,30 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
} else {
llvm_unreachable("Invalid operand type");
}
+ skippedVcc = false;
}
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
-
- if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
+ if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
+ Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
// V_NOP_sdwa_vi has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+ if (isGFX9() &&
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+ }
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOP2:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+ if (isGFX9() &&
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+ }
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
@@ -4205,6 +4226,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
break;
case SIInstrFlags::VOPC:
+ if (isVI()) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+ }
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
@@ -4220,10 +4244,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
auto it = Inst.begin();
std::advance(
- it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+ it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}
-
}
/// Force static initialization.
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 137b5cca96ce..9b3cde7c4df6 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -62,32 +62,33 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
return addOperand(Inst, MCOperand::createImm(Imm));
}
-#define DECODE_OPERAND2(RegClass, DecName) \
-static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \
- unsigned Imm, \
- uint64_t /*Addr*/, \
- const void *Decoder) { \
+#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
+static DecodeStatus StaticDecoderName(MCInst &Inst, \
+ unsigned Imm, \
+ uint64_t /*Addr*/, \
+ const void *Decoder) { \
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
- return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \
+ return addOperand(Inst, DAsm->DecoderName(Imm)); \
}
-#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass)
+#define DECODE_OPERAND_REG(RegClass) \
+DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
-DECODE_OPERAND(VGPR_32)
-DECODE_OPERAND(VS_32)
-DECODE_OPERAND(VS_64)
+DECODE_OPERAND_REG(VGPR_32)
+DECODE_OPERAND_REG(VS_32)
+DECODE_OPERAND_REG(VS_64)
-DECODE_OPERAND(VReg_64)
-DECODE_OPERAND(VReg_96)
-DECODE_OPERAND(VReg_128)
+DECODE_OPERAND_REG(VReg_64)
+DECODE_OPERAND_REG(VReg_96)
+DECODE_OPERAND_REG(VReg_128)
-DECODE_OPERAND(SReg_32)
-DECODE_OPERAND(SReg_32_XM0_XEXEC)
-DECODE_OPERAND(SReg_64)
-DECODE_OPERAND(SReg_64_XEXEC)
-DECODE_OPERAND(SReg_128)
-DECODE_OPERAND(SReg_256)
-DECODE_OPERAND(SReg_512)
+DECODE_OPERAND_REG(SReg_32)
+DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
+DECODE_OPERAND_REG(SReg_64)
+DECODE_OPERAND_REG(SReg_64_XEXEC)
+DECODE_OPERAND_REG(SReg_128)
+DECODE_OPERAND_REG(SReg_256)
+DECODE_OPERAND_REG(SReg_512)
static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
@@ -106,6 +107,13 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
+#define DECODE_SDWA9(DecName) \
+DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName)
+
+DECODE_SDWA9(Src32)
+DECODE_SDWA9(Src16)
+DECODE_SDWA9(VopcDst)
+
#include "AMDGPUGenDisassemblerTables.inc"
//===----------------------------------------------------------------------===//
@@ -164,6 +172,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
if (Res) break;
+
+ Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
+ if (Res) break;
}
// Reinitialize Bytes as DPP64 could have eaten too much
@@ -582,6 +593,48 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
+MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width,
+ unsigned Val) const {
+ using namespace AMDGPU::SDWA;
+
+ if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_VGPR_MAX) {
+ return createRegOperand(getVgprClassId(Width),
+ Val - SDWA9EncValues::SRC_VGPR_MIN);
+ }
+ if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_SGPR_MAX) {
+ return createSRegOperand(getSgprClassId(Width),
+ Val - SDWA9EncValues::SRC_SGPR_MIN);
+ }
+
+ return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
+}
+
+MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const {
+ return decodeSDWA9Src(OPW16, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const {
+ return decodeSDWA9Src(OPW32, Val);
+}
+
+
+MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const {
+ using namespace AMDGPU::SDWA;
+
+ if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
+ Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
+ if (Val > AMDGPU::EncValues::SGPR_MAX) {
+ return decodeSpecialReg64(Val);
+ } else {
+ return createSRegOperand(getSgprClassId(OPW64), Val);
+ }
+ } else {
+ return createRegOperand(AMDGPU::VCC);
+ }
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 620bae0a6d1a..0ff405a71e9b 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -104,6 +104,11 @@ public:
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
+
+ MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSDWA9Src16(unsigned Val) const;
+ MCOperand decodeSDWA9Src32(unsigned Val) const;
+ MCOperand decodeSDWA9VopcDst(unsigned Val) const;
};
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 3bb5c9bc22b7..8ead48067336 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -191,6 +191,7 @@ public:
}
};
+namespace {
// just a stub to make base class happy
class SchedStrategyStub : public MachineSchedStrategy {
public:
@@ -202,6 +203,7 @@ public:
void releaseTopNode(SUnit *SU) override {}
void releaseBottomNode(SUnit *SU) override {}
};
+} // namespace
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
StrategyKind S)
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index c6d0f2179950..d378df674be9 100644
--- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -17,6 +17,7 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
+namespace {
class GCNMinRegScheduler {
struct Candidate : ilist_node<Candidate> {
const SUnit *SU;
@@ -71,6 +72,7 @@ public:
std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
const ScheduleDAG &DAG);
};
+} // namespace
void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
NumPreds.resize(SUnits.size());
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 18374dca3f84..390a8286c76a 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -211,9 +211,9 @@ static LaneBitmask getUsedRegMask(const MachineOperand &MO,
return getLiveLaneMask(MO.getReg(), SI, LIS, MRI);
}
-SmallVector<RegisterMaskPair, 8> collectVirtualRegUses(const MachineInstr &MI,
- const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI) {
+static SmallVector<RegisterMaskPair, 8>
+collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
SmallVector<RegisterMaskPair, 8> Res;
for (const auto &MO : MI.operands()) {
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 3d3858ab47ec..a856b17a228f 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -52,6 +52,18 @@ public:
return 0;
}
+ virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
+ virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
protected:
uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
void verifyInstructionPredicates(const MCInst &MI,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index bda0928036fd..e02acf516c0d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -69,6 +69,14 @@ public:
unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+ unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // end anonymous namespace
@@ -319,6 +327,44 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MO, Fixups, STI);
}
+unsigned
+SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ using namespace AMDGPU::SDWA;
+
+ uint64_t RegEnc = 0;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ unsigned Reg = MO.getReg();
+ RegEnc |= MRI.getEncodingValue(Reg);
+ RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
+ if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
+ RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
+ }
+ return RegEnc;
+}
+
+unsigned
+SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ using namespace AMDGPU::SDWA;
+
+ uint64_t RegEnc = 0;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ unsigned Reg = MO.getReg();
+ if (Reg != AMDGPU::VCC) {
+ RegEnc |= MRI.getEncodingValue(Reg);
+ RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
+ RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
+ }
+ return RegEnc;
+}
+
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 3590a9b05e1d..60b913cfd39a 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1618,6 +1618,14 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const {
+ // Local and Private addresses do not handle vectors. Limit to i32
+ if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
+ return (MemVT.getSizeInBits() <= 32);
+ }
+ return true;
+}
+
bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
unsigned Align,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index 9700ce14c6f3..d6a0876a6ee7 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -44,6 +44,8 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const override;
+ bool canMergeStoresTo(unsigned AS, EVT MemVT) const override;
+
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
bool *IsFast) const override;
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index cc667d985a82..3c1e8527284c 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -226,7 +226,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_Addr,
R600_KC0, R600_KC1,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
- ALU_CONST, ALU_PARAM, OQAP
+ ALU_CONST, ALU_PARAM, OQAP, INDIRECT_BASE_ADDR
)>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index a01330cb9171..80967edee0ab 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -118,6 +118,10 @@ namespace AMDGPU {
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,
+ // Operand for GFX9 SDWA instructions
+ OPERAND_SDWA9_SRC,
+ OPERAND_SDWA9_VOPC_DST,
+
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
OPERAND_KIMM16
@@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants {
DEFAULT = 0,
VOP3 = 1,
SDWA = 2,
- DPP = 3
+ SDWA9 = 3,
+ DPP = 4
};
}
@@ -294,6 +299,18 @@ enum DstUnused {
UNUSED_PRESERVE = 2,
};
+enum SDWA9EncValues {
+ SRC_SGPR_MASK = 0x100,
+ SRC_VGPR_MASK = 0xFF,
+ VOPC_DST_VCC_MASK = 0x80,
+ VOPC_DST_SGPR_MASK = 0x7F,
+
+ SRC_VGPR_MIN = 0,
+ SRC_VGPR_MAX = 255,
+ SRC_SGPR_MIN = 256,
+ SRC_SGPR_MAX = 357,
+};
+
} // namespace SDWA
} // namespace AMDGPU
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 01c1f78e7ca4..76c2644867aa 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -698,6 +698,18 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
}
+bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
+ return (MemVT.getSizeInBits() <= 4 * 32);
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
+ return (MemVT.getSizeInBits() <= MaxPrivateBits);
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ return (MemVT.getSizeInBits() <= 2 * 32);
+ }
+ return true;
+}
+
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
unsigned Align,
@@ -4229,12 +4241,40 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
- if (VT == MVT::i64) {
- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
- if (CRHS) {
- if (SDValue Split
- = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
- return Split;
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (VT == MVT::i64 && CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
+ return Split;
+ }
+
+ if (CRHS && VT == MVT::i32) {
+ // and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb
+ // nb = number of trailing zeroes in mask
+ // It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass,
+ // given that we are selecting 8- or 16-bit fields starting at a byte boundary.
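+ // For example, with c = 8 and mask = 0xff00 (nb = 8, an 8-bit field):
+ //   and (srl x, 8), 0xff00  -->  shl (bfe x, 16, 8), 8
+ // Both forms select bits 16..23 of x and place them at bits 8..15.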
+ uint64_t Mask = CRHS->getZExtValue();
+ unsigned Bits = countPopulation(Mask);
+ if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL &&
+ (Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) {
+ if (auto *CShift = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
+ unsigned Shift = CShift->getZExtValue();
+ unsigned NB = CRHS->getAPIntValue().countTrailingZeros();
+ unsigned Offset = NB + Shift;
+ if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary.
+ SDLoc SL(N);
+ SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+ LHS->getOperand(0),
+ DAG.getConstant(Offset, SL, MVT::i32),
+ DAG.getConstant(Bits, SL, MVT::i32));
+ EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+ SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE,
+ DAG.getValueType(NarrowVT));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext,
+ DAG.getConstant(NB, SDLoc(CRHS), MVT::i32));
+ return Shl;
+ }
+ }
}
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index e68837747491..8e2ec40b224c 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -150,6 +150,8 @@ public:
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
+ bool canMergeStoresTo(unsigned AS, EVT MemVT) const override;
+
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
bool *IsFast) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 38a16b525a75..36d29b8ecf06 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2331,6 +2331,10 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI.getOpcode();
+
+ if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
+ return true;
+
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 7b052844f177..c5287c7f64ba 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -439,6 +439,27 @@ def ExpSrc3 : RegisterOperand<VGPR_32> {
let ParserMatchClass = VReg32OrOffClass;
}
+class SDWA9Src : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_SDWA9_SRC";
+ let EncoderMethod = "getSDWA9SrcEncoding";
+}
+
+def SDWA9Src32 : SDWA9Src {
+ let DecoderMethod = "decodeSDWA9Src32";
+}
+
+def SDWA9Src16 : SDWA9Src {
+ let DecoderMethod = "decodeSDWA9Src16";
+}
+
+def SDWA9VopcDst : VOPDstOperand<SReg_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_SDWA9_VOPC_DST";
+ let EncoderMethod = "getSDWA9VopcDstEncoding";
+ let DecoderMethod = "decodeSDWA9VopcDst";
+}
+
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
let Name = "Imm"#CName;
let PredicateMethod = "is"#CName;
@@ -588,6 +609,16 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass>
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
+def FPRegInputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithFPInputMods";
+ let ParserMethod = "parseRegWithFPInputMods";
+ let PredicateMethod = "isRegKind";
+}
+
+def FPRegInputMods : InputMods <FPRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndFPInputMods";
+}
+
def FPVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithFPInputMods";
let ParserMethod = "parseRegWithFPInputMods";
@@ -598,6 +629,17 @@ def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndFPInputMods";
}
+
+def IntRegInputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithIntInputMods";
+ let ParserMethod = "parseRegWithIntInputMods";
+ let PredicateMethod = "isRegKind";
+}
+
+def IntRegInputMods : InputMods <IntRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndIntInputMods";
+}
+
def IntVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithIntInputMods";
let ParserMethod = "parseRegWithIntInputMods";
@@ -783,6 +825,14 @@ class getVALUDstForVT<ValueType VT> {
VOPDstOperand<SReg_64>)))); // else VT == i1
}
+// Returns the register class to use for the destination of VOP[12C]
+// instructions with GFX9 SDWA extension
+class getSDWA9DstForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 1),
+ SDWA9VopcDst, // VOPC
+ VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
+}
+
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
@@ -823,6 +873,9 @@ class getVregSrcForVT<ValueType VT> {
!if(!eq(VT.Size, 64), VReg_64, VGPR_32));
}
+class getSDWA9SrcForVT <ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32);
+}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
@@ -926,6 +979,15 @@ class getSrcModExt <ValueType VT> {
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
+// Returns the type of the input modifiers operand for the specified
+// input operand of GFX9 SDWA instructions.
+class getSrcModSDWA9 <ValueType VT> {
+ bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ 0)));
+ Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods);
+}
+
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
@@ -1062,6 +1124,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
// VOP1 without input operands (V_NOP)
(ins),
!if(!eq(NumSrcArgs, 1),
+ // VOP1_SDWA
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel),
@@ -1071,7 +1134,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA or VOPC_SDWA with modifiers
+ // VOP2_SDWA with modifiers
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
@@ -1079,12 +1142,65 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
(ins)/* endif */)));
}
+// Ins for GFX9 SDWA
+class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
+ bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
+ ValueType DstVT> {
+
+ dag ret = !if(!eq(NumSrcArgs, 0),
+ // VOP1 without input operands (V_NOP)
+ (ins),
+ !if(!eq(NumSrcArgs, 1),
+ // VOP1
+ !if(!eq(HasSDWAOMod, 0),
+ // VOP1_SDWA9 without omod
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ clampmod:$clamp,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel),
+ // VOP1_SDWA9 with omod
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel)),
+ !if(!eq(NumSrcArgs, 2),
+ !if(!eq(DstVT.Size, 1),
+ // VOPC_SDWA9
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA9
+ !if(!eq(HasSDWAOMod, 0),
+ // VOP2_SDWA9 without omod
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA9 with omod
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel))),
+ (ins)/* endif */)));
+}
+
// Outs for DPP and SDWA
-class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
+class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
(outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
- (outs DstRCDPP:$vdst)),
+ (outs DstRCExt:$vdst)),
+ (outs)); // V_NOP
+}
+
+// Outs for GFX9 SDWA
+class getOutsSDWA9 <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA9> {
+ dag ret = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ (outs DstRCSDWA9:$sdst),
+ (outs DstRCSDWA9:$vdst)),
(outs)); // V_NOP
}
@@ -1153,8 +1269,7 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}
-class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
- ValueType DstVT = i32> {
+class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
" vcc", // use vcc token as dst for VOPC instructioins
@@ -1182,6 +1297,35 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
string ret = dst#args#sdwa;
}
+class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
+ ValueType DstVT = i32> {
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst", // VOPC
+ "$vdst"), // VOP1/2
+ "");
+ string src0 = "$src0_modifiers";
+ string src1 = "$src1_modifiers";
+ string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
+ string args = !if(!eq(NumSrcArgs, 0), "",
+ !if(!eq(NumSrcArgs, 1),
+ ", "#src0,
+ ", "#src0#", "#src1
+ )
+ );
+ string sdwa = !if(!eq(NumSrcArgs, 0), "",
+ !if(!eq(NumSrcArgs, 1),
+ out_mods#" $dst_sel $dst_unused $src0_sel",
+ !if(!eq(DstVT.Size, 1),
+ " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
+ out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
+ )
+ )
+ );
+ string ret = dst#args#sdwa;
+}
+
+
// Function that checks if instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
@@ -1219,6 +1363,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -1228,6 +1373,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
+ field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
field Operand Src0Mod = getSrcMod<Src0VT>.ret;
field Operand Src1Mod = getSrcMod<Src1VT>.ret;
field Operand Src2Mod = getSrcMod<Src2VT>.ret;
@@ -1235,6 +1382,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret;
+ field Operand Src0ModSDWA9 = getSrcModSDWA9<Src0VT>.ret;
+ field Operand Src1ModSDWA9 = getSrcModSDWA9<Src1VT>.ret;
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
@@ -1261,14 +1410,16 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
field bit HasClamp = HasModifiers;
- field bit HasSDWAClamp = HasSrc0;
+ field bit HasSDWAClamp = EmitDst;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
field bit IsPacked = isPackedType<Src0VT>.ret;
field bit HasOpSel = IsPacked;
field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
+ field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasSDWA9 = HasExt;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1282,6 +1433,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Outs64 = Outs;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret;
+ field dag OutsSDWA9 = getOutsSDWA9<HasDst, DstVT, DstRCSDWA9>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
@@ -1296,16 +1448,21 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasModifiers, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
+ field dag InsSDWA9 = getInsSDWA9<Src0SDWA9, Src1SDWA9, NumSrcArgs,
+ HasSDWAOMod, Src0ModSDWA9, Src1ModSDWA9,
+ DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
- field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
+ field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
}
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
+ let HasSDWA9 = 0;
}
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
@@ -1446,6 +1603,15 @@ def getSDWAOp : InstrMapping {
let ValueCols = [["SDWA"]];
}
+// Maps ordinary instructions to their SDWA GFX9 counterparts
+def getSDWA9Op : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["SDWA9"]];
+}
+
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index f2d8b6f7b7a4..ec29a66c8bbb 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -184,7 +184,9 @@ def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">;
def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
-def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">;
+def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
+ [(set i64:$sdst, (int_amdgcn_s_getpc))]
+>;
let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2abd4afad3b6..630f469eabf0 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -544,6 +544,17 @@ bool isVI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
+bool isGFX9(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+}
+
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
+ const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
+ Reg == AMDGPU::SCC;
+}
+
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
switch(Reg) {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 8e74aa2cc9a8..19888ad7556a 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -273,6 +273,10 @@ inline bool isKernel(CallingConv::ID CC) {
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
+bool isGFX9(const MCSubtargetInfo &STI);
+
+/// \brief Return true if \p Reg is a scalar register.
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 1febc6bf8ec2..95b5ef0a49db 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -30,6 +30,15 @@ class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
let Inst{31-25} = 0x3f; // encoding
}
+class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
+ bits<8> vdst;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; // encoding
+}
+
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
InstSI <P.Outs32, P.Ins32, "", pattern>,
VOP <opName>,
@@ -84,6 +93,11 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
+class VOP1_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA9_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOP1";
+}
+
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -103,6 +117,7 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+ def _sdwa9 : VOP1_SDWA9_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -243,6 +258,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC64 = VRegSrc_32;
let HasExt = 0;
+ let HasSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
@@ -258,16 +274,21 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0,
+ let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel);
+ let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
+ clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
- let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
+ let AsmSDWA = getAsmSDWA<1, 1>.ret;
+ let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let HasExt = 0;
+ let HasSDWA9 = 0;
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
@@ -324,7 +345,7 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
} // End SubtargetPredicate = isCIVI
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = Has16BitInsts in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
@@ -347,7 +368,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
}
-let Predicates = [isVI] in {
+let Predicates = [Has16BitInsts] in {
def : Pat<
(f32 (f16_to_fp i16:$src)),
@@ -523,6 +544,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 4a11d9471f1d..657cacaa792c 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -48,6 +48,18 @@ class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
let Inst{31} = 0x0; // encoding
}
+class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
+ bits<8> vdst;
+ bits<9> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
+}
+
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
InstSI <P.Outs32, P.Ins32, "", pattern>,
VOP <opName>,
@@ -102,6 +114,11 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
+class VOP2_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA9_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOP2";
+}
+
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -121,10 +138,10 @@ multiclass VOP2Inst <string opName,
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P>;
}
-// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
multiclass VOP2bInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@@ -136,7 +153,13 @@ multiclass VOP2bInst <string opName,
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2b";
+ }
+
+ def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2b";
+ }
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -203,13 +226,21 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
VGPR_32:$src2, // stub argument
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
+ Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
+ VGPR_32:$src2, // stub argument
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
- let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
+ let AsmSDWA = getAsmSDWA<1, 2, vt>.ret;
+ let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
let HasExt = 1;
+ let HasSDWA9 = 0;
}
def VOP_MAC_F16 : VOP_MAC <f16> {
@@ -229,6 +260,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
let Asm32 = "$vdst, vcc, $src0, $src1";
let Asm64 = "$vdst, $sdst, $src0, $src1";
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
@@ -246,6 +278,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
@@ -254,16 +287,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
// implicit VCC use.
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
- let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0,
- Src1Mod:$src1_modifiers, Src1SDWA:$src1,
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
+ Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+
let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
Src1Mod:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
+ let HasSDWA9 = 1;
}
// Read in from vcc or arbitrary SGPR
@@ -387,7 +427,7 @@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
} // End let SubtargetPredicate = SICI
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = Has16BitInsts in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
@@ -418,7 +458,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End isCommutable = 1
-} // End SubtargetPredicate = isVI
+} // End SubtargetPredicate = Has16BitInsts
// Note: 16-bit instructions produce a 0 result in the high 16-bits.
multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {
@@ -468,7 +508,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
>;
-let Predicates = [isVI] in {
+let Predicates = [Has16BitInsts] in {
defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
@@ -513,7 +553,7 @@ def : Pat<
(V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
>;
-} // End Predicates = [isVI]
+} // End Predicates = [Has16BitInsts]
//===----------------------------------------------------------------------===//
// SI
@@ -686,15 +726,21 @@ multiclass VOP2_SDWA_Real <bits<6> op> {
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
+multiclass VOP2_SDWA9_Real <bits<6> op> {
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9")>,
+ VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+}
+
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
- Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
+ Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
- Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
+ Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index c0b5069948fb..001fc960b228 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -243,7 +243,7 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
} // End SubtargetPredicate = isCIVI
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = Has16BitInsts in {
let isCommutable = 1 in {
@@ -258,12 +258,13 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
} // End isCommutable = 1
+} // End SubtargetPredicate = Has16BitInsts
+let SubtargetPredicate = isVI in {
def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-
} // End SubtargetPredicate = isVI
-let Predicates = [isVI] in {
+let Predicates = [Has16BitInsts] in {
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
Instruction inst, SDPatternOperator op3> {
@@ -288,7 +289,7 @@ def : Pat<
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
-} // End Predicates = [isVI]
+} // End Predicates = [Has16BitInsts]
let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index a3550a63677b..cd347b86d305 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -34,6 +34,17 @@ class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
let Inst{44-43} = SDWA.UNUSED_PRESERVE;
}
+class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P> {
+ bits<9> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e; // encoding
+ let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
+}
+
+
//===----------------------------------------------------------------------===//
// VOPC classes
//===----------------------------------------------------------------------===//
@@ -102,6 +113,11 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOPC";
}
+class VOPC_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA9_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOPC";
+}
+
// This class is used only with VOPC instructions. Use $sdst for out operand
class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
@@ -173,6 +189,13 @@ multiclass VOPC_Pseudos <string opName,
let isConvergent = DefExec;
let isCompare = 1;
}
+
+ def _sdwa9 : VOPC_SDWA9_Pseudo <opName, P> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = P.Schedule;
+ let isConvergent = DefExec;
+ let isCompare = 1;
+ }
}
def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
@@ -520,7 +543,11 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
+ Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
+ //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
@@ -553,6 +580,12 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
+
+ def _sdwa9 : VOPC_SDWA9_Pseudo <opName, p> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = p.Schedule;
+ let isConvergent = DefExec;
+ }
}
def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
@@ -920,6 +953,10 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9")>,
+ VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+
def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_vi")> {
let AssemblerPredicate = isVI;
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 69906c419db3..4da654f84f9d 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -293,11 +293,52 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE);
let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD);
- let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
+ let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD);
+ let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
+}
+
+// gfx9 SDWA basic encoding
+class VOP_SDWA9e<VOPProfile P> : Enc64 {
+ bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
+ bits<3> src0_sel;
+ bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
+ bits<3> src1_sel;
+ bits<2> src1_modifiers;
+ bits<1> src1_sgpr;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD);
+ let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
+ let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
+ let Inst{55} = !if(P.HasSrc0, src0{8}, 0);
+ let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD);
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
+ let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
+ let Inst{63} = 0; // src1_sgpr - should be specified in subclass
+}
+
+// gfx9 SDWA-A
+class VOP_SDWA9Ae<VOPProfile P> : VOP_SDWA9e<P> {
+ bits<3> dst_sel;
+ bits<2> dst_unused;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD);
+ let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE);
+ let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
+ let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0);
+}
+
+// gfx9 SDWA-B
+class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> {
+ bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}}
+
+ let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0);
+ let Inst{47} = !if(P.EmitDst, sdst{7}, 0);
}
class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
@@ -331,6 +372,50 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOPProfile Pfl = P;
}
+// GFX9 adds two features to SDWA:
+// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
+// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
+// than VGPRs (at most 1 can be an SGPR);
+// b. OMOD is the standard output modifier (result *2, *4, /2)
+// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
+// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
+// fields.
+// a. When SD=1, the SDST is used as the destination for the compare result;
+// b. When SD=0, VCC is used.
+//
+// In GFX9, the V_MAC_F16 and V_MAC_F32 opcodes cannot be used with SDWA.
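+//
+// In the encodings above, S0 is Inst{55} and S1 is Inst{63} (VOP_SDWA9e),
+// OMOD is Inst{47-46} (VOP_SDWA9Ae), and SD/SDST are Inst{47}/Inst{46-40}
+// (VOP_SDWA9Be).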
+
+class VOP_SDWA9_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsSDWA9, P.InsSDWA9, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_sdwa9", SIEncodingFamily.NONE>,
+ MnemonicAlias <opName#"_sdwa9", opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.AsmSDWA9;
+
+ let Size = 8;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+
+ let VALU = 1;
+ let SDWA = 1;
+ let Uses = [EXEC];
+
+ let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "SDWA9";
+
+ VOPProfile Pfl = P;
+}
+
class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
@@ -358,6 +443,33 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
let TSFlags = ps.TSFlags;
}
+class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let TSFlags = ps.TSFlags;
+}
+
class VOP_DPPe<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<8> src0;
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 46ac4d0ad933..31a2f499a9a7 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -34,6 +34,9 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
+ if (T->isArrayTy())
+ return true;
+
EVT VT = TLI.getValueType(DL, T, true);
if (!VT.isSimple() || VT.isVector() ||
!(VT.isInteger() || VT.isFloatingPoint()))
@@ -148,23 +151,47 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
};
} // End anonymous namespace.
-void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
- SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL,
- MachineRegisterInfo &MRI) const {
+void ARMCallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ MachineFunction &MF, const SplitArgTy &PerformArgSplit) const {
const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
+ const DataLayout &DL = MF.getDataLayout();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function *F = MF.getFunction();
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
- assert(SplitVTs.size() == 1 && "Unsupported type");
+ if (SplitVTs.size() == 1) {
+ // Even if there is no splitting to do, we still want to replace the
+ // original type (e.g. pointer type -> integer).
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.Flags, OrigArg.IsFixed);
+ return;
+ }
+
+ unsigned FirstRegIdx = SplitArgs.size();
+ for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
+ EVT SplitVT = SplitVTs[i];
+ Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
+ auto Flags = OrigArg.Flags;
+ bool NeedsConsecutiveRegisters =
+ TLI.functionArgumentNeedsConsecutiveRegisters(
+ SplitTy, F->getCallingConv(), F->isVarArg());
+ if (NeedsConsecutiveRegisters) {
+ Flags.setInConsecutiveRegs();
+ if (i == e - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+ SplitArgs.push_back(
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
+ SplitTy, Flags, OrigArg.IsFixed});
+ }
- // Even if there is no splitting to do, we still want to replace the original
- // type (e.g. pointer type -> integer).
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags, OrigArg.IsFixed);
+ for (unsigned i = 0; i < Offsets.size(); ++i)
+ PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
/// Lower the return value for the already existing \p Ret. This assumes that
@@ -187,7 +214,9 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitVTs;
ArgInfo RetInfo(VReg, Val->getType());
setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo());
+ splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
+ });
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -307,6 +336,26 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
return 1;
}
+ /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets.
+ /// Note that the source registers are not required to have homogeneous types,
+ /// so we use G_INSERT rather than G_MERGE_VALUES.
+ // FIXME: Use G_MERGE_VALUES if the types are homogeneous.
+ void mergeRegisters(unsigned DstReg, ArrayRef<unsigned> SrcRegs,
+ ArrayRef<uint64_t> SrcOffsets) {
+ LLT Ty = MRI.getType(DstReg);
+
+ unsigned Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+
+ for (unsigned i = 0; i < SrcRegs.size(); ++i) {
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]);
+ Dst = Tmp;
+ }
+
+ MIRBuilder.buildCopy(DstReg, Dst);
+ }
+
/// Marking a physical register as used is different between formal
/// parameters, where it's a basic block live-in, and call returns, where it's
/// an implicit-def of the call instruction.
@@ -335,6 +384,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return false;
auto &MF = MIRBuilder.getMF();
+ auto &MBB = MIRBuilder.getMBB();
auto DL = MF.getDataLayout();
auto &TLI = *getTLI<ARMTargetLowering>();
@@ -350,17 +400,34 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
+ AssignFn);
+
SmallVector<ArgInfo, 8> ArgInfos;
+ SmallVector<unsigned, 4> SplitRegs;
+ SmallVector<uint64_t, 4> RegOffsets;
unsigned Idx = 0;
for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
- splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo());
+
+ SplitRegs.clear();
+ RegOffsets.clear();
+
+ splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
+ SplitRegs.push_back(Reg);
+ RegOffsets.push_back(Offset);
+ });
+
+ if (!SplitRegs.empty())
+ ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets);
+
Idx++;
}
- FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
- AssignFn);
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
}
@@ -407,7 +474,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!Arg.IsFixed)
return false;
- splitToValueTypes(Arg, ArgInfos, DL, MRI);
+ splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, Arg.Reg, Offset);
+ });
}
auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
@@ -423,12 +492,24 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
ArgInfos.clear();
- splitToValueTypes(OrigRet, ArgInfos, DL, MRI);
+ SmallVector<uint64_t, 8> RegOffsets;
+ SmallVector<unsigned, 8> SplitRegs;
+ splitToValueTypes(OrigRet, ArgInfos, MF,
+ [&](unsigned Reg, uint64_t Offset) {
+ RegOffsets.push_back(Offset);
+ SplitRegs.push_back(Reg);
+ });
auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
+
+ if (!RegOffsets.empty()) {
+ // We have split the value and allocated each individual piece, now build
+ // it up again.
+ RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets);
+ }
}
// We now know the size of the stack - update the ADJCALLSTACKDOWN
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 6404c7a2689e..f5a6872336f6 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -42,11 +42,14 @@ private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
unsigned VReg, MachineInstrBuilder &Ret) const;
+ typedef std::function<void(unsigned Reg, uint64_t Offset)> SplitArgTy;
+
/// Split an argument into one or more arguments that the CC lowering can cope
/// with (e.g. replace pointers with integers).
void splitToValueTypes(const ArgInfo &OrigArg,
SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI) const;
+ MachineFunction &MF,
+ const SplitArgTy &PerformArgSplit) const;
};
} // End of namespace llvm
#endif
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 78a9144bd321..90baabcdb652 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -779,7 +779,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineOperand &Desired = MI.getOperand(3);
MachineOperand &New = MI.getOperand(4);
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LivePhysRegs LiveRegs(TII->getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
for (auto I = std::prev(MBB.end()); I != MBBI; --I)
LiveRegs.stepBackward(*I);
@@ -903,7 +903,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LivePhysRegs LiveRegs(TII->getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
for (auto I = std::prev(MBB.end()); I != MBBI; --I)
LiveRegs.stepBackward(*I);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f8b584db7b99..62e774d869da 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -127,7 +127,7 @@ static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
"constant pools"),
- cl::init(true));
+ cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
"arm-promote-constant-max-size", cl::Hidden,
cl::desc("Maximum size of constant to promote into a constant pool"),
@@ -12147,12 +12147,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
- // Lowering to i32/i16 if the size permits.
- if (Size >= 4)
- return MVT::i32;
- else if (Size >= 2)
- return MVT::i16;
-
// Let the target-independent logic figure it out.
return MVT::Other;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 875c06210ae6..26da528c19e6 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -510,7 +510,7 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
- bool canMergeStoresTo(EVT MemVT) const override {
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 51290e5a5b93..858136a82078 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -674,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -682,7 +682,7 @@ class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x2,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -703,7 +703,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -711,7 +711,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -720,7 +720,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -728,7 +728,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -747,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -756,7 +756,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -764,7 +764,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -780,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -797,7 +797,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -805,7 +805,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -821,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -837,22 +837,22 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -875,45 +875,45 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
}
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn), IIC_VLD3,
- "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -923,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -933,7 +933,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -942,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...with double-spaced registers:
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
@@ -954,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
-def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
-def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$Rn), IIC_VLD4,
- "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
+ Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -982,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -992,7 +993,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -1001,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers:
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
@@ -1013,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
-def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
-def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1076,11 +1077,12 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"$src = $Vd",
[(set DPR:$Vd, (vector_insert (Ty DPR:$src),
(i32 (LoadOp addrmode6oneL32:$Rn)),
- imm:$lane))]> {
+ imm:$lane))]>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD1LN";
}
-class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
+ Sched<[WriteVLD1]> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
(i32 (LoadOp addrmode6:$addr)),
imm:$lane))];
@@ -1117,7 +1119,7 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$src = $Vd, $Rn.addr = $wb", []> {
+ "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let DecoderMethod = "DecodeVLD1LN";
}
@@ -1134,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
let Inst{4} = Rn{4};
}
-def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2", []> {
+ "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2LN";
@@ -1159,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
@@ -1171,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
-def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1195,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1206,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1215,7 +1217,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1230,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
@@ -1242,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1254,7 +1256,7 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
- []> {
+ []>, Sched<[WriteVLD2]> {
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1268,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1279,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1289,7 +1291,8 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
@@ -1306,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
@@ -1319,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1347,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1359,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1371,7 +1374,8 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
(ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1434,7 +1438,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1491,7 +1495,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
@@ -1500,7 +1504,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
@@ -1524,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
class VLD3DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6dup:$Rn), IIC_VLD3dup,
- "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
@@ -1534,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
-def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
@@ -1548,7 +1553,7 @@ class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
@@ -1561,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
-def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
@@ -1580,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
@@ -1595,7 +1600,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt>
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4DupInstruction";
}
@@ -1608,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1657,14 +1662,14 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
- IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
- IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1685,7 +1690,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1694,7 +1699,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1703,7 +1708,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1712,7 +1717,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1732,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins AddrMode:$Rn, VecListThreeD:$Vd),
- IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1741,7 +1746,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1750,7 +1755,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1766,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
-def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins AddrMode:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
- []> {
+ []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1784,7 +1789,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1793,7 +1798,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1809,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
-def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1824,22 +1829,22 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
-def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -1847,7 +1852,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1855,7 +1860,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1864,7 +1869,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1873,7 +1878,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1890,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
-def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
@@ -1915,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
- "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -1925,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1935,7 +1940,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -1944,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...with double-spaced registers:
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
@@ -1956,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
-def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
-def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -1984,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1994,7 +1999,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -2003,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...with double-spaced registers:
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
@@ -2015,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
-def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
-def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
@@ -2060,12 +2065,13 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
+ Sched<[WriteVST1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNPseudo<IIC_VST1ln> {
+ : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
@@ -2104,11 +2110,12 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- AdrMode:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]>,
+ Sched<[WriteVST1]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNWBPseudo<IIC_VST1lnu> {
+ : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr, am6offset:$offset))];
}
@@ -2139,7 +2146,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
@@ -2155,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
@@ -2169,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
let Inst{4} = Rn{4};
}
-def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
-def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2193,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2204,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
let Inst{7} = lane{0};
}
-def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
- "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
+ Sched<[WriteVST2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST3LN";
}
@@ -2227,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
@@ -2263,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2274,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
let Inst{7} = lane{0};
}
-def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2283,7 +2291,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST4LN";
@@ -2300,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
@@ -2313,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
-def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2339,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2351,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 87eb4c2b9074..ec5b97cba8cd 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -131,6 +131,17 @@ def WriteFPDIV64 : SchedWrite;
def WriteFPSQRT32 : SchedWrite;
def WriteFPSQRT64 : SchedWrite;
+// Vector loads and stores
+def WriteVLD1 : SchedWrite;
+def WriteVLD2 : SchedWrite;
+def WriteVLD3 : SchedWrite;
+def WriteVLD4 : SchedWrite;
+def WriteVST1 : SchedWrite;
+def WriteVST2 : SchedWrite;
+def WriteVST3 : SchedWrite;
+def WriteVST4 : SchedWrite;
+
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 8fb8a2a3b6d2..4e72b13d94cb 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
// Reserve A9UnitFP for 2 consecutive cycles.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 4;
diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td
index 537e5da9669f..782be9b60a7a 100644
--- a/lib/Target/ARM/ARMScheduleR52.td
+++ b/lib/Target/ARM/ARMScheduleR52.td
@@ -120,6 +120,12 @@ def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+// Overridden via InstRW for this processor.
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
@@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
-def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
-def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
+def : WriteRes<WriteVLD2, [R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
let SingleIssue = 1;
}
-def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD3, [R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
let SingleIssue = 1;
}
-def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD4, [R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
@@ -829,95 +835,6 @@ def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VR
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
//---
-// VLDx. Vector Loads
-//---
-// 1-element structure load
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
-
-// 2-element structure load
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
-
-// 3-element structure load
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-// 4-element structure load
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-//---
// VSTx. Vector Stores
//---
// 1-element structure store
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index dc041c6c6006..b838688c6f04 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in {
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;
+ // Overridden via InstRW for this processor.
+ def : WriteRes<WriteVLD1, []>;
+ def : WriteRes<WriteVLD2, []>;
+ def : WriteRes<WriteVLD3, []>;
+ def : WriteRes<WriteVLD4, []>;
+ def : WriteRes<WriteVST1, []>;
+ def : WriteRes<WriteVST2, []>;
+ def : WriteRes<WriteVST3, []>;
+ def : WriteRes<WriteVST4, []>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1979cbf50125..c4f23c66e4ea 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -85,9 +85,9 @@ namespace llvm {
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
+ RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget());
RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
- RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget());
- RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget());
+ RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget());
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeGlobalISel(Registry);
@@ -263,6 +263,11 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
else
this->Options.EABIVersion = EABI::EABI5;
}
+
+ initAsmInfo();
+ if (!Subtarget.isThumb() && !Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -355,22 +360,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
});
}
-void ARMTargetMachine::anchor() {}
-
-ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM, CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
- initAsmInfo();
- if (!Subtarget.hasARMOps())
- report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
- "support ARM mode execution!");
-}
-
-void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -378,9 +367,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
-void ARMBETargetMachine::anchor() {}
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -388,39 +375,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void ThumbTargetMachine::anchor() {}
-
-ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
- initAsmInfo();
-}
-
-void ThumbLETargetMachine::anchor() {}
-
-ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
-void ThumbBETargetMachine::anchor() {}
-
-ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index f0ca9427d9fb..e5eb27114c72 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -62,23 +62,9 @@ public:
}
};
-/// ARM target machine.
+/// ARM/Thumb little endian target machine.
///
-class ARMTargetMachine : public ARMBaseTargetMachine {
- virtual void anchor();
-
-public:
- ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle);
-};
-
-/// ARM little endian target machine.
-///
-class ARMLETargetMachine : public ARMTargetMachine {
- void anchor() override;
-
+class ARMLETargetMachine : public ARMBaseTargetMachine {
public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -86,11 +72,9 @@ public:
CodeGenOpt::Level OL);
};
-/// ARM big endian target machine.
+/// ARM/Thumb big endian target machine.
///
-class ARMBETargetMachine : public ARMTargetMachine {
- void anchor() override;
-
+class ARMBETargetMachine : public ARMBaseTargetMachine {
public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -98,44 +82,6 @@ public:
CodeGenOpt::Level OL);
};
-/// Thumb target machine.
-/// Due to the way architectures are handled, this represents both
-/// Thumb-1 and Thumb-2.
-///
-class ThumbTargetMachine : public ARMBaseTargetMachine {
- virtual void anchor();
-
-public:
- ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle);
-};
-
-/// Thumb little endian target machine.
-///
-class ThumbLETargetMachine : public ThumbTargetMachine {
- void anchor() override;
-
-public:
- ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// Thumb big endian target machine.
-///
-class ThumbBETargetMachine : public ThumbTargetMachine {
- void anchor() override;
-
-public:
- ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
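With the Thumb* classes gone, the ARM/Thumb split is carried entirely by the triple, and only endianness selects the TargetMachine subclass; ARMBaseTargetMachine itself now calls initAsmInfo() and rejects CPUs without ARM-mode support. A condensed view of the resulting hierarchy, restating the hunks above rather than introducing new code:

  // Both the arm* and thumb* triples now register these two classes
  // (see the LLVMInitializeARMTarget hunk earlier in this patch).
  class ARMLETargetMachine : public ARMBaseTargetMachine { /* isLittle = true  */ };
  class ARMBETargetMachine : public ARMBaseTargetMachine { /* isLittle = false */ };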
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 94f9e8dfebbf..edbf2b99126c 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -30,8 +30,8 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM);
- bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
+ bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 1a17d4e33e4f..f917c35b9ceb 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -535,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Look for a temporary register to use.
// First, compute the liveness information.
- LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ LivePhysRegs UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
// The semantics of pristines changed recently and now,
// the callee-saved registers that are touched in the function
// are not part of the pristines set anymore.
// Add those callee-saved now.
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i)
UsedRegs.addReg(CSRegs[i]);
@@ -561,12 +561,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// And some temporary register, just in case.
unsigned TemporaryReg = 0;
BitVector PopFriendly =
- TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
// Rebuild the GPRs from the high registers because they are removed
// from the GPR reg class for thumb1.
BitVector GPRsNoLRSP =
- TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
GPRsNoLRSP |= PopFriendly;
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
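The hunk above (like the Hexagon and PowerPC hunks later in this patch) adapts to LivePhysRegs now taking a TargetRegisterInfo reference instead of a pointer. A minimal sketch of the updated usage, assuming only the calls visible in this diff:

  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);            // constructed from a reference now
  LiveRegs.addLiveOuts(MBB);             // seed with the block's live-outs
  // Callee-saved registers are no longer treated as pristine, so add them:
  for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); *CSR; ++CSR)
    LiveRegs.addReg(*CSR);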
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 06ad2b3ffdf8..f10ca394f36c 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -902,7 +902,6 @@ let Defs = [SREG] in
// CPI Rd, K
// Compares a register with an 8 bit immediate.
- let Uses = [SREG] in
def CPIRdK : FRdK<0b0011,
(outs),
(ins GPR8:$rd, imm_ldi8:$k),
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 6897161c903c..cc7a7c3849bc 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -132,6 +132,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128;
}
+bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ return false;
+}
+
SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::BR_CC:
@@ -496,8 +500,11 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
+ auto N = cast<GlobalAddressSDNode>(Op);
+ assert(N->getOffset() == 0 && "Invalid offset for global address");
+
SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = N->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);
return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 3d1726be286e..0b8a8ca20c3b 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -42,6 +42,10 @@ public:
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ // This method decides whether folding a constant offset
+ // with the given GlobalAddress is legal.
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
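SelectionDAG consults isOffsetFoldingLegal() before merging a constant addend into a GlobalAddressSDNode, so returning false keeps BPF global addresses offset-free and lets the new assert in LowerGlobalAddress hold. A rough, non-authoritative paraphrase of the guarded fold (the exact upstream code differs):

  // Hedged sketch only: with the BPF override, this branch is never taken,
  // so the +Delta stays as an explicit ISD::ADD around the wrapper node.
  if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr))
    if (TLI.isOffsetFoldingLegal(GA))
      Addr = DAG.getGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0),
                                  GA->getOffset() + Delta);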
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index a04aca4afa0f..25018b9ed510 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1657,7 +1657,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
// defined. From the point of view of the liveness tracking, it is ok to
// store it as a whole, but if we break it up we may end up storing a
// register that is entirely undefined.
- LivePhysRegs LPR(&HRI);
+ LivePhysRegs LPR(HRI);
LPR.addLiveIns(B);
SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
for (auto R = B.begin(); R != It; ++R) {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 03794511414e..66e07c67958e 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1254,7 +1254,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(2);
const MachineOperand &Op3 = MI.getOperand(3);
- LivePhysRegs LiveAtMI(&HRI);
+ LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
if (Op0.getReg() != Op2.getReg()) {
@@ -1283,7 +1283,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineOperand &Op1 = MI.getOperand(1);
MachineOperand &Op2 = MI.getOperand(2);
MachineOperand &Op3 = MI.getOperand(3);
- LivePhysRegs LiveAtMI(&HRI);
+ LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index 0f99dfe342b8..93fb688fc1c0 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -412,6 +412,15 @@ def PS_vstorerwu_ai: STrivv_template<VecDblRegs, V6_vS32Ub_ai>,
def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32Ub_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in {
+ def PS_vstorerq_ai: Pseudo<(outs),
+ (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs:$Qt), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vstorerq_ai_128B: Pseudo<(outs),
+ (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs128B:$Qt), "", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
// Vector load pseudos
let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1,
mayLoad = 1, hasSideEffects = 0 in
@@ -429,30 +438,16 @@ def PS_vloadrwu_ai: LDrivv_template<VecDblRegs, V6_vL32Ub_ai>,
def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32Ub_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
-// Store vector predicate pseudo.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
- def PS_vstorerq_ai : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
- ".error \"should not emit\" ", []>,
- Requires<[HasV60T,UseHVXSgl]>;
-
- def PS_vstorerq_ai_128B : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1),
- ".error \"should not emit\" ", []>,
- Requires<[HasV60T,UseHVXSgl]>;
-
- def PS_vloadrq_ai : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
- ".error \"should not emit\" ", []>,
- Requires<[HasV60T,UseHVXDbl]>;
-
- def PS_vloadrq_ai_128B : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
- ".error \"should not emit\" ", []>,
- Requires<[HasV60T,UseHVXDbl]>;
+let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
+ def PS_vloadrq_ai: Pseudo<(outs VecPredRegs:$Qd),
+ (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vloadrq_ai_128B: Pseudo<(outs VecPredRegs128B:$Qd),
+ (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
}
+
let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
class VSELInst<dag outs, dag ins, InstHexagon rootInst>
: InstHexagon<outs, ins, "", [], "", rootInst.Itinerary, rootInst.Type>;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2a1bb63af789..1fc157900ed5 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -50,11 +50,6 @@ bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const {
R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1;
}
-bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const {
- return Hexagon::R16 <= Reg && Reg <= Hexagon::R27;
-}
-
-
const MCPhysReg *
HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF,
const TargetRegisterClass *RC) const {
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 8a3f175b8488..5f65fad2cc04 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -77,7 +77,6 @@ public:
unsigned getFirstCallerSavedNonParamReg() const;
bool isEHReturnCalleeSaveReg(unsigned Reg) const;
- bool isCalleeSaveReg(unsigned Reg) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c21b6e2515d3..cd474921d4bc 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -214,12 +214,12 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
for (auto &MB : MF) {
auto Begin = MB.begin(), End = MB.end();
while (Begin != End) {
- // First the first non-boundary starting from the end of the last
+ // Find the first non-boundary starting from the end of the last
// scheduling region.
MachineBasicBlock::iterator RB = Begin;
while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
++RB;
- // First the first boundary starting from the beginning of the new
+ // Find the first boundary starting from the beginning of the new
// region.
MachineBasicBlock::iterator RE = RB;
while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8be2a898e380..34b966df7761 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -29,6 +29,7 @@ subdirectories =
MSP430
NVPTX
Mips
+ Nios2
PowerPC
RISCV
Sparc
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index dfea669f3ba1..203864dd4065 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -22,6 +22,18 @@ def FeatureX
: SubtargetFeature<"ext", "ExtendedInsts", "true",
"Enable MSP430-X extensions">;
+def FeatureHWMult16
+ : SubtargetFeature<"hwmult16", "HWMultMode", "HWMult16",
+ "Enable 16-bit hardware multiplier">;
+
+def FeatureHWMult32
+ : SubtargetFeature<"hwmult32", "HWMultMode", "HWMult32",
+ "Enable 32-bit hardware multiplier">;
+
+def FeatureHWMultF5
+ : SubtargetFeature<"hwmultf5", "HWMultMode", "HWMultF5",
+ "Enable F5 series hardware multiplier">;
+
//===----------------------------------------------------------------------===//
// MSP430 supported processors.
//===----------------------------------------------------------------------===//
@@ -29,6 +41,8 @@ class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
def : Proc<"generic", []>;
+def : Proc<"msp430", []>;
+def : Proc<"msp430x", [FeatureX]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index cd58eda5d924..0b02f79f472a 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -403,12 +403,12 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) {
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
if (Node->hasOneUse()) {
- CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI,
+ CurDAG->SelectNodeTo(Node, MSP430::ADDframe, MVT::i16, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i16));
return;
}
ReplaceNode(Node, CurDAG->getMachineNode(
- MSP430::ADD16ri, dl, MVT::i16, TFI,
+ MSP430::ADDframe, dl, MVT::i16, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i16)));
return;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index cc6e64043f54..dae14fd301ee 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -38,27 +38,6 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-lower"
-typedef enum {
- NoHWMult,
- HWMult16,
- HWMult32,
- HWMultF5
-} HWMultUseMode;
-
-static cl::opt<HWMultUseMode>
-HWMultMode("mhwmult", cl::Hidden,
- cl::desc("Hardware multiplier use mode"),
- cl::init(NoHWMult),
- cl::values(
- clEnumValN(NoHWMult, "none",
- "Do not use hardware multiplier"),
- clEnumValN(HWMult16, "16bit",
- "Use 16-bit hardware multiplier"),
- clEnumValN(HWMult32, "32bit",
- "Use 32-bit hardware multiplier"),
- clEnumValN(HWMultF5, "f5series",
- "Use F5 series hardware multiplier")));
-
MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
const MSP430Subtarget &STI)
: TargetLowering(TM) {
@@ -262,7 +241,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
setCmpLibcallCC(LC.Op, LC.Cond);
}
- if (HWMultMode == HWMult16) {
+ if (STI.hasHWMult16()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -277,7 +256,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
}
- } else if (HWMultMode == HWMult32) {
+ } else if (STI.hasHWMult32()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -292,7 +271,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
}
- } else if (HWMultMode == HWMultF5) {
+ } else if (STI.hasHWMultF5()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 1cd18611e52c..cec43040f60d 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -122,6 +122,11 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
[(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
}
+let Defs = [SR], Uses = [SP] in {
+def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset),
+ "# ADDframe PSEUDO", []>;
+}
+
let usesCustomInserter = 1 in {
let Uses = [SR] in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9600bc28f100..7a3b7a8bd5ff 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -127,7 +127,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Fold imm into offset
Offset += MI.getOperand(FIOperandNum + 1).getImm();
- if (MI.getOpcode() == MSP430::ADD16ri) {
+ if (MI.getOpcode() == MSP430::ADDframe) {
// This is actually "load effective address" of the stack slot
// instruction. We have only two-address instructions, thus we need to
// expand it into mov + add
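As the comment above says, ADDframe is effectively a load-effective-address of a stack slot, and with only two-address instructions it has to expand into a register move followed by an add. A hedged sketch of that expansion; the MOV opcode name and operand order are assumptions for illustration, only ADD16ri appears in this patch:

  // Illustration, not the patch's code: copy the base register, then add the
  // folded frame offset in place (two-address form).
  BuildMI(MBB, II, dl, TII.get(MSP430::MOV16rr), DstReg).addReg(BaseReg);
  BuildMI(MBB, II, dl, TII.get(MSP430::ADD16ri), DstReg)
      .addReg(DstReg)
      .addImm(Offset);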
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 6216348e4d71..776a9dcb11d4 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -19,6 +19,20 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-subtarget"
+static cl::opt<MSP430Subtarget::HWMultEnum>
+HWMultModeOption("mhwmult", cl::Hidden,
+ cl::desc("Hardware multiplier use mode for MSP430"),
+ cl::init(MSP430Subtarget::NoHWMult),
+ cl::values(
+ clEnumValN(MSP430Subtarget::NoHWMult, "none",
+ "Do not use hardware multiplier"),
+ clEnumValN(MSP430Subtarget::HWMult16, "16bit",
+ "Use 16-bit hardware multiplier"),
+ clEnumValN(MSP430Subtarget::HWMult32, "32bit",
+ "Use 32-bit hardware multiplier"),
+ clEnumValN(MSP430Subtarget::HWMultF5, "f5series",
+ "Use F5 series hardware multiplier")));
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "MSP430GenSubtargetInfo.inc"
@@ -27,7 +41,18 @@ void MSP430Subtarget::anchor() { }
MSP430Subtarget &
MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- ParseSubtargetFeatures("generic", FS);
+ ExtendedInsts = false;
+ HWMultMode = NoHWMult;
+
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "msp430";
+
+ ParseSubtargetFeatures(CPUName, FS);
+
+ if (HWMultModeOption != NoHWMult)
+ HWMultMode = HWMultModeOption;
+
return *this;
}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 1a00d85e01cb..8828dfd65878 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -30,8 +30,15 @@ namespace llvm {
class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
+public:
+ enum HWMultEnum {
+ NoHWMult, HWMult16, HWMult32, HWMultF5
+ };
+
+private:
virtual void anchor();
bool ExtendedInsts;
+ HWMultEnum HWMultMode;
MSP430FrameLowering FrameLowering;
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
@@ -50,6 +57,10 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool hasHWMult16() const { return HWMultMode == HWMult16; }
+ bool hasHWMult32() const { return HWMultMode == HWMult32; }
+ bool hasHWMultF5() const { return HWMultMode == HWMultF5; }
+
const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
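Taken together, the MSP430Subtarget hunks make the hardware-multiplier mode a subtarget property: the feature string (hwmult16, hwmult32, hwmultf5 from the new MSP430.td features) sets it first, and a non-default -mhwmult value then overrides it. A self-contained sketch of that precedence, using only names introduced in this patch:

  enum HWMultEnum { NoHWMult, HWMult16, HWMult32, HWMultF5 };

  // Mirrors initializeSubtargetDependencies: features pick the default,
  // an explicit -mhwmult option wins whenever it is not "none".
  static HWMultEnum resolveHWMultMode(HWMultEnum FromFeatures,
                                      HWMultEnum FromOption) {
    return FromOption != NoHWMult ? FromOption : FromFeatures;
  }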
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 3641a70d61b5..8fe4e75f3e18 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -813,28 +813,28 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
!isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
- // The shift masks must have the same position and size.
- if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
- return SDValue();
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
- SDValue Shl = And1.getOperand(0);
+ SDValue Shl = And1.getOperand(0);
- if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
- return SDValue();
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
- unsigned Shamt = CN->getZExtValue();
+ unsigned Shamt = CN->getZExtValue();
- // Return if the shift amount and the first bit position of mask are not the
- // same.
- EVT ValTy = N->getValueType(0);
- if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
- return SDValue();
+ // Return if the shift amount and the first bit position of mask are not the
+ // same.
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
+ return SDValue();
- SDLoc DL(N);
- return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0),
- DAG.getConstant(SMPos0, DL, MVT::i32),
- DAG.getConstant(SMSize0, DL, MVT::i32),
- And0.getOperand(0));
+ SDLoc DL(N);
+ return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0),
+ DAG.getConstant(SMPos0, DL, MVT::i32),
+ DAG.getConstant(SMSize0, DL, MVT::i32),
+ And0.getOperand(0));
} else {
// Pattern match DINS.
// $dst = or (and $src, mask0), mask1
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 8f5ecadecdea..1f4e933db2a2 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -59,9 +59,8 @@ static cl::opt<bool>
void MipsSubtarget::anchor() { }
-MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, bool little,
- const MipsTargetMachine &TM)
+MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ bool little, const MipsTargetMachine &TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
@@ -77,8 +76,6 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
- PreviousInMips16Mode = InMips16Mode;
-
if (MipsArchVersion == MipsDefault)
MipsArchVersion = Mips32;
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index cca2cb8a4660..b4d15ee361ff 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -119,9 +119,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// Mips16 hard float
bool InMips16HardFloat;
- // PreviousInMips16 -- the function we just processed was in Mips 16 Mode
- bool PreviousInMips16Mode;
-
// InMicroMips -- can process MicroMips instructions
bool InMicroMipsMode;
@@ -178,8 +175,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- MipsSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
- bool little, const MipsTargetMachine &TM);
+ MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little,
+ const MipsTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Nios2/CMakeLists.txt b/lib/Target/Nios2/CMakeLists.txt
new file mode 100644
index 000000000000..78db452094bd
--- /dev/null
+++ b/lib/Target/Nios2/CMakeLists.txt
@@ -0,0 +1,18 @@
+set(LLVM_TARGET_DEFINITIONS Nios2.td)
+
+#Generate Nios2GenRegisterInfo.inc and Nios2GenInstrInfo.inc, which are included
+#by your hand-written C++ files.
+#Nios2GenRegisterInfo.inc comes from Nios2RegisterInfo.td, Nios2GenInstrInfo.inc
+#comes from Nios2InstrInfo.td.
+tablegen(LLVM Nios2GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM Nios2GenInstrInfo.inc -gen-instr-info)
+
+#Nios2CommonTableGen must be defined
+add_public_tablegen_target(Nios2CommonTableGen)
+
+#Nios2CodeGen should match with LLVMBuild.txt Nios2CodeGen
+add_llvm_target(Nios2CodeGen Nios2TargetMachine.cpp)
+
+#Should match with "subdirectories = MCTargetDesc TargetInfo" in LLVMBuild.txt
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Nios2/LLVMBuild.txt b/lib/Target/Nios2/LLVMBuild.txt
new file mode 100644
index 000000000000..b40a76379706
--- /dev/null
+++ b/lib/Target/Nios2/LLVMBuild.txt
@@ -0,0 +1,61 @@
+;===- ./lib/Target/Nios2/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+#The following comments are extracted from http://llvm.org/docs/LLVMBuild.html
+
+[common]
+subdirectories =
+ MCTargetDesc
+ TargetInfo
+
+[component_0]
+#TargetGroup components are an extension of LibraryGroups, specifically for
+#defining LLVM targets (which are handled specially in a few places).
+type = TargetGroup
+#The name of the component should always be the name of the target (should
+#match "def Nios2 : Target" in Nios2.td).
+name = Nios2
+#The Nios2 component is located in directory Target/.
+parent = Target
+#Whether this target defines an assembly parser, assembly printer,
+#disassembler, and supports JIT compilation. They are optional.
+
+[component_1]
+#component_1 is a Library type named Nios2CodeGen. After the build it will be
+#in lib/libLLVMNios2CodeGen.a of your build directory.
+type = Library
+name = Nios2CodeGen
+#The Nios2CodeGen component (Library) is located in directory Nios2/.
+parent = Nios2
+#If given, a list of the names of Library or LibraryGroup components which
+#must also be linked in whenever this library is used. That is, the link time
+#dependencies for this component. When tools are built, the build system will
+#include the transitive closure of all required_libraries for the components
+#the tool needs.
+required_libraries = CodeGen
+ Core
+ GlobalISel
+ MC
+ Nios2Desc
+ Nios2Info
+ Support
+ Target
+#end of required_libraries
+
+#All LLVMBuild.txt files in Target/Nios2 and its subdirectories use
+#'add_to_library_groups = Nios2'.
+add_to_library_groups = Nios2
diff --git a/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..21def509a232
--- /dev/null
+++ b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,2 @@
+#MCTargetDesc/CMakeLists.txt
+add_llvm_library(LLVMNios2Desc Nios2MCTargetDesc.cpp)
diff --git a/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..4dc6995e7f5c
--- /dev/null
+++ b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Nios2Desc
+parent = Nios2
+required_libraries = MC
+ Nios2Info
+ Support
+add_to_library_groups = Nios2
diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
new file mode 100644
index 000000000000..d913166399c6
--- /dev/null
+++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
@@ -0,0 +1,25 @@
+//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Nios2 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Nios2MCTargetDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "Nios2GenInstrInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "Nios2GenRegisterInfo.inc"
+
+extern "C" void LLVMInitializeNios2TargetMC() {}
diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
new file mode 100644
index 000000000000..d426062db168
--- /dev/null
+++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
@@ -0,0 +1,34 @@
+//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Nios2 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
+#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
+
+namespace llvm {
+class Target;
+class Triple;
+
+Target &getTheNios2Target();
+
+} // namespace llvm
+
+// Defines symbolic names for Nios2 registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "Nios2GenRegisterInfo.inc"
+
+// Defines symbolic names for the Nios2 instructions.
+#define GET_INSTRINFO_ENUM
+#include "Nios2GenInstrInfo.inc"
+
+#endif
diff --git a/lib/Target/Nios2/Nios2.h b/lib/Target/Nios2/Nios2.h
new file mode 100644
index 000000000000..87202f48cfbe
--- /dev/null
+++ b/lib/Target/Nios2/Nios2.h
@@ -0,0 +1,25 @@
+//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Nios2 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H
+#define LLVM_LIB_TARGET_NIOS2_NIOS2_H
+
+#include "MCTargetDesc/Nios2MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class Nios2TargetMachine;
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Nios2/Nios2.td b/lib/Target/Nios2/Nios2.td
new file mode 100644
index 000000000000..e8abba863370
--- /dev/null
+++ b/lib/Target/Nios2/Nios2.td
@@ -0,0 +1,29 @@
+//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Target-dependent interfaces
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "Nios2RegisterInfo.td"
+include "Nios2InstrInfo.td"
+
+def Nios2InstrInfo : InstrInfo;
+
+def Nios2 : Target { let InstructionSet = Nios2InstrInfo; }
diff --git a/lib/Target/Nios2/Nios2InstrFormats.td b/lib/Target/Nios2/Nios2InstrFormats.td
new file mode 100644
index 000000000000..79868be48a48
--- /dev/null
+++ b/lib/Target/Nios2/Nios2InstrFormats.td
@@ -0,0 +1,117 @@
+//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe the Nios2 instruction formats
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def Pseudo : Format<0>;
+def FrmI : Format<1>;
+def FrmR : Format<2>;
+def FrmJ : Format<3>;
+def FrmOther : Format<4>; // Instruction w/ a custom format
+
+// Generic Nios2 Format
+class Nios2Inst<dag outs, dag ins, string asmstr, list<dag> pattern, Format f>
+ : Instruction {
+ field bits<32> Inst;
+ Format Form = f;
+
+ let Namespace = "Nios2";
+
+ let Size = 4;
+
+ bits<6> Opcode = 0;
+
+ // Bottom 6 bits are the 'opcode' field
+ let Inst{5 - 0} = Opcode;
+
+ let OutOperandList = outs;
+ let InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+
+ //
+ // Attributes specific to Nios2 instructions:
+ //
+ bits<3> FormBits = Form.Value;
+
+ // TSFlags layout should be kept in sync with Nios2InstrInfo.h.
+ let TSFlags{2 - 0} = FormBits;
+
+ let DecoderNamespace = "Nios2";
+}
+
+// Nios2 Instruction Format
+class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, Format f>
+ : Nios2Inst<outs, ins, asmstr, pattern, f> {
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Nios2 : <|A|B|immediate|opcode|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSE<outs, ins, asmstr, pattern, FrmI> {
+ bits<5> rA;
+ bits<5> rB;
+ bits<16> imm;
+
+ let Opcode = op;
+
+ let Inst{31 - 27} = rA;
+ let Inst{26 - 22} = rB;
+ let Inst{21 - 6} = imm;
+}
+
+//===----------------------------------------------------------------------===//
+// Format R instruction : <|A|B|C|opx|imm|opcode|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSE<outs, ins, asmstr, pattern, FrmR> {
+ bits<5> rA;
+ bits<5> rB;
+ bits<5> rC;
+ bits<5> imm = 0;
+
+ // opcode is always 0x3a for R instr.
+ let Opcode = 0x3a;
+
+ let Inst{31 - 27} = rA;
+ let Inst{26 - 22} = rB;
+ let Inst{21 - 17} = rC;
+ // opx stands for opcode extension
+ let Inst{16 - 11} = opx;
+ // optional 5-bit immediate value
+ let Inst{10 - 6} = imm;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Nios2 : <|address|opcode|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSE<outs, ins, asmstr, pattern, FrmJ> {
+ bits<26> addr;
+
+ let Opcode = op;
+
+ let Inst{31 - 6} = addr;
+}
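To make the Format I layout concrete (|31-27 rA|26-22 rB|21-6 imm16|5-0 opcode|), here is a small self-contained C++ sketch of the same bit packing, derived from the TableGen above rather than taken from this patch:

  #include <cstdint>

  // Pack a Nios2 Format I word: two 5-bit registers, a 16-bit immediate,
  // and the 6-bit opcode in the low bits.
  static uint32_t encodeNios2FormatI(unsigned RA, unsigned RB, uint16_t Imm16,
                                     unsigned Opcode) {
    return (uint32_t(RA & 0x1f) << 27) | (uint32_t(RB & 0x1f) << 22) |
           (uint32_t(Imm16) << 6) | uint32_t(Opcode & 0x3f);
  }

  // Example: ADDi has opcode 0x04 (see Nios2InstrInfo.td), so
  // "addi r2, r3, 100" would be encodeNios2FormatI(/*RA=*/3, /*RB=*/2, 100, 0x04).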
diff --git a/lib/Target/Nios2/Nios2InstrInfo.td b/lib/Target/Nios2/Nios2InstrInfo.td
new file mode 100644
index 000000000000..5e4815ab3e16
--- /dev/null
+++ b/lib/Target/Nios2/Nios2InstrInfo.td
@@ -0,0 +1,50 @@
+//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Nios2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "Nios2InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Nios2 Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+def simm16 : Operand<i32> {
+ let DecoderMethod= "DecodeSimm16";
+}
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic and logical instructions with 2 register operands.
+class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rB), (ins RC:$rA, Od:$imm16),
+ !strconcat(instr_asm, "\t$rB, $rA, $imm16"),
+ [(set RC:$rB, (OpNode RC:$rA, imm_type:$imm16))]> {
+ let isReMaterializable = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Nios2 R1 Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDi : ArithLogicI<0x04, "addi", add, simm16, immSExt16, CPURegs>;
diff --git a/lib/Target/Nios2/Nios2RegisterInfo.td b/lib/Target/Nios2/Nios2RegisterInfo.td
new file mode 100644
index 000000000000..1808815816f3
--- /dev/null
+++ b/lib/Target/Nios2/Nios2RegisterInfo.td
@@ -0,0 +1,60 @@
+//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// We have a bank of 32 registers.
+class Nios2Reg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Nios2";
+}
+
+// Nios2 CPU Registers
+class Nios2GPRReg<bits<5> num, string n> : Nios2Reg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Nios2" in {
+ // General Purpose Registers
+ def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>;
+ def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>;
+ foreach RegNum = 2 - 23 in {
+ def R #RegNum : Nios2GPRReg<RegNum, "r" #RegNum>, DwarfRegNum<[ RegNum ]>;
+ }
+ def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>;
+ def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>;
+ def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>;
+ def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>;
+ def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>;
+ def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>;
+ def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>;
+ def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>;
+ def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Nios2", [ i32 ], 32,
+ (add
+ // Reserved
+ ZERO,
+ AT,
+ // Return Values and Arguments
+ (sequence "R%u", 2, 7),
+ // Not preserved across procedure calls
+ // Caller saved
+ (sequence "R%u", 8, 15),
+ // Callee saved
+ (sequence "R%u", 16, 23),
+ // Reserved
+ ET, BT, GP, SP, FP, EA, BA, RA, PC)>;
diff --git a/lib/Target/Nios2/Nios2TargetMachine.cpp b/lib/Target/Nios2/Nios2TargetMachine.cpp
new file mode 100644
index 000000000000..16d4eabcfaf7
--- /dev/null
+++ b/lib/Target/Nios2/Nios2TargetMachine.cpp
@@ -0,0 +1,46 @@
+//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Nios2 target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Nios2TargetMachine.h"
+#include "Nios2.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nios2"
+
+extern "C" void LLVMInitializeNios2Target() {
+ // Register the target.
+}
+
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+ const TargetOptions &Options) {
+ return "e-p:32:32:32-i8:8:32-i16:16:32-n32";
+}
+
+static Reloc::Model getEffectiveRelocModel(CodeModel::Model CM,
+ Optional<Reloc::Model> RM) {
+ if (!RM.hasValue() || CM == CodeModel::JITDefault)
+ return Reloc::Static;
+ return *RM;
+}
+
+Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
+ Options, getEffectiveRelocModel(CM, RM), CM, OL) {}
+
+Nios2TargetMachine::~Nios2TargetMachine() {}
diff --git a/lib/Target/Nios2/Nios2TargetMachine.h b/lib/Target/Nios2/Nios2TargetMachine.h
new file mode 100644
index 000000000000..7f145c82f32c
--- /dev/null
+++ b/lib/Target/Nios2/Nios2TargetMachine.h
@@ -0,0 +1,30 @@
+//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Nios2 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
+#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class Nios2TargetMachine : public LLVMTargetMachine {
+public:
+ Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~Nios2TargetMachine() override;
+};
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Nios2/TargetInfo/CMakeLists.txt b/lib/Target/Nios2/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..394d2c2680b7
--- /dev/null
+++ b/lib/Target/Nios2/TargetInfo/CMakeLists.txt
@@ -0,0 +1 @@
+add_llvm_library(LLVMNios2Info Nios2TargetInfo.cpp)
diff --git a/lib/Target/Nios2/TargetInfo/LLVMBuild.txt b/lib/Target/Nios2/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..558f7501ea6b
--- /dev/null
+++ b/lib/Target/Nios2/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Nios2/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Nios2Info
+parent = Nios2
+required_libraries = Support
+add_to_library_groups = Nios2
diff --git a/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
new file mode 100644
index 000000000000..e317686140f7
--- /dev/null
+++ b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
@@ -0,0 +1,24 @@
+//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Nios2.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target &llvm::getTheNios2Target() {
+ static Target TheNios2Target;
+ return TheNios2Target;
+}
+
+extern "C" void LLVMInitializeNios2TargetInfo() {
+ RegisterTarget<Triple::nios2,
+ /*HasJIT=*/true>
+ X(getTheNios2Target(), "nios2", "Nios2");
+}
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
index ebd414baf1d2..41e3190c3eec 100644
--- a/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// Note: Cannot use stepBackward instead since we are using the Reg
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
- LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+ LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e65b1f1aa0a5..b90a5ee28342 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
- unsigned &InsertAtByte, bool &Swap, bool IsLE) {
// Check that the mask is shuffling words
+static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
@@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
+ return true;
+}
+
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+ if (!isWordShuffleMask(N))
+ return false;
+
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
@@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
+bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+ // Ensure each byte index of the word is consecutive.
+ if (!isWordShuffleMask(N))
+ return false;
+
+ // Now we look at mask elements 0,4,8,12, which are the beginning of words.
+ unsigned M0 = N->getMaskElt(0) / 4;
+ unsigned M1 = N->getMaskElt(4) / 4;
+ unsigned M2 = N->getMaskElt(8) / 4;
+ unsigned M3 = N->getMaskElt(12) / 4;
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ assert(M0 < 4 && "Indexing into an undef vector?");
+ if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
+ return false;
+
+ ShiftElts = IsLE ? (4 - M0) % 4 : M0;
+ Swap = false;
+ return true;
+ }
+
+ // Ensure each word index of the ShuffleVector Mask is consecutive.
+ if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
+ return false;
+
+ if (IsLE) {
+ if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the second vector
+ // (or if there is no shift to be done at all).
+ Swap = false;
+ ShiftElts = (8 - M0) % 8;
+ } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the first vector
+ // (or if we're shifting by 4 - thereby simply swapping the vectors).
+ Swap = true;
+ ShiftElts = (4 - M0) % 4;
+ }
+
+ return true;
+ } else { // BE
+ if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 4 elements of the first vector.
+ Swap = false;
+ ShiftElts = M0;
+ } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 4 elements of the right vector.
+ Swap = true;
+ ShiftElts = M0 - 4;
+ }
+
+ return true;
+ }
+}
+
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+
+ if (Subtarget.hasVSX() &&
+ PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
+
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
@@ -8212,10 +8296,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDLoc DL(Op);
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
case Intrinsic::ppc_cfence: {
+ assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
- Op.getOperand(ArgStart + 1))),
+ Op.getOperand(ArgStart + 1)),
+ Op.getOperand(0)),
0);
}
default:
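
To make the little-endian branch of isXXSLDWIShuffleMask above concrete, a small standalone restatement (not from the patch). Assumptions: the mask has already passed isWordShuffleMask, the two shuffle inputs are distinct, and M0 is the first word index of the result in the range 0-7.

    #include <cassert>
    #include <cstdio>

    // Little-endian Swap/ShiftElts computation for two distinct inputs,
    // mirroring the LE branch of PPC::isXXSLDWIShuffleMask above.
    static void xxsldwiLE(unsigned M0, bool &Swap, unsigned &ShiftElts) {
      assert(M0 < 8 && "word index out of range");
      if (M0 == 0 || M0 >= 5) {   // leading word from the second input, or no shift
        Swap = false;
        ShiftElts = (8 - M0) % 8;
      } else {                    // M0 in 1..4: leading word from the first input
        Swap = true;
        ShiftElts = (4 - M0) % 4;
      }
    }

    int main() {
      bool Swap; unsigned Shift;
      xxsldwiLE(6, Swap, Shift);  // word mask {6,7,0,1}: Swap=0, ShiftElts=2
      std::printf("Swap=%d ShiftElts=%u\n", Swap, Shift);
      xxsldwiLE(3, Swap, Shift);  // word mask {3,4,5,6}: Swap=1, ShiftElts=1
      std::printf("Swap=%d ShiftElts=%u\n", Swap, Shift);
      return 0;
    }
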
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index acb77943b118..2f9eb95f6de6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -450,7 +450,11 @@ namespace llvm {
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
-
+ /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXSLDWI instruction.
+ bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a3f894c81a01..165970f9678c 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1001,7 +1001,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
+// correct because the branch select pass is relying on it.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 46f103141bc1..fd6785e963a6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1931,6 +1931,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case PPC::DFSTOREf64: {
assert(Subtarget.hasP9Vector() &&
"Invalid D-Form Pseudo-ops on non-P9 target.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+ "D-form op must have register and immediate operands");
unsigned UpperOpcode, LowerOpcode;
switch (MI.getOpcode()) {
case PPC::DFLOADf32:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 0766cfe4a987..26b99eced23c 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
- SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index b98140fedfc0..1589ab03e507 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
+
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
(SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
@@ -2379,8 +2383,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
-
+ [(set v2f64:$XT, (load xaddr:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvl $XT, $src, $rB", IIC_LdStLoad,
@@ -2430,7 +2433,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(store v2f64:$XT, xaddr:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
@@ -2498,21 +2501,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
} // IsLittleEndian, HasP9Vector
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
-
+ // D-Form Load/Store
+ def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
+
+ def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+
+ def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
+ (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
+ (STXVX $rS, xaddr:$dst)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2704,9 +2724,15 @@ def FltToUIntLoad {
def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToLongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
+}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToULongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
+}
def FltToLong {
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
}
@@ -2728,9 +2754,15 @@ def DblToULong {
def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
+def DblToIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
+}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
+def DblToUIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
+}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
}
@@ -2898,17 +2930,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
VSFRC)), 0))>;
diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
index 92ce8089c24f..d02db9a617a3 100644
--- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp
+++ b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -74,7 +74,7 @@ bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
unsigned CCValid = MI.getOperand(3).getImm();
unsigned CCMask = MI.getOperand(4).getImm();
- LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LivePhysRegs LiveRegs(TII->getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
for (auto I = std::prev(MBB.end()); I != MBBI; --I)
LiveRegs.stepBackward(*I);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index a30bf34857b5..b34c181124de 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -236,32 +236,30 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction &MF = *MBB->getParent();
- const unsigned Reg = MI->getOperand(0).getReg();
+ const unsigned Reg64 = MI->getOperand(0).getReg();
+ const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
- // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD,
- // so they already have operand 0 set to reg.
+ // EAR can only load the low subregister so use a shift for %a0 to produce
+ // the GR containing %a0 and %a1.
// ear <reg>, %a0
- MachineInstr *Ear1MI = MF.CloneMachineInstr(MI);
- MBB->insert(MI, Ear1MI);
- Ear1MI->setDesc(get(SystemZ::EAR));
- MachineInstrBuilder(MF, Ear1MI).addReg(SystemZ::A0);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+ .addReg(SystemZ::A0)
+ .addReg(Reg64, RegState::ImplicitDefine);
// sllg <reg>, <reg>, 32
- MachineInstr *SllgMI = MF.CloneMachineInstr(MI);
- MBB->insert(MI, SllgMI);
- SllgMI->setDesc(get(SystemZ::SLLG));
- MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64)
+ .addReg(Reg64)
+ .addReg(0)
+ .addImm(32);
// ear <reg>, %a1
- MachineInstr *Ear2MI = MF.CloneMachineInstr(MI);
- MBB->insert(MI, Ear2MI);
- Ear2MI->setDesc(get(SystemZ::EAR));
- MachineInstrBuilder(MF, Ear2MI).addReg(SystemZ::A1);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
+ .addReg(SystemZ::A1);
// lg <reg>, 40(<reg>)
MI->setDesc(get(SystemZ::LG));
- MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0);
+ MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0);
}
// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 3766ed45b8c4..ad597f5c65f0 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -55,6 +55,7 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
+ bool prefersVectorizedAddressing() { return false; }
bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 32ab475f1186..e5d3209ec6a9 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1316,16 +1316,17 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
while (!Done) {
bool UpdateLocLex = true;
+ AsmToken::TokenKind TK = getLexer().getKind();
// The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
// identifier. Don't try to parse it as a register.
- if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
+ if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") &&
+ TK != AsmToken::Identifier)
break;
// If we're parsing an immediate expression, we don't expect a '['.
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
- AsmToken::TokenKind TK = getLexer().getKind();
switch (TK) {
default: {
if (SM.isValidEndState()) {
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index fc4adddc149b..7471373334f6 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -11,6 +11,7 @@ tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables)
if(LLVM_BUILD_GLOBAL_ISEL)
tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3a421fe77392..fe105298f5c1 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -127,6 +127,9 @@ def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
+def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
+ "true", "Enable AVX-512 Population Count Instructions",
+ [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a5489b9aa8b7..313920e02c3e 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1655,8 +1655,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
}
void FPS::setKillFlags(MachineBasicBlock &MBB) const {
- const TargetRegisterInfo *TRI =
- MBB.getParent()->getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo &TRI =
+ *MBB.getParent()->getSubtarget().getRegisterInfo();
LivePhysRegs LPR(TRI);
LPR.addLiveOuts(MBB);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 37b248416e4a..86744b064132 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1364,6 +1364,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
+ if (Subtarget.hasVPOPCNTDQ()) {
+ // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
+ // version of popcntd/q.
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
+ MVT::v4i32, MVT::v2i64})
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ }
+
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
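
Purely for illustration (not from the patch): with CTPOP now legal for these vector types, per-element population counts can select the new VPOPCNTD/VPOPCNTQ instructions. A minimal sketch, assuming the _mm512_popcnt_epi32/_mm512_popcnt_epi64 intrinsic spellings compilers expose for -mavx512vpopcntdq:

    #include <immintrin.h>

    // Assumed AVX512VPOPCNTDQ intrinsics; build with -mavx512vpopcntdq.
    __m512i popcnt_each_dword(__m512i V) {
      return _mm512_popcnt_epi32(V);   // one popcount per 32-bit lane (vpopcntd)
    }

    __m512i popcnt_each_qword(__m512i V) {
      return _mm512_popcnt_epi64(V);   // one popcount per 64-bit lane (vpopcntq)
    }
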
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index f9344413bbcf..d8702693884d 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2693,22 +2693,22 @@ multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
}
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- PatFrag st_frag, PatFrag mstore> {
+ PatFrag st_frag, PatFrag mstore, string Name> {
let hasSideEffects = 0 in {
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
- [], _.ExeDomain>, EVEX;
+ [], _.ExeDomain>, EVEX, FoldGenData<Name#rr>;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
- [], _.ExeDomain>, EVEX, EVEX_K;
+ [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData<Name#rrk>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
- [], _.ExeDomain>, EVEX, EVEX_KZ;
+ [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData<Name#rrkz>;
}
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
@@ -2726,80 +2726,92 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
- masked_store_unaligned>, EVEX_V512;
+ masked_store_unaligned, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
- masked_store_unaligned>, EVEX_V256;
+ masked_store_unaligned, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
- masked_store_unaligned>, EVEX_V128;
+ masked_store_unaligned, Name#Z128>, EVEX_V128;
}
}
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
- masked_store_aligned512>, EVEX_V512;
+ masked_store_aligned512, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
- masked_store_aligned256>, EVEX_V256;
+ masked_store_aligned256, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
- masked_store_aligned128>, EVEX_V128;
+ masked_store_aligned128, Name#Z128>, EVEX_V128;
}
}
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
- HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+ HasAVX512, "VMOVAPS">,
+ PS, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
- HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ HasAVX512, "VMOVAPD">,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
null_frag>,
- avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
+ "VMOVUPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
null_frag>,
- avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
+ avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
+ "VMOVUPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
- HasAVX512>, PD, EVEX_CD8<32, CD8VF>;
+ HasAVX512, "VMOVDQA32">,
+ PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
- HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ HasAVX512, "VMOVDQA64">,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
- HasBWI>, XD, EVEX_CD8<8, CD8VF>;
+ avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
+ HasBWI, "VMOVDQU8">,
+ XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
- HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
+ HasBWI, "VMOVDQU16">,
+ XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
- HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
+ HasAVX512, "VMOVDQU32">,
+ XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
- HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
+ HasAVX512, "VMOVDQU64">,
+ XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
@@ -3354,17 +3366,52 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-let hasSideEffects = 0 in
-defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
- "vmovss.s", "$src2, $src1", "$src1, $src2", []>,
- XS, EVEX_4V, VEX_LIG;
-
-let hasSideEffects = 0 in
-defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
- "vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
- XD, EVEX_4V, VEX_LIG, VEX_W;
+let hasSideEffects = 0 in {
+ def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR32X:$src2),
+ "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [], NoItinerary>, XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSSZrr">;
+
+let Constraints = "$src0 = $dst" in
+ def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
+ VR128X:$src1, FR32X:$src2),
+ "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSSZrrk">;
+
+ def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
+ "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSSZrrkz">;
+
+ def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR64X:$src2),
+ "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
+ FoldGenData<"VMOVSDZrr">;
+
+let Constraints = "$src0 = $dst" in
+ def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
+ VR128X:$src1, FR64X:$src2),
+ "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
+ VEX_W, FoldGenData<"VMOVSDZrrk">;
+
+ def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f64x_info.KRCWM:$mask, VR128X:$src1,
+ FR64X:$src2),
+ "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
+ VEX_W, FoldGenData<"VMOVSDZrrkz">;
+}
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
@@ -8649,6 +8696,41 @@ let Predicates = [HasCDI, NoVLX] in {
}
//===---------------------------------------------------------------------===//
+// Counts number of ones - VPOPCNTD and VPOPCNTQ
+//===---------------------------------------------------------------------===//
+
+multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
+ let Predicates = [HasVPOPCNTDQ] in
+ defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
+}
+
+// Use the 512-bit version to implement 128/256-bit.
+multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME # "Zrr")
+ (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+ _.info256.RC:$src1,
+ _.info256.SubRegIdx)),
+ _.info256.SubRegIdx)>;
+
+ def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME # "Zrr")
+ (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+ _.info128.RC:$src1,
+ _.info128.SubRegIdx)),
+ _.info128.SubRegIdx)>;
+ }
+}
+
+defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
+ avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
+defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
+ avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
+
+//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
@@ -8795,7 +8877,7 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX, TAPD;
+ EVEX, TAPD, FoldGenData<NAME#rr>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 66382014f6e8..e38bbc9b3d36 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -964,10 +964,10 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
} // isConvertibleToThreeAddress
} // isCommutable
- def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
@@ -1049,10 +1049,10 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
} // isConvertibleToThreeAddress
} // isCommutable
- def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
+ def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
+ def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
+ def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
@@ -1129,10 +1129,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // isCommutable
- def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 1941ae57f0f1..3a3cdc9fa574 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -297,7 +297,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
- VEX_LIG;
+ VEX_LIG, FoldGenData<NAME#rr>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
@@ -321,6 +321,12 @@ let isCodeGenOnly = 1 in {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
+let hasSideEffects = 0 in
+ def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_LIG, FoldGenData<NAME#rr_Int>;
} // isCodeGenOnly = 1
}
@@ -372,12 +378,13 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+ FoldGenData<NAME#rr>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
- VEX_L;
+ VEX_L, FoldGenData<NAME#Yrr>;
} // isCodeGenOnly = 1
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c2fe786732dc..bfcbf71d252f 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -225,6 +225,12 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
+// Specify the alternative register form instruction to replace the current
+// instruction in case it was picked during generation of the memory folding tables.
+class FoldGenData<string _RegisterForm> {
+ string FoldGenRegForm = _RegisterForm;
+}
+
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr,
InstrItinClass itin,
@@ -304,6 +310,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
+ // Used in the memory folding generation (TableGen backend) to point to an alternative
+ // instruction to replace the current one in case it got picked during generation.
+ string FoldGenRegForm = ?;
+
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f7083a7448ce..33fbd41bb631 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -121,172 +121,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
(STI.is64Bit() ? X86::RETQ : X86::RETL)),
Subtarget(STI), RI(STI.getTargetTriple()) {
- static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
- { X86::ADC32ri, X86::ADC32mi, 0 },
- { X86::ADC32ri8, X86::ADC32mi8, 0 },
- { X86::ADC32rr, X86::ADC32mr, 0 },
- { X86::ADC64ri32, X86::ADC64mi32, 0 },
- { X86::ADC64ri8, X86::ADC64mi8, 0 },
- { X86::ADC64rr, X86::ADC64mr, 0 },
- { X86::ADD16ri, X86::ADD16mi, 0 },
- { X86::ADD16ri8, X86::ADD16mi8, 0 },
- { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
- { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
- { X86::ADD16rr, X86::ADD16mr, 0 },
- { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
- { X86::ADD32ri, X86::ADD32mi, 0 },
- { X86::ADD32ri8, X86::ADD32mi8, 0 },
- { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
- { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
- { X86::ADD32rr, X86::ADD32mr, 0 },
- { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
- { X86::ADD64ri32, X86::ADD64mi32, 0 },
- { X86::ADD64ri8, X86::ADD64mi8, 0 },
- { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
- { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
- { X86::ADD64rr, X86::ADD64mr, 0 },
- { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
- { X86::ADD8ri, X86::ADD8mi, 0 },
- { X86::ADD8rr, X86::ADD8mr, 0 },
- { X86::AND16ri, X86::AND16mi, 0 },
- { X86::AND16ri8, X86::AND16mi8, 0 },
- { X86::AND16rr, X86::AND16mr, 0 },
- { X86::AND32ri, X86::AND32mi, 0 },
- { X86::AND32ri8, X86::AND32mi8, 0 },
- { X86::AND32rr, X86::AND32mr, 0 },
- { X86::AND64ri32, X86::AND64mi32, 0 },
- { X86::AND64ri8, X86::AND64mi8, 0 },
- { X86::AND64rr, X86::AND64mr, 0 },
- { X86::AND8ri, X86::AND8mi, 0 },
- { X86::AND8rr, X86::AND8mr, 0 },
- { X86::DEC16r, X86::DEC16m, 0 },
- { X86::DEC32r, X86::DEC32m, 0 },
- { X86::DEC64r, X86::DEC64m, 0 },
- { X86::DEC8r, X86::DEC8m, 0 },
- { X86::INC16r, X86::INC16m, 0 },
- { X86::INC32r, X86::INC32m, 0 },
- { X86::INC64r, X86::INC64m, 0 },
- { X86::INC8r, X86::INC8m, 0 },
- { X86::NEG16r, X86::NEG16m, 0 },
- { X86::NEG32r, X86::NEG32m, 0 },
- { X86::NEG64r, X86::NEG64m, 0 },
- { X86::NEG8r, X86::NEG8m, 0 },
- { X86::NOT16r, X86::NOT16m, 0 },
- { X86::NOT32r, X86::NOT32m, 0 },
- { X86::NOT64r, X86::NOT64m, 0 },
- { X86::NOT8r, X86::NOT8m, 0 },
- { X86::OR16ri, X86::OR16mi, 0 },
- { X86::OR16ri8, X86::OR16mi8, 0 },
- { X86::OR16rr, X86::OR16mr, 0 },
- { X86::OR32ri, X86::OR32mi, 0 },
- { X86::OR32ri8, X86::OR32mi8, 0 },
- { X86::OR32rr, X86::OR32mr, 0 },
- { X86::OR64ri32, X86::OR64mi32, 0 },
- { X86::OR64ri8, X86::OR64mi8, 0 },
- { X86::OR64rr, X86::OR64mr, 0 },
- { X86::OR8ri, X86::OR8mi, 0 },
- { X86::OR8rr, X86::OR8mr, 0 },
- { X86::ROL16r1, X86::ROL16m1, 0 },
- { X86::ROL16rCL, X86::ROL16mCL, 0 },
- { X86::ROL16ri, X86::ROL16mi, 0 },
- { X86::ROL32r1, X86::ROL32m1, 0 },
- { X86::ROL32rCL, X86::ROL32mCL, 0 },
- { X86::ROL32ri, X86::ROL32mi, 0 },
- { X86::ROL64r1, X86::ROL64m1, 0 },
- { X86::ROL64rCL, X86::ROL64mCL, 0 },
- { X86::ROL64ri, X86::ROL64mi, 0 },
- { X86::ROL8r1, X86::ROL8m1, 0 },
- { X86::ROL8rCL, X86::ROL8mCL, 0 },
- { X86::ROL8ri, X86::ROL8mi, 0 },
- { X86::ROR16r1, X86::ROR16m1, 0 },
- { X86::ROR16rCL, X86::ROR16mCL, 0 },
- { X86::ROR16ri, X86::ROR16mi, 0 },
- { X86::ROR32r1, X86::ROR32m1, 0 },
- { X86::ROR32rCL, X86::ROR32mCL, 0 },
- { X86::ROR32ri, X86::ROR32mi, 0 },
- { X86::ROR64r1, X86::ROR64m1, 0 },
- { X86::ROR64rCL, X86::ROR64mCL, 0 },
- { X86::ROR64ri, X86::ROR64mi, 0 },
- { X86::ROR8r1, X86::ROR8m1, 0 },
- { X86::ROR8rCL, X86::ROR8mCL, 0 },
- { X86::ROR8ri, X86::ROR8mi, 0 },
- { X86::SAR16r1, X86::SAR16m1, 0 },
- { X86::SAR16rCL, X86::SAR16mCL, 0 },
- { X86::SAR16ri, X86::SAR16mi, 0 },
- { X86::SAR32r1, X86::SAR32m1, 0 },
- { X86::SAR32rCL, X86::SAR32mCL, 0 },
- { X86::SAR32ri, X86::SAR32mi, 0 },
- { X86::SAR64r1, X86::SAR64m1, 0 },
- { X86::SAR64rCL, X86::SAR64mCL, 0 },
- { X86::SAR64ri, X86::SAR64mi, 0 },
- { X86::SAR8r1, X86::SAR8m1, 0 },
- { X86::SAR8rCL, X86::SAR8mCL, 0 },
- { X86::SAR8ri, X86::SAR8mi, 0 },
- { X86::SBB32ri, X86::SBB32mi, 0 },
- { X86::SBB32ri8, X86::SBB32mi8, 0 },
- { X86::SBB32rr, X86::SBB32mr, 0 },
- { X86::SBB64ri32, X86::SBB64mi32, 0 },
- { X86::SBB64ri8, X86::SBB64mi8, 0 },
- { X86::SBB64rr, X86::SBB64mr, 0 },
- { X86::SHL16r1, X86::SHL16m1, 0 },
- { X86::SHL16rCL, X86::SHL16mCL, 0 },
- { X86::SHL16ri, X86::SHL16mi, 0 },
- { X86::SHL32r1, X86::SHL32m1, 0 },
- { X86::SHL32rCL, X86::SHL32mCL, 0 },
- { X86::SHL32ri, X86::SHL32mi, 0 },
- { X86::SHL64r1, X86::SHL64m1, 0 },
- { X86::SHL64rCL, X86::SHL64mCL, 0 },
- { X86::SHL64ri, X86::SHL64mi, 0 },
- { X86::SHL8r1, X86::SHL8m1, 0 },
- { X86::SHL8rCL, X86::SHL8mCL, 0 },
- { X86::SHL8ri, X86::SHL8mi, 0 },
- { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
- { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
- { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
- { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
- { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
- { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
- { X86::SHR16r1, X86::SHR16m1, 0 },
- { X86::SHR16rCL, X86::SHR16mCL, 0 },
- { X86::SHR16ri, X86::SHR16mi, 0 },
- { X86::SHR32r1, X86::SHR32m1, 0 },
- { X86::SHR32rCL, X86::SHR32mCL, 0 },
- { X86::SHR32ri, X86::SHR32mi, 0 },
- { X86::SHR64r1, X86::SHR64m1, 0 },
- { X86::SHR64rCL, X86::SHR64mCL, 0 },
- { X86::SHR64ri, X86::SHR64mi, 0 },
- { X86::SHR8r1, X86::SHR8m1, 0 },
- { X86::SHR8rCL, X86::SHR8mCL, 0 },
- { X86::SHR8ri, X86::SHR8mi, 0 },
- { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
- { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
- { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
- { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
- { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
- { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
- { X86::SUB16ri, X86::SUB16mi, 0 },
- { X86::SUB16ri8, X86::SUB16mi8, 0 },
- { X86::SUB16rr, X86::SUB16mr, 0 },
- { X86::SUB32ri, X86::SUB32mi, 0 },
- { X86::SUB32ri8, X86::SUB32mi8, 0 },
- { X86::SUB32rr, X86::SUB32mr, 0 },
- { X86::SUB64ri32, X86::SUB64mi32, 0 },
- { X86::SUB64ri8, X86::SUB64mi8, 0 },
- { X86::SUB64rr, X86::SUB64mr, 0 },
- { X86::SUB8ri, X86::SUB8mi, 0 },
- { X86::SUB8rr, X86::SUB8mr, 0 },
- { X86::XOR16ri, X86::XOR16mi, 0 },
- { X86::XOR16ri8, X86::XOR16mi8, 0 },
- { X86::XOR16rr, X86::XOR16mr, 0 },
- { X86::XOR32ri, X86::XOR32mi, 0 },
- { X86::XOR32ri8, X86::XOR32mi8, 0 },
- { X86::XOR32rr, X86::XOR32mr, 0 },
- { X86::XOR64ri32, X86::XOR64mi32, 0 },
- { X86::XOR64ri8, X86::XOR64mi8, 0 },
- { X86::XOR64rr, X86::XOR64mr, 0 },
- { X86::XOR8ri, X86::XOR8mi, 0 },
- { X86::XOR8rr, X86::XOR8mr, 0 }
- };
+// Generated memory folding tables.
+#include "X86GenFoldTables.inc"
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
@@ -295,744 +131,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
- { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
- { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
- { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
- { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
- { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
- { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
- { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
- { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
- { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
- { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
- { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
- { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
- { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
- { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
- { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
- { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
- { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
- { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
- { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
- { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
- { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
- { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
- { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
- { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
- { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
- { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
- { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
- { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
- { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
- { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
- { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
- { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
- { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
- { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
- { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
- { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
- { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
- { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
- { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
- { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
- { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
- { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
- { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
- { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
- { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
- { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
- { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
- { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
- { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
- { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
- { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
- { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
- { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
- { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
- { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
- { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
- { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
- { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
- { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
- { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
- { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
- { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
- { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
- { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
- { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
- { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
- { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
- { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
- { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
- { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
- { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
- { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
- { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
-
- // AVX 128-bit versions of foldable instructions
- { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
- { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
- { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
- { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
- { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
- { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
- { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
- { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
-
- // AVX 256-bit foldable instructions
- { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE },
- { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
-
- // AVX-512 foldable instructions
- { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
- { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
- { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
- { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
- { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE },
- { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
- { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
- { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE },
- { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE },
- { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE },
- { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE },
-
- // AVX-512 foldable instructions (256-bit versions)
- { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
- { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
- { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
- { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE },
-
- // AVX-512 foldable instructions (128-bit versions)
- { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
- { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
-
- // F16C foldable instructions
- { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
- { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
- };
-
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
}
- static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
- { X86::BSF16rr, X86::BSF16rm, 0 },
- { X86::BSF32rr, X86::BSF32rm, 0 },
- { X86::BSF64rr, X86::BSF64rm, 0 },
- { X86::BSR16rr, X86::BSR16rm, 0 },
- { X86::BSR32rr, X86::BSR32rm, 0 },
- { X86::BSR64rr, X86::BSR64rm, 0 },
- { X86::CMP16rr, X86::CMP16rm, 0 },
- { X86::CMP32rr, X86::CMP32rm, 0 },
- { X86::CMP64rr, X86::CMP64rm, 0 },
- { X86::CMP8rr, X86::CMP8rm, 0 },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
- { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
- { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::IMUL16rri, X86::IMUL16rmi, 0 },
- { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
- { X86::IMUL32rri, X86::IMUL32rmi, 0 },
- { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
- { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
- { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE },
- { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE },
- { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE },
- { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE },
- { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE },
- { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE },
- { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
- { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
- { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
- { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
- { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
- { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
- { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE },
- { X86::MOV16rr, X86::MOV16rm, 0 },
- { X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV64rr, X86::MOV64rm, 0 },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
- { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
- { X86::MOV8rr, X86::MOV8rm, 0 },
- { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
- { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
- { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
- { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
- { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
- { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
- { X86::MOVUPDrr, X86::MOVUPDrm, 0 },
- { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
- { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
- { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
- { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
- { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
- { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
- { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
- { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
- { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
- { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
- { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
- { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
- { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE },
- { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE },
- { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE },
- { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE },
- { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE },
- { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE },
- { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE },
- { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE },
- { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE },
- { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE },
- { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
- { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
- { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
- { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
- { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
- { X86::RCPSSr, X86::RCPSSm, 0 },
- { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE },
- { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
- { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
- { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
- { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
- { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
- { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE },
- { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
- { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
- { X86::SQRTSDr, X86::SQRTSDm, 0 },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE },
- { X86::SQRTSSr, X86::SQRTSSm, 0 },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE },
- { X86::TEST16rr, X86::TEST16rm, 0 },
- { X86::TEST32rr, X86::TEST32rm, 0 },
- { X86::TEST64rr, X86::TEST64rm, 0 },
- { X86::TEST8rr, X86::TEST8rm, 0 },
- // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
- { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
- { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
-
- // MMX version of foldable instructions
- { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
- { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
- { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
- { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
- { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
- { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
- { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
- { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
- { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
- { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
-
- // 3DNow! version of foldable instructions
- { X86::PF2IDrr, X86::PF2IDrm, 0 },
- { X86::PF2IWrr, X86::PF2IWrm, 0 },
- { X86::PFRCPrr, X86::PFRCPrm, 0 },
- { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
- { X86::PI2FDrr, X86::PI2FDrm, 0 },
- { X86::PI2FWrr, X86::PI2FWrm, 0 },
- { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
-
- // AVX 128-bit versions of foldable instructions
- { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE },
- { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE },
- { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE },
- { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE },
- { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE },
- { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE },
- { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
- { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
- { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 },
- { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 },
- { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
- { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
- { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
- { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
- { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
- { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
- { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
- { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
- { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
- { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
- { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 },
- { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
- { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
- { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
- { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE },
- { X86::VPABSBrr, X86::VPABSBrm, 0 },
- { X86::VPABSDrr, X86::VPABSDrm, 0 },
- { X86::VPABSWrr, X86::VPABSWrm, 0 },
- { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
- { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
- { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
- { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 },
- { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
- { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
- { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
- { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE },
- { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE },
- { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE },
- { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE },
- { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE },
- { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE },
- { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE },
- { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE },
- { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE },
- { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE },
- { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE },
- { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE },
- { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
- { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
- { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
- { X86::VPTESTrr, X86::VPTESTrm, 0 },
- { X86::VRCPPSr, X86::VRCPPSm, 0 },
- { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
- { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
- { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
- { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
- { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
- { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
- { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
- { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
- { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
-
- // AVX 256-bit foldable instructions
- { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE },
- { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
- { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
- { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
- { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
- { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE },
- { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
- { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
- { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 },
- { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
- { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
- { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
- { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
- { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
- { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
-
- // AVX2 foldable instructions
-
- // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
- // VBROADCASTS{SD}rm memory instructions were available from AVX1.
- // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
- // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
- // so they don't need an equivalent limitation.
- { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- { X86::VPABSBYrr, X86::VPABSBYrm, 0 },
- { X86::VPABSDYrr, X86::VPABSDYrm, 0 },
- { X86::VPABSWYrr, X86::VPABSWYrm, 0 },
- { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE },
- { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
- { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
- { X86::VPERMQYri, X86::VPERMQYmi, 0 },
- { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE },
- { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE },
- { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
- { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
- { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
- { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE },
- { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE },
- { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE },
- { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
- { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
- { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
- { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
-
- // XOP foldable instructions
- { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
- { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 },
- { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
- { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 },
- { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 },
- { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 },
- { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
- { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
- { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
- { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
- { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
- { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
- { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
- { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
- { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
- { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
- { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
- { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
- { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
- { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
- { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
- { X86::VPROTBri, X86::VPROTBmi, 0 },
- { X86::VPROTBrr, X86::VPROTBmr, 0 },
- { X86::VPROTDri, X86::VPROTDmi, 0 },
- { X86::VPROTDrr, X86::VPROTDmr, 0 },
- { X86::VPROTQri, X86::VPROTQmi, 0 },
- { X86::VPROTQrr, X86::VPROTQmr, 0 },
- { X86::VPROTWri, X86::VPROTWmi, 0 },
- { X86::VPROTWrr, X86::VPROTWmr, 0 },
- { X86::VPSHABrr, X86::VPSHABmr, 0 },
- { X86::VPSHADrr, X86::VPSHADmr, 0 },
- { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
- { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
- { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
- { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
- { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
- { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
-
- // LWP foldable instructions
- { X86::LWPINS32rri, X86::LWPINS32rmi, 0 },
- { X86::LWPINS64rri, X86::LWPINS64rmi, 0 },
- { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 },
- { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 },
-
- // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
- { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
- { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
- { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
- { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
- { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
- { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
- { X86::BLCI32rr, X86::BLCI32rm, 0 },
- { X86::BLCI64rr, X86::BLCI64rm, 0 },
- { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
- { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
- { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
- { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
- { X86::BLCS32rr, X86::BLCS32rm, 0 },
- { X86::BLCS64rr, X86::BLCS64rm, 0 },
- { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
- { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
- { X86::BLSI32rr, X86::BLSI32rm, 0 },
- { X86::BLSI64rr, X86::BLSI64rm, 0 },
- { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
- { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
- { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
- { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
- { X86::BLSR32rr, X86::BLSR32rm, 0 },
- { X86::BLSR64rr, X86::BLSR64rm, 0 },
- { X86::BZHI32rr, X86::BZHI32rm, 0 },
- { X86::BZHI64rr, X86::BZHI64rm, 0 },
- { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
- { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
- { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
- { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
- { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
- { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
- { X86::RORX32ri, X86::RORX32mi, 0 },
- { X86::RORX64ri, X86::RORX64mi, 0 },
- { X86::SARX32rr, X86::SARX32rm, 0 },
- { X86::SARX64rr, X86::SARX64rm, 0 },
- { X86::SHRX32rr, X86::SHRX32rm, 0 },
- { X86::SHRX64rr, X86::SHRX64rm, 0 },
- { X86::SHLX32rr, X86::SHLX32rm, 0 },
- { X86::SHLX64rr, X86::SHLX64rm, 0 },
- { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
- { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
- { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
- { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
- { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
- { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
- { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
-
- // AVX-512 foldable instructions
- { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
- { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
- { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
- { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
- { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
- { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
- { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
- { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
- { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
- { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
- { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
- { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
- { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
- { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
- { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
- { X86::VPABSBZrr, X86::VPABSBZrm, 0 },
- { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
- { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
- { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
- { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
- { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
- { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
- { X86::VPERMQZri, X86::VPERMQZmi, 0 },
- { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
- { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
- { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
- { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
- { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
- { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
- { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
- { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE },
- { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
- { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
- { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
- { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
- { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
- { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
- { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
- { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 },
- { X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
- { X86::VPSLLQZri, X86::VPSLLQZmi, 0 },
- { X86::VPSLLWZri, X86::VPSLLWZmi, 0 },
- { X86::VPSRADZri, X86::VPSRADZmi, 0 },
- { X86::VPSRAQZri, X86::VPSRAQZmi, 0 },
- { X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
- { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 },
- { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
- { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
- { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
-
- // AVX-512 foldable instructions (256-bit versions)
- { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
- { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
- { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
- { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
- { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
- { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
- { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
- { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
- { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
- { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
- { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
- { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
- { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
- { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
- { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
- { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
- { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
- { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
- { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
- { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
- { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
- { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
- { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
- { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
- { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE },
- { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
- { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
- { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
- { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
- { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
- { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 },
- { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 },
- { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 },
- { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 },
- { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
- { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
- { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
- { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
- { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
-
- // AVX-512 foldable instructions (128-bit versions)
- { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
- { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
- { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
- { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
- { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
- { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
- { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
- { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
- { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
- { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
- { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
- { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
- { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
- { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
- { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
- { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
- { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE },
- { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
- { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
- { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
- { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
- { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
- { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 },
- { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 },
- { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 },
- { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 },
- { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
- { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
- { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
- { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
- { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
-
- // F16C foldable instructions
- { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
- { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
-
- // AES foldable instructions
- { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
- { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
- { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
- { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
- };
-
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
@@ -1040,1394 +143,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
- { X86::ADC32rr, X86::ADC32rm, 0 },
- { X86::ADC64rr, X86::ADC64rm, 0 },
- { X86::ADD16rr, X86::ADD16rm, 0 },
- { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
- { X86::ADD32rr, X86::ADD32rm, 0 },
- { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
- { X86::ADD64rr, X86::ADD64rm, 0 },
- { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
- { X86::ADD8rr, X86::ADD8rm, 0 },
- { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
- { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
- { X86::ADDSDrr, X86::ADDSDrm, 0 },
- { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE },
- { X86::ADDSSrr, X86::ADDSSrm, 0 },
- { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
- { X86::AND16rr, X86::AND16rm, 0 },
- { X86::AND32rr, X86::AND32rm, 0 },
- { X86::AND64rr, X86::AND64rm, 0 },
- { X86::AND8rr, X86::AND8rm, 0 },
- { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
- { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
- { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
- { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
- { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
- { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
- { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
- { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
- { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
- { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
- { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
- { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
- { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
- { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
- { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
- { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
- { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
- { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
- { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
- { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
- { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
- { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
- { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
- { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
- { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
- { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
- { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
- { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
- { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
- { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
- { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
- { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
- { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
- { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
- { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
- { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
- { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
- { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
- { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
- { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
- { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
- { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
- { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
- { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
- { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
- { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
- { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
- { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
- { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
- { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
- { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
- { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
- { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
- { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
- { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
- { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
- { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
- { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
- { X86::CMPSDrr, X86::CMPSDrm, 0 },
- { X86::CMPSSrr, X86::CMPSSrm, 0 },
- { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
- { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
- { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
- { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
- { X86::DIVSDrr, X86::DIVSDrm, 0 },
- { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE },
- { X86::DIVSSrr, X86::DIVSSrm, 0 },
- { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE },
- { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
- { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
- { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
- { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
- { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
- { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
- { X86::IMUL16rr, X86::IMUL16rm, 0 },
- { X86::IMUL32rr, X86::IMUL32rm, 0 },
- { X86::IMUL64rr, X86::IMUL64rm, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE },
- { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
- { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 },
- { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
- { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 },
- { X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXCSDrr, X86::MAXCSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE },
- { X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXCSSrr, X86::MAXCSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE },
- { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
- { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 },
- { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
- { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 },
- { X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINCSDrr, X86::MINCSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
- { X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINCSSrr, X86::MINCSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
- { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
- { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
- { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
- { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
- { X86::MULSDrr, X86::MULSDrm, 0 },
- { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE },
- { X86::MULSSrr, X86::MULSSrm, 0 },
- { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE },
- { X86::OR16rr, X86::OR16rm, 0 },
- { X86::OR32rr, X86::OR32rm, 0 },
- { X86::OR64rr, X86::OR64rm, 0 },
- { X86::OR8rr, X86::OR8rm, 0 },
- { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
- { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
- { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
- { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
- { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
- { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
- { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
- { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
- { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
- { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
- { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
- { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
- { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
- { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
- { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 },
- { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
- { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
- { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
- { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
- { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
- { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
- { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
- { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
- { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
- { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
- { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
- { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
- { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
- { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
- { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
- { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
- { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
- { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
- { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
- { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
- { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
- { X86::PINSRBrr, X86::PINSRBrm, 0 },
- { X86::PINSRDrr, X86::PINSRDrm, 0 },
- { X86::PINSRQrr, X86::PINSRQrm, 0 },
- { X86::PINSRWrri, X86::PINSRWrmi, 0 },
- { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
- { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
- { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
- { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
- { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
- { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
- { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
- { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
- { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
- { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
- { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
- { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
- { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
- { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
- { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
- { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
- { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
- { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
- { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
- { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
- { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
- { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
- { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
- { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
- { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 },
- { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 },
- { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 },
- { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
- { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
- { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
- { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
- { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
- { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
- { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
- { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
- { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
- { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
- { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
- { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
- { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
- { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
- { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
- { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
- { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
- { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE },
- { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE },
- { X86::SBB32rr, X86::SBB32rm, 0 },
- { X86::SBB64rr, X86::SBB64rm, 0 },
- { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
- { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
- { X86::SUB16rr, X86::SUB16rm, 0 },
- { X86::SUB32rr, X86::SUB32rm, 0 },
- { X86::SUB64rr, X86::SUB64rm, 0 },
- { X86::SUB8rr, X86::SUB8rm, 0 },
- { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
- { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
- { X86::SUBSDrr, X86::SUBSDrm, 0 },
- { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
- { X86::SUBSSrr, X86::SUBSSrm, 0 },
- { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- // FIXME: TEST*rr -> swapped operand of TEST*mr.
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
- { X86::XOR16rr, X86::XOR16rm, 0 },
- { X86::XOR32rr, X86::XOR32rm, 0 },
- { X86::XOR64rr, X86::XOR64rm, 0 },
- { X86::XOR8rr, X86::XOR8rm, 0 },
- { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
- { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
-
- // MMX version of foldable instructions
- { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
- { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
- { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
- { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
- { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
- { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
- { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
- { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
- { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
- { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
- { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
- { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
- { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
- { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
- { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
- { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
- { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
- { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
- { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
- { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
- { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
- { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
- { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
- { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
- { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
- { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
- { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
- { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
- { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
- { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
- { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
- { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
- { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
- { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
- { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
- { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
- { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
- { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
- { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
- { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
- { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
- { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
- { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
- { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
- { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
- { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
- { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
- { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
- { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
- { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
- { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
- { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
- { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
- { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
- { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
- { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
- { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
- { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
- { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
- { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
- { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
- { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
- { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
- { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
- { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
- { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
- { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
- { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
- { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
- { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
-
- // 3DNow! version of foldable instructions
- { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
- { X86::PFACCrr, X86::PFACCrm, 0 },
- { X86::PFADDrr, X86::PFADDrm, 0 },
- { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
- { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
- { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
- { X86::PFMAXrr, X86::PFMAXrm, 0 },
- { X86::PFMINrr, X86::PFMINrm, 0 },
- { X86::PFMULrr, X86::PFMULrm, 0 },
- { X86::PFNACCrr, X86::PFNACCrm, 0 },
- { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
- { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
- { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
- { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
- { X86::PFSUBrr, X86::PFSUBrm, 0 },
- { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
- { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
-
- // AVX 128-bit versions of foldable instructions
- { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
- { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
- { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
- { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
- { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
- { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
- { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
- { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
- { X86::VADDPDrr, X86::VADDPDrm, 0 },
- { X86::VADDPSrr, X86::VADDPSrm, 0 },
- { X86::VADDSDrr, X86::VADDSDrm, 0 },
- { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
- { X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE },
- { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
- { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
- { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
- { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
- { X86::VANDPDrr, X86::VANDPDrm, 0 },
- { X86::VANDPSrr, X86::VANDPSrm, 0 },
- { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
- { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
- { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
- { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
- { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
- { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
- { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
- { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
- { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
- { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
- { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
- { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
- { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
- { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE },
- { X86::VDPPDrri, X86::VDPPDrmi, 0 },
- { X86::VDPPSrri, X86::VDPPSrmi, 0 },
- { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
- { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
- { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
- { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
- { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE },
- { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE },
- { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
- { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
- { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
- { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
- { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
- { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
- { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
- { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE },
- { X86::VMINCPDrr, X86::VMINCPDrm, 0 },
- { X86::VMINCPSrr, X86::VMINCPSrm, 0 },
- { X86::VMINCSDrr, X86::VMINCSDrm, 0 },
- { X86::VMINCSSrr, X86::VMINCSSrm, 0 },
- { X86::VMINPDrr, X86::VMINPDrm, 0 },
- { X86::VMINPSrr, X86::VMINPSrm, 0 },
- { X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
- { X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE },
- { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
- { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
- { X86::VMULPDrr, X86::VMULPDrm, 0 },
- { X86::VMULPSrr, X86::VMULPSrm, 0 },
- { X86::VMULSDrr, X86::VMULSDrm, 0 },
- { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
- { X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE },
- { X86::VORPDrr, X86::VORPDrm, 0 },
- { X86::VORPSrr, X86::VORPSrm, 0 },
- { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
- { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
- { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
- { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
- { X86::VPADDBrr, X86::VPADDBrm, 0 },
- { X86::VPADDDrr, X86::VPADDDrm, 0 },
- { X86::VPADDQrr, X86::VPADDQrm, 0 },
- { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
- { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
- { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
- { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
- { X86::VPADDWrr, X86::VPADDWrm, 0 },
- { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 },
- { X86::VPANDNrr, X86::VPANDNrm, 0 },
- { X86::VPANDrr, X86::VPANDrm, 0 },
- { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
- { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
- { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
- { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
- { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
- { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
- { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
- { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
- { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
- { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
- { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
- { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
- { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
- { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
- { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
- { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
- { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
- { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
- { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
- { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
- { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
- { X86::VPINSRBrr, X86::VPINSRBrm, 0 },
- { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
- { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
- { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
- { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
- { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
- { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
- { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
- { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
- { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
- { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
- { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
- { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
- { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
- { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
- { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
- { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
- { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
- { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
- { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
- { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
- { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
- { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
- { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
- { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
- { X86::VPORrr, X86::VPORrm, 0 },
- { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
- { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
- { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 },
- { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 },
- { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 },
- { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
- { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
- { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
- { X86::VPSRADrr, X86::VPSRADrm, 0 },
- { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
- { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
- { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
- { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
- { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
- { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
- { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
- { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
- { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
- { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
- { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
- { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
- { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
- { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
- { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
- { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
- { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
- { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
- { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
- { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
- { X86::VPXORrr, X86::VPXORrm, 0 },
- { X86::VRCPSSr, X86::VRCPSSm, 0 },
- { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
- { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
- { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
- { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
- { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE },
- { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
- { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE },
- { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
- { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
- { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
- { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
- { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE },
- { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
- { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
- { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
- { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
- { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
- { X86::VXORPDrr, X86::VXORPDrm, 0 },
- { X86::VXORPSrr, X86::VXORPSrm, 0 },
-
- // AVX 256-bit foldable instructions
- { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
- { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
- { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
- { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
- { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
- { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
- { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
- { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
- { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
- { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
- { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
- { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
- { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
- { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
- { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
- { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
- { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
- { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
- { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
- { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
- { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
- { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 },
- { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
- { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
- { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
- { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 },
- { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
- { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
- { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
- { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
- { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
- { X86::VORPDYrr, X86::VORPDYrm, 0 },
- { X86::VORPSYrr, X86::VORPSYrm, 0 },
- { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
- { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
- { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
- { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
- { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
- { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
- { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
- { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
- { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
-
- // AVX2 foldable instructions
- { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
- { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
- { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
- { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
- { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
- { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 },
- { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
- { X86::VPANDYrr, X86::VPANDYrm, 0 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
- { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
- { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
- { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
- { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
- { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
- { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
- { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
- { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
- { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
- { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
- { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
- { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
- { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
- { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
- { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
- { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
- { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
- { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
- { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
- { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
- { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
- { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
- { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
- { X86::VPORYrr, X86::VPORYrm, 0 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
- { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
- { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 },
- { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 },
- { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 },
- { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
- { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
- { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
- { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
- { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
- { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
- { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
- { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
- { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
- { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
- { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
- { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
- { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
- { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
- { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
- { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
- { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
- { X86::VPXORYrr, X86::VPXORYrm, 0 },
-
- // FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
- { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE },
- { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
- { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE },
- { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
- { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
- { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE },
- { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
- { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
- { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
- { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE },
- { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
- { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
- { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE },
- { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
- { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
- { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE },
-
- // XOP foldable instructions
- { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
- { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
- { X86::VPCOMBri, X86::VPCOMBmi, 0 },
- { X86::VPCOMDri, X86::VPCOMDmi, 0 },
- { X86::VPCOMQri, X86::VPCOMQmi, 0 },
- { X86::VPCOMWri, X86::VPCOMWmi, 0 },
- { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
- { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
- { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
- { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
- { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
- { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
- { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
- { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
- { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
- { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
- { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
- { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
- { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
- { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
- { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
- { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
- { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
- { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
- { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
- { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
- { X86::VPPERMrrr, X86::VPPERMrmr, 0 },
- { X86::VPROTBrr, X86::VPROTBrm, 0 },
- { X86::VPROTDrr, X86::VPROTDrm, 0 },
- { X86::VPROTQrr, X86::VPROTQrm, 0 },
- { X86::VPROTWrr, X86::VPROTWrm, 0 },
- { X86::VPSHABrr, X86::VPSHABrm, 0 },
- { X86::VPSHADrr, X86::VPSHADrm, 0 },
- { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
- { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
- { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
- { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
- { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
- { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
-
- // BMI/BMI2 foldable instructions
- { X86::ANDN32rr, X86::ANDN32rm, 0 },
- { X86::ANDN64rr, X86::ANDN64rm, 0 },
- { X86::MULX32rr, X86::MULX32rm, 0 },
- { X86::MULX64rr, X86::MULX64rm, 0 },
- { X86::PDEP32rr, X86::PDEP32rm, 0 },
- { X86::PDEP64rr, X86::PDEP64rm, 0 },
- { X86::PEXT32rr, X86::PEXT32rm, 0 },
- { X86::PEXT64rr, X86::PEXT64rm, 0 },
-
- // ADX foldable instructions
- { X86::ADCX32rr, X86::ADCX32rm, 0 },
- { X86::ADCX64rr, X86::ADCX64rm, 0 },
- { X86::ADOX32rr, X86::ADOX32rm, 0 },
- { X86::ADOX64rr, X86::ADOX64rm, 0 },
-
- // AVX-512 foldable instructions
- { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
- { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
- { X86::VADDSDZrr, X86::VADDSDZrm, 0 },
- { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
- { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
- { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
- { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
- { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
- { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
- { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
- { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
- { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
- { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
- { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
- { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
- { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
- { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
- { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
- { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
- { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
- { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
- { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
- { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
- { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
- { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
- { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
- { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
- { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 },
- { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 },
- { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 },
- { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 },
- { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 },
- { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
- { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
- { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
- { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
- { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
- { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
- { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
- { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
- { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
- { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
- { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
- { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
- { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
- { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
- { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
- { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
- { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
- { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
- { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
- { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
- { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
- { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
- { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
- { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
- { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
- { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
- { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
- { X86::VORPDZrr, X86::VORPDZrm, 0 },
- { X86::VORPSZrr, X86::VORPSZrm, 0 },
- { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
- { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
- { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
- { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
- { X86::VPADDBZrr, X86::VPADDBZrm, 0 },
- { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
- { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
- { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 },
- { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 },
- { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 },
- { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 },
- { X86::VPADDWZrr, X86::VPADDWZrm, 0 },
- { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 },
- { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
- { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
- { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
- { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
- { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
- { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
- { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
- { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
- { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
- { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 },
- { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 },
- { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 },
- { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 },
- { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
- { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
- { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
- { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
- { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
- { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
- { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
- { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
- { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
- { X86::VPERMBZrr, X86::VPERMBZrm, 0 },
- { X86::VPERMDZrr, X86::VPERMDZrm, 0 },
- { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 },
- { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 },
- { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 },
- { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
- { X86::VPERMQZrr, X86::VPERMQZrm, 0 },
- { X86::VPERMWZrr, X86::VPERMWZrm, 0 },
- { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 },
- { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 },
- { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 },
- { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 },
- { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
- { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
- { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
- { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
- { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
- { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
- { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
- { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
- { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
- { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
- { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
- { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
- { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
- { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
- { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
- { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
- { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
- { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
- { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
- { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
- { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
- { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
- { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
- { X86::VPORDZrr, X86::VPORDZrm, 0 },
- { X86::VPORQZrr, X86::VPORQZrm, 0 },
- { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 },
- { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
- { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
- { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
- { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
- { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
- { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 },
- { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 },
- { X86::VPSRADZrr, X86::VPSRADZrm, 0 },
- { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 },
- { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
- { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 },
- { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 },
- { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 },
- { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 },
- { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 },
- { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
- { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
- { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 },
- { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 },
- { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
- { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
- { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
- { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 },
- { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 },
- { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
- { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
- { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
- { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
- { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
- { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
- { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 },
- { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 },
- { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 },
- { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 },
- { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
- { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
- { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
- { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
- { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
- { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
- { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
- { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
- { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
- { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
- { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
- { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 },
- { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 },
- { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 },
- { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 },
- { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
- { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
-
- // AVX-512{F,VL} foldable instructions
- { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
- { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
- { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
- { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
- { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 },
- { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 },
- { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 },
- { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 },
- { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 },
- { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 },
- { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 },
- { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 },
- { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 },
- { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
- { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
- { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
- { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
- { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
- { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
- { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
- { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
- { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
- { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
- { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
- { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 },
- { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 },
- { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 },
- { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm, 0 },
- { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
- { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
- { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
- { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
- { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
- { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
- { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
- { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
- { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
- { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
- { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
- { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
- { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
- { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
- { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
- { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
- { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
- { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
- { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
- { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
- { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
- { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
- { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
- { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
- { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
- { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
- { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
- { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
- { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
- { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
- { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
- { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
- { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
- { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
- { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
- { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 },
- { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 },
- { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 },
- { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 },
- { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 },
- { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 },
- { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 },
- { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 },
- { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 },
- { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 },
- { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 },
- { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 },
- { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 },
- { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 },
- { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 },
- { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 },
- { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 },
- { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 },
- { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 },
- { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 },
- { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
- { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
- { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
- { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
- { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
- { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
- { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
- { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
- { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
- { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
- { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
- { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 },
- { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 },
- { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 },
- { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 },
- { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 },
- { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 },
- { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 },
- { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 },
- { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 },
- { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 },
- { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 },
- { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 },
- { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 },
- { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
- { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
- { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
- { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
- { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
- { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
- { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
- { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
- { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
- { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
- { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
- { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
- { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
- { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
- { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
- { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 },
- { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 },
- { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 },
- { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 },
- { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 },
- { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 },
- { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 },
- { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 },
- { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 },
- { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 },
- { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 },
- { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 },
- { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 },
- { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
- { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
- { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
- { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
- { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
- { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
- { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
- { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
- { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
- { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
- { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
- { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
- { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
- { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
- { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
- { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
- { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
- { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
- { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
- { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
- { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
- { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
- { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
- { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
- { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
- { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
- { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
- { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
- { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
- { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
- { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
- { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
- { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
- { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
- { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
- { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
- { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
- { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
- { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
- { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
- { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
- { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
- { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
- { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
- { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
- { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
- { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
- { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
- { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
- { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
- { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
- { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
- { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
- { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
- { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 },
- { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 },
- { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 },
- { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 },
- { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 },
- { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 },
- { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 },
- { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 },
- { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 },
- { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 },
- { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 },
- { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 },
- { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 },
- { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 },
- { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 },
- { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 },
- { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 },
- { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 },
- { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 },
- { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 },
- { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 },
- { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 },
- { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 },
- { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 },
- { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 },
- { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 },
- { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 },
- { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 },
- { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 },
- { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 },
- { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 },
- { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 },
- { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 },
- { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 },
- { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 },
- { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
- { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
- { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
- { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 },
- { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 },
- { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 },
- { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 },
- { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 },
- { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 },
- { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 },
- { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 },
- { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 },
- { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 },
- { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
- { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
- { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
- { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
- { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
- { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
- { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 },
- { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 },
- { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 },
- { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 },
- { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 },
- { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 },
- { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 },
- { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 },
- { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 },
- { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 },
- { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 },
- { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 },
- { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 },
- { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 },
- { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
- { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
- { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
- { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 },
- { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 },
- { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 },
- { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 },
- { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
- { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
- { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
- { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
- { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 },
- { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 },
- { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 },
- { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 },
- { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 },
- { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 },
- { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 },
- { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 },
- { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 },
- { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
- { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
- { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
-
- // AVX-512 masked foldable instructions
- { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
- { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
- { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
- { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
- { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
- { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
- { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
- { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
- { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
- { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
- { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
- { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
- { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
- { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
- { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
- { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
- { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
- { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
- { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
- { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
- { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
- { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
- { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 },
- { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 },
- { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 },
- { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 },
- { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 },
- { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 },
- { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
- { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
- { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
-
- // AVX-512VL 256-bit masked foldable instructions
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
- { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
- { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
- { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
- { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
- { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
- { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
- { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
- { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
- { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
- { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
- { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
- { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
- { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
- { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
- { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE },
- { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
- { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
- { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
- { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 },
- { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 },
- { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 },
- { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 },
- { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 },
- { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 },
- { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
- { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
- { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
-
- // AVX-512VL 128-bit masked foldable instructions
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
- { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
- { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
- { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
- { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
- { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
- { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
- { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE },
- { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
- { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
- { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
- { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 },
- { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 },
- { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 },
- { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 },
- { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 },
- { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 },
- { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
- { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
- { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
-
- // AES foldable instructions
- { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
- { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
- { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
- { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
- { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
- { X86::VAESDECrr, X86::VAESDECrm, 0 },
- { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
- { X86::VAESENCrr, X86::VAESENCrm, 0 },
-
- // SHA foldable instructions
- { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
- { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
- { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
- { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
- { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
- { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
- { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
- };
-
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
@@ -2435,1103 +150,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
- static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
- // FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
- { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE },
- { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
- { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE },
- { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
- { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
- { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE },
- { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
- { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
- { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
- { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE },
- { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
- { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
- { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE },
- { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
- { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
- { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE },
-
- // XOP foldable instructions
- { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
- { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
- { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
- { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
- { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
- { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
- { X86::VPPERMrrr, X86::VPPERMrrm, 0 },
-
- // AVX-512 instructions with 3 source operands.
- { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
- { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
- { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
- { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
- { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
- { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 },
- { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 },
- { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 },
- { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 },
- { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
- { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
- { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
- { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
- { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
-
- // AVX-512VL 256-bit instructions with 3 source operands.
- { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
- { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
- { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
- { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 },
- { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 },
- { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 },
- { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 },
- { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 },
- { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 },
- { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
- { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
- { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
- { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
- { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
-
- // AVX-512VL 128-bit instructions with 3 source operands.
- { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
- { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
- { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
- { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 },
- { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 },
- { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 },
- { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 },
- { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 },
- { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 },
- { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
- { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
- { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
- { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
- { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
-
- // AVX-512 masked instructions
- { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
- { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
- { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
- { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
- { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
- { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
- { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
- { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
- { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
- { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
- { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
- { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
- { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
- { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
- { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
- { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
- { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
- { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
- { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
- { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
- { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
- { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
- { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 },
- { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 },
- { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
- { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
- { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
- { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
- { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 },
- { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 },
- { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
- { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
- { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
- { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
- { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
- { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
- { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
- { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
- { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
- { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
- { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
- { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
- { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
- { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
- { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
- { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
- { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 },
- { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
- { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
- { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
- { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
- { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
- { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
- { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
- { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
- { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
- { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 },
- { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 },
- { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 },
- { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 },
- { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
- { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
- { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
- { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
- { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
- { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
- { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
- { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
- { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
- { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
- { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
- { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
- { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
- { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
- { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
- { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
- { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
- { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
- { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
- { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
- { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
- { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
- { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
- { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
- { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
- { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
- { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
- { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
- { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
- { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
- { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
- { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
- { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
- { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
- { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
- { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
- { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
- { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
- { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
- { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
- { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
- { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
- { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
- { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
- { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
- { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
- { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
- { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
- { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
- { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
- { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
- { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
- { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
- { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 },
- { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 },
- { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 },
- { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 },
- { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 },
- { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 },
- { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 },
- { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
- { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
- { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
- { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
- { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
- { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
- { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
- { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
- { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
- { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
- { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 },
- { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 },
- { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 },
-
- // AVX-512{F,VL} masked arithmetic instructions 256-bit
- { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
- { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
- { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
- { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 },
- { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 },
- { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
- { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
- { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
- { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
- { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
- { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
- { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
- { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
- { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
- { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
- { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
- { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
- { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
- { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
- { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
- { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
- { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
- { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
- { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
- { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
- { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
- { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
- { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
- { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
- { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
- { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
- { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
- { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
- { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 },
- { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 },
- { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 },
- { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 },
- { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 },
- { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 },
- { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 },
- { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
- { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
- { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
- { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
- { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
- { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
- { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
- { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
- { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 },
- { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 },
- { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 },
- { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 },
- { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
- { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
- { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
- { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
- { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
- { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
- { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
- { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
- { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
- { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
- { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
- { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
- { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
- { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
- { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
- { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
- { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
- { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
- { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
- { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
- { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
- { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
- { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
- { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
- { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
- { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
- { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
- { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
- { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
- { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
- { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
- { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
- { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
- { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
- { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
- { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
- { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
- { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
- { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
- { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
- { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
- { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
- { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
- { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
- { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
- { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
- { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
- { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
- { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 },
- { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 },
- { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 },
- { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 },
- { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 },
- { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 },
- { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 },
- { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 },
- { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 },
- { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 },
- { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 },
- { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 },
- { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
- { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
- { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
- { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 },
- { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 },
- { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
- { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
- { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
- { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 },
- { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 },
- { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 },
- { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 },
- { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 },
-
- // AVX-512{F,VL} masked arithmetic instructions 128-bit
- { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
- { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
- { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
- { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 },
- { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 },
- { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
- { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
- { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
- { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
- { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
- { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
- { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
- { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
- { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
- { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
- { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
- { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
- { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
- { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
- { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
- { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
- { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
- { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
- { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
- { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
- { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
- { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
- { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
- { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
- { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 },
- { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 },
- { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 },
- { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 },
- { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 },
- { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 },
- { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 },
- { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
- { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
- { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
- { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
- { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
- { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
- { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
- { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
- { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
- { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
- { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
- { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
- { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
- { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
- { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
- { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
- { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
- { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
- { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
- { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
- { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
- { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
- { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
- { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
- { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
- { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
- { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
- { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
- { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
- { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
- { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
- { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
- { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
- { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
- { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
- { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
- { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
- { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
- { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
- { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
- { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
- { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
- { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
- { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
- { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
- { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
- { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
- { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
- { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
- { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
- { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
- { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
- { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
- { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
- { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
- { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
- { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 },
- { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 },
- { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 },
- { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 },
- { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 },
- { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 },
- { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 },
- { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 },
- { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 },
- { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 },
- { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 },
- { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 },
- { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
- { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
- { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
- { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
- { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
- { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
- { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
- { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
- { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 },
- { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 },
- { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 },
- { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
- { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
-
- // AVX-512 masked foldable instructions
- { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
- { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
- { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
- { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
- { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
- { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
- { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
- { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
- { X86::VPERMQZrik, X86::VPERMQZmik, 0 },
- { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
- { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
- { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
- { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
- { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
- { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
- { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
- { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE },
- { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
- { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
- { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
- { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
- { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
- { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
- { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
- { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 },
- { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 },
- { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 },
- { X86::VPSRADZrik, X86::VPSRADZmik, 0 },
- { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 },
- { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 },
- { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
- { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
- { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
-
- // AVX-512VL 256-bit masked foldable instructions
- { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
- { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
- { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
- { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
- { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
- { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
- { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
- { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
- { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
- { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
- { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
- { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
- { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
- { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
- { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
- { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE },
- { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
- { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
- { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
- { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 },
- { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 },
- { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 },
- { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 },
- { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 },
- { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 },
- { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
- { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
- { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
-
- // AVX-512VL 128-bit masked foldable instructions
- { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
- { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
- { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
- { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
- { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
- { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
- { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
- { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE },
- { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
- { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
- { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
- { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 },
- { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 },
- { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 },
- { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 },
- { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 },
- { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 },
- { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
- { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
- { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
- };
-
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
Entry.RegOp, Entry.MemOp,
// Index 3, folded load
Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
- auto I = X86InstrFMA3Info::rm_begin();
- auto E = X86InstrFMA3Info::rm_end();
- for (; I != E; ++I) {
- if (!I.getGroup()->isKMasked()) {
- // Intrinsic forms need to pass TB_NO_REVERSE.
- if (I.getGroup()->isIntrinsic()) {
- AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
- I.getRegOpcode(), I.getMemOpcode(),
- TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE);
- } else {
- AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
- I.getRegOpcode(), I.getMemOpcode(),
- TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
- }
- }
- }
-
- static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
- // AVX-512 foldable masked instructions
- { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
- { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
- { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
- { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
- { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
- { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
- { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
- { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
- { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
- { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
- { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
- { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
- { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
- { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
- { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
- { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
- { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
- { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
- { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
- { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
- { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
- { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
- { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
- { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
- { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
- { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
- { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 },
- { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 },
- { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
- { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
- { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
- { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
- { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 },
- { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 },
- { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
- { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
- { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
- { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
- { X86::VORPDZrrk, X86::VORPDZrmk, 0 },
- { X86::VORPSZrrk, X86::VORPSZrmk, 0 },
- { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
- { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
- { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
- { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
- { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
- { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
- { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
- { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
- { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
- { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
- { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
- { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
- { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 },
- { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
- { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
- { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
- { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
- { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
- { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
- { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
- { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
- { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
- { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
- { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
- { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
- { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
- { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
- { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
- { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
- { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
- { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
- { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
- { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
- { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
- { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
- { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
- { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
- { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
- { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
- { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
- { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
- { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
- { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
- { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
- { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
- { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
- { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
- { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
- { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
- { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
- { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
- { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
- { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
- { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
- { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
- { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
- { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
- { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
- { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
- { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
- { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
- { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
- { X86::VPORDZrrk, X86::VPORDZrmk, 0 },
- { X86::VPORQZrrk, X86::VPORQZrmk, 0 },
- { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
- { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
- { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
- { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
- { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
- { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
- { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
- { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
- { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
- { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
- { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
- { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
- { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
- { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
- { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
- { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
- { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
- { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
- { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
- { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
- { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
- { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
- { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
- { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
- { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
- { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
- { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
- { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
- { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
- { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
- { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
- { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 },
- { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 },
- { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 },
- { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 },
- { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
- { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
- { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
- { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
- { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
- { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
- { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
- { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
- { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
- { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
- { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
- { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
- { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 },
- { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 },
- { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
-
- // AVX-512{F,VL} foldable masked instructions 256-bit
- { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
- { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
- { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
- { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 },
- { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 },
- { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
- { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
- { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
- { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
- { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
- { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
- { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
- { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
- { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
- { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
- { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
- { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
- { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
- { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
- { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
- { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
- { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
- { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
- { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
- { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
- { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
- { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
- { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
- { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
- { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
- { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
- { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
- { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
- { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 },
- { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 },
- { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 },
- { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 },
- { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 },
- { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 },
- { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 },
- { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
- { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
- { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
- { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
- { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
- { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
- { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
- { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
- { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
- { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
- { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
- { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
- { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
- { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
- { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
- { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
- { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
- { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
- { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
- { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
- { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
- { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
- { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
- { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
- { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
- { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
- { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
- { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
- { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
- { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
- { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
- { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
- { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
- { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
- { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
- { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
- { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
- { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
- { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
- { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
- { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
- { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
- { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
- { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
- { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
- { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
- { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
- { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
- { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
- { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
- { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
- { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
- { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
- { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
- { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
- { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
- { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
- { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
- { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
- { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
- { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
- { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
- { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
- { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
- { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
- { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
- { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
- { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
- { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
- { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
- { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
- { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
- { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 },
- { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 },
- { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 },
- { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
- { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
- { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
- { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
- { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
- { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
- { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
- { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 },
- { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 },
- { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 },
- { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 },
- { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
- { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
- { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
- { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 },
- { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
- { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
- { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
- { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
- { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 },
- { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 },
- { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 },
- { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 },
- { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 },
-
- // AVX-512{F,VL} foldable instructions 128-bit
- { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
- { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
- { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
- { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 },
- { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 },
- { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
- { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
- { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
- { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
- { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
- { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
- { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
- { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
- { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
- { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
- { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
- { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
- { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
- { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
- { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
- { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
- { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
- { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
- { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
- { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
- { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
- { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
- { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
- { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
- { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 },
- { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 },
- { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 },
- { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 },
- { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 },
- { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 },
- { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 },
- { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
- { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
- { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
- { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
- { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
- { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
- { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
- { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
- { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
- { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
- { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
- { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
- { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
- { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
- { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
- { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
- { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
- { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
- { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
- { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
- { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
- { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
- { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
- { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
- { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
- { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
- { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
- { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
- { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
- { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
- { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
- { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
- { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
- { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
- { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
- { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
- { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
- { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
- { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
- { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
- { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
- { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
- { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
- { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
- { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
- { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
- { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
- { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
- { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
- { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
- { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
- { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
- { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
- { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
- { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
- { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
- { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
- { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
- { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
- { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
- { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
- { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
- { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
- { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
- { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
- { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
- { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
- { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
- { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 },
- { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 },
- { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 },
- { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
- { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
- { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
- { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
- { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
- { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
- { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
- { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 },
- { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 },
- { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 },
- { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 },
- { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
- { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
- { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
- { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
- { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
- { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
- { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
- { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
- { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 },
- { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 },
- { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
- { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
- { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
-
- // 512-bit three source instructions with zero masking.
- { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
- { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
- { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
- { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
- { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
- { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
- { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
- { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
- { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
- { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
- { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
- { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
- { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
- { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
-
- // 256-bit three source instructions with zero masking.
- { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
- { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
- { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
- { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
- { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
- { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
- { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
- { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
- { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
- { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
- { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
- { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
- { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
- { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
-
- // 128-bit three source instructions with zero masking.
- { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
- { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
- { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
- { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
- { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
- { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
- { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
- { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
- { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
- { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
- { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
- { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
- { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
- { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
- };
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
@@ -3539,20 +163,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 4, folded load
Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
- for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) {
- if (I.getGroup()->isKMasked()) {
- // Intrinsics need to pass TB_NO_REVERSE.
- if (I.getGroup()->isIntrinsic()) {
- AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
- I.getRegOpcode(), I.getMemOpcode(),
- TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE);
- } else {
- AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
- I.getRegOpcode(), I.getMemOpcode(),
- TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
- }
- }
- }
}
void
@@ -5930,7 +2540,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
// Add implicit uses and defs of all live regs potentially clobbered by the
// call. This way they still appear live across the call.
- LivePhysRegs LiveRegs(&getRegisterInfo());
+ LivePhysRegs LiveRegs(getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
LiveRegs.stepForward(*MIB, Clobbers);
@@ -6545,9 +3155,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// first frame index.
// See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
MachineBasicBlock::LivenessQueryResult LQR =
- MBB.computeRegisterLiveness(TRI, AX, MI);
+ MBB.computeRegisterLiveness(&TRI, AX, MI);
// We do not want to save and restore AX if we do not have to.
// Moreover, if we do so whereas AX is dead, we would need to set
// an undef flag on the use of AX, otherwise the verifier will
@@ -6564,7 +3174,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// AX contains the top most register in the aliasing hierarchy.
// It may not be live, but one of its aliases may be.
- for (MCRegAliasIterator AI(AX, TRI, true);
+ for (MCRegAliasIterator AI(AX, &TRI, true);
AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI)
LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live
: MachineBasicBlock::LQR_Dead;
@@ -8374,7 +4984,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
unsigned Opc = LoadMI.getOpcode();
unsigned UserOpc = UserMI.getOpcode();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC =
+ const TargetRegisterClass *RC =
MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
unsigned RegSize = TRI.getRegSizeInBits(*RC);
@@ -10473,7 +7083,7 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const {
// catch it.
if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
- MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
+ MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
return MachineOutlinerInstrType::Illegal;
// Outlined calls change the instruction pointer, so don't read from it.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 01df07e1715f..fab70e918b8a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -813,6 +813,8 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">,
AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">;
+def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">,
+ AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">;
def HasPFI : Predicate<"Subtarget->hasPFI()">,
AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">;
def HasERI : Predicate<"Subtarget->hasERI()">,
@@ -1436,11 +1438,14 @@ def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
// Longer forms that use a ModR/M byte. Needed for disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src),
- "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>,
+ FoldGenData<"MOV8ri">;
def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16,
+ FoldGenData<"MOV16ri">;
def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32,
+ FoldGenData<"MOV32ri">;
}
} // SchedRW
@@ -1563,13 +1568,17 @@ def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>,
+ FoldGenData<"MOV8rr">;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16,
+ FoldGenData<"MOV16rr">;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32,
+ FoldGenData<"MOV32rr">;
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>,
+ FoldGenData<"MOV64rr">;
}
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index dc3800ce381b..2c047722db24 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -248,7 +248,8 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))],
- IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
+ IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>,
+ FoldGenData<"MMX_MOVD64rr">;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -277,7 +278,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
- IIC_MMX_MOVQ_RR>;
+ IIC_MMX_MOVQ_RR>, FoldGenData<"MMX_MOVQ64rr">;
}
} // SchedRW
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f73d85e7e01b..a3e677209305 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -507,7 +507,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
- string asm_opr, Domain d = GenericDomain> {
+ string asm_opr, Domain d = GenericDomain,
+ string Name> {
let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
@@ -521,15 +522,17 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
- [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>,
+ FoldGenData<Name#rr>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string OpcodeStr,
- Domain d = GenericDomain> {
+ Domain d = GenericDomain, string Name> {
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
+ "V"#Name>,
VEX_4V, VEX_LIG, VEX_WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
@@ -539,7 +542,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}", d>;
+ "\t{$src2, $dst|$dst, $src2}", d, Name>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
@@ -563,9 +566,9 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
- SSEPackedSingle>, XS;
+ SSEPackedSingle, "MOVSS">, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
- SSEPackedDouble>, XD;
+ SSEPackedDouble, "MOVSD">, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
@@ -864,35 +867,43 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG,
+ FoldGenData<"VMOVAPSrr">;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG,
+ FoldGenData<"VMOVAPDrr">;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG,
+ FoldGenData<"VMOVUPSrr">;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG,
+ FoldGenData<"VMOVUPDrr">;
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVAPSYrr">;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVAPDYrr">;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVUPSYrr">;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVUPDYrr">;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
@@ -938,16 +949,16 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [WriteFShuffle] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
+ IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPSrr">;
def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
+ IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPDrr">;
def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>;
+ IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPSrr">;
def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>;
+ IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPDrr">;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -3752,17 +3763,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
- VEX, VEX_WIG;
+ VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVU_P_RR>,
- VEX, VEX_WIG;
+ VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG,
+ FoldGenData<"VMOVDQUYrr">;
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
@@ -3820,11 +3833,12 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
+ IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVDQArr">;
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>,
+ FoldGenData<"MOVDQUrr">;
}
} // SchedRW
@@ -5915,7 +5929,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[WriteShuffle]>;
+ []>, Sched<[WriteShuffle]>, FoldGenData<NAME#ri>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 53224431c0e9..5dde2d07babe 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -111,7 +111,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
- XOP_4V, VEX_W, Sched<[WriteVarVecShift]>;
+ XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedInt in {
@@ -282,7 +282,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W;
+ []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
@@ -318,7 +318,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W;
+ []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
@@ -357,7 +357,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W;
+ []>, VEX_W, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedDouble in {
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 61956f741820..77dead8d2413 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -302,6 +302,26 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
: HasAVX512
? X86::VMOVUPSZ128mr_NOVLX
: HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
+ if (Alignment >= 32)
+ return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
+ : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
+ : X86::VMOVAPSYrm)
+ : (HasVLX ? X86::VMOVAPSZ256mr
+ : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
+ : X86::VMOVAPSYmr);
+ else
+ return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
+ : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
+ : X86::VMOVUPSYrm)
+ : (HasVLX ? X86::VMOVUPSZ256mr
+ : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
+ : X86::VMOVUPSYmr);
+ } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
+ if (Alignment >= 64)
+ return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
+ else
+ return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
return Opc;
}
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index da724f5d8989..979aaee110aa 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -35,6 +35,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfoSSE1();
setLegalizerInfoSSE2();
setLegalizerInfoSSE41();
+ setLegalizerInfoAVX();
setLegalizerInfoAVX2();
setLegalizerInfoAVX512();
setLegalizerInfoAVX512DQ();
@@ -209,6 +210,18 @@ void X86LegalizerInfo::setLegalizerInfoSSE41() {
setAction({G_MUL, v4s32}, Legal);
}
+void X86LegalizerInfo::setLegalizerInfoAVX() {
+ if (!Subtarget.hasAVX())
+ return;
+
+ const LLT v8s32 = LLT::vector(8, 32);
+ const LLT v4s64 = LLT::vector(4, 64);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ for (auto Ty : {v8s32, v4s64})
+ setAction({MemOp, Ty}, Legal);
+}
+
void X86LegalizerInfo::setLegalizerInfoAVX2() {
if (!Subtarget.hasAVX2())
return;
@@ -239,6 +252,10 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
setAction({G_MUL, v16s32}, Legal);
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ for (auto Ty : {v16s32, v8s64})
+ setAction({MemOp, Ty}, Legal);
+
/************ VLX *******************/
if (!Subtarget.hasVLX())
return;
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index ab5405a70427..135950a95f84 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -39,6 +39,7 @@ private:
void setLegalizerInfoSSE1();
void setLegalizerInfoSSE2();
void setLegalizerInfoSSE41();
+ void setLegalizerInfoAVX();
void setLegalizerInfoAVX2();
void setLegalizerInfoAVX512();
void setLegalizerInfoAVX512DQ();
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 2b1f43bffd71..84ec98484f8e 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -286,6 +286,7 @@ void X86Subtarget::initializeEnvironment() {
HasCDI = false;
HasPFI = false;
HasDQI = false;
+ HasVPOPCNTDQ = false;
HasBWI = false;
HasVLX = false;
HasADX = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a9f3a2aee1be..550e95c39ab5 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -270,6 +270,9 @@ protected:
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;
+ /// Processor has AVX-512 population count Instructions
+ bool HasVPOPCNTDQ;
+
/// Processor has AVX-512 Doubleword and Quadword instructions
bool HasDQI;
@@ -494,6 +497,7 @@ public:
bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
+ bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index a97db6fde454..5cf2a8c25d83 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -124,6 +124,7 @@ struct CoroCleanup : FunctionPass {
if (!L)
AU.setPreservesAll();
}
+ StringRef getPassName() const override { return "Coroutine Cleanup"; }
};
}
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index e8bb0ca99d8a..b52989186165 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -208,6 +208,9 @@ struct CoroEarly : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
+ StringRef getPassName() const override {
+ return "Lower early coroutine intrinsics";
+ }
};
}
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index c6ac3f614ff7..acb22449142b 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -301,6 +301,7 @@ struct CoroElide : FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
}
+ StringRef getPassName() const override { return "Coroutine Elision"; }
};
}
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 417d57f7625b..85e9003ec3c5 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -799,9 +799,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
splitAround(CSI, "CoroSuspend");
}
- // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places
- // the fallthrough coro.end as the first element of CoroEnds array.
- splitAround(Shape.CoroEnds.front(), "CoroEnd");
+ // Put CoroEnds into their own blocks.
+ for (CoroEndInst *CE : Shape.CoroEnds)
+ splitAround(CE, "CoroEnd");
// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
// never has its definition separated from the PHI by the suspend point.
@@ -813,19 +813,24 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
IRBuilder<> Builder(F.getContext());
SpillInfo Spills;
- // See if there are materializable instructions across suspend points.
- for (Instruction &I : instructions(F))
- if (materializable(I))
- for (User *U : I.users())
- if (Checker.isDefinitionAcrossSuspend(I, U))
- Spills.emplace_back(&I, U);
+ for (int Repeat = 0; Repeat < 4; ++Repeat) {
+ // See if there are materializable instructions across suspend points.
+ for (Instruction &I : instructions(F))
+ if (materializable(I))
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U))
+ Spills.emplace_back(&I, U);
- // Rewrite materializable instructions to be materialized at the use point.
- DEBUG(dump("Materializations", Spills));
- rewriteMaterializableInstructions(Builder, Spills);
+ if (Spills.empty())
+ break;
+
+ // Rewrite materializable instructions to be materialized at the use point.
+ DEBUG(dump("Materializations", Spills));
+ rewriteMaterializableInstructions(Builder, Spills);
+ Spills.clear();
+ }
// Collect the spills for arguments and other not-materializable values.
- Spills.clear();
for (Argument &A : F.args())
for (User *U : A.users())
if (Checker.isDefinitionAcrossSuspend(A, U))
@@ -847,8 +852,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
if (I.getType()->isTokenTy())
report_fatal_error(
"token definition is separated from the use by a suspend point");
- assert(!materializable(I) &&
- "rewriteMaterializable did not do its job");
Spills.emplace_back(&I, U);
}
}
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 12eb16789825..cd549e4be282 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -228,15 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
SmallVector<ReturnInst *, 4> Returns;
- if (DISubprogram *SP = F.getSubprogram()) {
- // If we have debug info, add mapping for the metadata nodes that should not
- // be cloned by CloneFunctionInfo.
- auto &MD = VMap.MD();
- MD[SP->getUnit()].reset(SP->getUnit());
- MD[SP->getType()].reset(SP->getType());
- MD[SP->getFile()].reset(SP->getFile());
- }
- CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
+ CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns);
// Remove old returns.
for (ReturnInst *Return : Returns)
@@ -509,12 +501,87 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
S.resize(N);
}
+static SmallPtrSet<BasicBlock *, 4> getCoroBeginPredBlocks(CoroBeginInst *CB) {
+ // Collect all blocks that we need to look for instructions to relocate.
+ SmallPtrSet<BasicBlock *, 4> RelocBlocks;
+ SmallVector<BasicBlock *, 4> Work;
+ Work.push_back(CB->getParent());
+
+ do {
+ BasicBlock *Current = Work.pop_back_val();
+ for (BasicBlock *BB : predecessors(Current))
+ if (RelocBlocks.count(BB) == 0) {
+ RelocBlocks.insert(BB);
+ Work.push_back(BB);
+ }
+ } while (!Work.empty());
+ return RelocBlocks;
+}
+
+static SmallPtrSet<Instruction *, 8>
+getNotRelocatableInstructions(CoroBeginInst *CoroBegin,
+ SmallPtrSetImpl<BasicBlock *> &RelocBlocks) {
+ SmallPtrSet<Instruction *, 8> DoNotRelocate;
+ // Collect all instructions that we should not relocate
+ SmallVector<Instruction *, 8> Work;
+
+ // Start with CoroBegin and terminators of all preceding blocks.
+ Work.push_back(CoroBegin);
+ BasicBlock *CoroBeginBB = CoroBegin->getParent();
+ for (BasicBlock *BB : RelocBlocks)
+ if (BB != CoroBeginBB)
+ Work.push_back(BB->getTerminator());
+
+ // For every instruction in the Work list, place its operands in DoNotRelocate
+ // set.
+ do {
+ Instruction *Current = Work.pop_back_val();
+ DoNotRelocate.insert(Current);
+ for (Value *U : Current->operands()) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ continue;
+ if (isa<AllocaInst>(U))
+ continue;
+ if (DoNotRelocate.count(I) == 0) {
+ Work.push_back(I);
+ DoNotRelocate.insert(I);
+ }
+ }
+ } while (!Work.empty());
+ return DoNotRelocate;
+}
+
+static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) {
+ // Analyze which non-alloca instructions are needed for allocation and
+ // relocate the rest to after coro.begin. We need to do it, since some of the
+ // targets of those instructions may be placed into coroutine frame memory
+ // for which becomes available after coro.begin intrinsic.
+
+ auto BlockSet = getCoroBeginPredBlocks(CoroBegin);
+ auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet);
+
+ Instruction *InsertPt = CoroBegin->getNextNode();
+ BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well.
+ for (auto B = BB.begin(), E = BB.end(); B != E;) {
+ Instruction &I = *B++;
+ if (isa<AllocaInst>(&I))
+ continue;
+ if (&I == CoroBegin)
+ break;
+ if (DoNotRelocateSet.count(&I))
+ continue;
+ I.moveBefore(InsertPt);
+ }
+}
+
static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
coro::Shape Shape(F);
if (!Shape.CoroBegin)
return;
simplifySuspendPoints(Shape);
+ relocateInstructionBefore(Shape.CoroBegin, F);
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
@@ -660,6 +727,7 @@ struct CoroSplit : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
CallGraphSCCPass::getAnalysisUsage(AU);
}
+ StringRef getPassName() const override { return "Coroutine Splitting"; }
};
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 8dff2fb3be8a..4c417f1c55eb 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -558,17 +558,17 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
std::vector<User *> Users(DuplicateFunction->user_begin(),
DuplicateFunction->user_end());
Function *CurrentCaller = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> TempBFI;
BlockFrequencyInfo *CurrentCallerBFI = nullptr;
auto ComputeCurrBFI = [&,this](Function *Caller) {
// For the old pass manager:
if (!GetBFI) {
- if (CurrentCallerBFI)
- delete CurrentCallerBFI;
DominatorTree DT(*Caller);
LoopInfo LI(DT);
BranchProbabilityInfo BPI(*Caller, LI);
- CurrentCallerBFI = new BlockFrequencyInfo(*Caller, BPI, LI);
+ TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
+ CurrentCallerBFI = TempBFI.get();
} else {
// New pass manager:
CurrentCallerBFI = &(*GetBFI)(*Caller);
@@ -591,10 +591,6 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
else
CallSiteToProfCountMap[User] = 0;
}
- if (!GetBFI) {
- if (CurrentCallerBFI)
- delete CurrentCallerBFI;
- }
}
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
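The hunk above replaces the manual new/delete of a locally computed BlockFrequencyInfo with a std::unique_ptr, so the legacy-pass-manager path cannot leak on early exits. A minimal sketch of that ownership pattern in plain C++ (the names are illustrative, not the patch's):

  #include <memory>

  struct Info {};   // stand-in for BlockFrequencyInfo

  // Either own a freshly computed Info (legacy pass manager) or borrow one
  // from a callback (new pass manager). Callers always use the raw pointer;
  // the unique_ptr frees the owned case exactly once.
  Info *getInfo(bool HaveCallback, Info &FromCallback,
                std::unique_ptr<Info> &Storage) {
    if (HaveCallback)
      return &FromCallback;         // borrowed, not owned here
    Storage.reset(new Info());      // owned; released when Storage is reset or destroyed
    return Storage.get();
  }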
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index ec06d5f9fb05..9fd3a9021a27 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -155,6 +155,10 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Enable the simple loop unswitch pass."));
+static cl::opt<bool> EnableGVNSink(
+ "enable-gvn-sink", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN sinking pass (default = on)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -307,6 +311,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
+ if (EnableGVNSink) {
+ MPM.add(createGVNSinkPass());
+ MPM.add(createCFGSimplificationPass());
+ }
+
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
@@ -904,6 +913,12 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel != 0)
addLTOOptimizationPasses(PM);
+ else {
+ // The whole-program-devirt pass needs to run at -O0 because only it knows
+ // about the llvm.type.checked.load intrinsic: it needs to both lower the
+ // intrinsic itself and handle it in the summary.
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
+ }
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 733eeb1767a3..7204bf517681 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -861,12 +861,9 @@ bool InstCombiner::willNotOverflowSignedSub(const Value *LHS,
ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
- unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- KnownBits LHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, 0, &CxtI);
+ KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI);
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(RHS, RHSKnown, 0, &CxtI);
+ KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -1059,9 +1056,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// If this is a xor that was canonicalized from a sub, turn it back into
// a sub and fuse this add with it.
if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
- IntegerType *IT = cast<IntegerType>(I.getType());
- KnownBits LHSKnown(IT->getBitWidth());
- computeKnownBits(XorLHS, LHSKnown, 0, &I);
+ KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I);
if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
@@ -1577,8 +1572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
if (Op0C->isMask()) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(Op1, RHSKnown, 0, &I);
+ KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateXor(Op1, Op0);
}
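The InstCombine hunks in this and the following files all apply the same mechanical migration: instead of constructing a KnownBits of the right width and filling it through an out-parameter, they call the value-returning InstCombiner::computeKnownBits helper (visible as context in the InstCombineInternal.h hunk below). A sketch of the call-site shape, with V and CxtI standing in for an arbitrary value and its context instruction:

  // Old shape, removed throughout this patch:
  //   KnownBits Known(V->getType()->getScalarSizeInBits());
  //   computeKnownBits(V, Known, /*Depth=*/0, CxtI);
  // New shape (the bit width is derived from V's type internally):
  KnownBits Known = computeKnownBits(V, /*Depth=*/0, CxtI);
  bool AllBitsKnown = Known.isConstant();   // the same queries work as before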
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4227b2d01be8..1f8319efb3be 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1610,17 +1610,13 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI,
- &DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI,
- &DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, CxtI) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, CxtI)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC,
- CxtI, &DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC,
- CxtI, &DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, CxtI) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, CxtI)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index face7abcc95f..92a38f26dde7 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1378,9 +1378,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
if (!IT)
return nullptr;
- unsigned BitWidth = IT->getBitWidth();
- KnownBits Known(BitWidth);
- IC.computeKnownBits(Op0, Known, 0, &II);
+ KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
@@ -1401,7 +1399,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsUndef' parameter to 'true'
// because we know the zero behavior can't affect the result.
- if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
+ if (Known.One != 0 ||
+ isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
+ &IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One())) {
II.setOperand(1, IC.Builder->getTrue());
return &II;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f4bf5221f6a2..766939c56dff 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -692,8 +692,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C some other power of two, convert:
- KnownBits Known(Op1C->getType()->getBitWidth());
- computeKnownBits(ICI->getOperand(0), Known, 0, &CI);
+ KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI);
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -737,14 +736,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// may lead to additional simplifications.
if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
- uint32_t BitWidth = ITy->getBitWidth();
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
- KnownBits KnownLHS(BitWidth);
- KnownBits KnownRHS(BitWidth);
- computeKnownBits(LHS, KnownLHS, 0, &CI);
- computeKnownBits(RHS, KnownRHS, 0, &CI);
+ KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI);
+ KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI);
if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
@@ -1063,9 +1059,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
// the icmp and sext into bitwise/integer operations.
if (ICI->hasOneUse() &&
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
- unsigned BitWidth = Op1C->getType()->getBitWidth();
- KnownBits Known(BitWidth);
- computeKnownBits(Op0, Known, 0, &CI);
+ KnownBits Known = computeKnownBits(Op0, 0, &CI);
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) {
@@ -1104,7 +1098,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
// Distribute the bit over the whole bit width.
In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
- BitWidth - 1), "sext");
+ KnownZeroMask.getBitWidth() - 1), "sext");
}
if (CI.getType() == In->getType())
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6492eaedae9c..2c2b7317a1c0 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1402,9 +1402,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
if (*C == 0 && Pred == ICmpInst::ICMP_SGT) {
SelectPatternResult SPR = matchSelectPattern(X, A, B);
if (SPR.Flavor == SPF_SMIN) {
- if (isKnownPositive(A, DL))
+ if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
- if (isKnownPositive(B, DL))
+ if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
@@ -1478,8 +1478,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
// of the high bits truncated out of x are known.
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
SrcBits = X->getType()->getScalarSizeInBits();
- KnownBits Known(SrcBits);
- computeKnownBits(X, Known, 0, &Cmp);
+ KnownBits Known = computeKnownBits(X, 0, &Cmp);
// If all the high bits are known, we can do this xform.
if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
@@ -3030,18 +3029,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
break;
case Instruction::Add:
case Instruction::Sub:
- case Instruction::Xor:
+ case Instruction::Xor: {
if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
- // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- if (CI->getValue().isSignMask()) {
+
+ const APInt *C;
+ if (match(BO0->getOperand(1), m_APInt(C))) {
+ // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+ if (C->isSignMask()) {
ICmpInst::Predicate NewPred =
I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
}
- if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) {
+ // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
+ if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
ICmpInst::Predicate NewPred =
I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
NewPred = I.getSwappedPredicate(NewPred);
@@ -3049,26 +3051,30 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
}
}
break;
- case Instruction::Mul:
+ }
+ case Instruction::Mul: {
if (!I.isEquality())
break;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
- // Mask = -1 >> count-trailing-zeros(Cst).
- if (!CI->isZero() && !CI->isOne()) {
- const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(
- I.getContext(),
- APInt::getLowBitsSet(AP.getBitWidth(),
- AP.getBitWidth() - AP.countTrailingZeros()));
+ const APInt *C;
+ if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) {
+ // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
+ // Mask = -1 >> count-trailing-zeros(C).
+ if (unsigned TZs = C->countTrailingZeros()) {
+ Constant *Mask = ConstantInt::get(
+ BO0->getType(),
+ APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
return new ICmpInst(Pred, And1, And2);
}
+ // If there are no trailing zeros in the multiplier, just eliminate
+ // the multiplies (no masking is needed):
+ // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
+ return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
}
break;
-
+ }
case Instruction::UDiv:
case Instruction::LShr:
if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
@@ -4497,7 +4503,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 6829be86885b..56f133de3de1 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -540,6 +540,12 @@ public:
return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
}
+ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
+ unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) {
+ return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT);
+ }
+
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
const Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
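This new member wrapper is what lets the call sites in the files below drop the DataLayout, AssumptionCache and DominatorTree arguments. A sketch of the call-site change, with Op1 and I standing in for an operand and the instruction being visited:

  // Before: every caller threaded the analyses through explicitly.
  //   isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero=*/true, /*Depth=*/0, &AC, &I, &DT)
  // After: the InstCombiner member supplies DL, AC and DT itself.
  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero=*/true, /*Depth=*/0, &I)) {
    // e.g. X urem Op1 can now be rewritten as X & (Op1 - 1)
  }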
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index fc13854f8fe7..4d408359eeea 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -47,9 +47,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
BinaryOperator *I = dyn_cast<BinaryOperator>(V);
if (I && I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
- &IC.getAssumptionCache(), &CxtI,
- &IC.getDominatorTree())) {
+ IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, &CxtI)) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
@@ -1240,7 +1238,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1487,7 +1485,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 219effce7ba5..b40d067b2817 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -44,7 +44,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *A;
Constant *C;
if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C))))
- if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL))
+ if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) &&
+ isKnownNonNegative(C, DL, 0, &AC, &I, &DT))
return BinaryOperator::Create(
I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A);
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 4028a92771a4..5df55f01b83f 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -158,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
Depth + 1))
return I;
- assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
- assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
// Output known-0 are known to be clear if zero in either the LHS | RHS.
APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
@@ -192,8 +192,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
Depth + 1))
return I;
- assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
- assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
// Output known-0 bits are only known if clear in both the LHS & RHS.
APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
@@ -224,8 +224,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
- assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
@@ -313,8 +313,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
- assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
@@ -325,15 +325,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = RHSKnown.One & LHSKnown.One;
Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
+ case Instruction::ZExt:
case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask = DemandedMask.zext(truncBf);
- Known = Known.zext(truncBf);
- if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
+ KnownBits InputKnown(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
return I;
- DemandedMask = DemandedMask.trunc(BitWidth);
- Known = Known.trunc(BitWidth);
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ Known = InputKnown.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ Known.Zero.setBitsFrom(SrcBitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case Instruction::BitCast:
@@ -355,56 +359,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
- break;
- case Instruction::ZExt: {
- // Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
-
- DemandedMask = DemandedMask.trunc(SrcBitWidth);
- Known = Known.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
- return I;
- DemandedMask = DemandedMask.zext(BitWidth);
- Known = Known.zext(BitWidth);
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
- // The top bits are known to be zero.
- Known.Zero.setBitsFrom(SrcBitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
- }
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- APInt InputDemandedBits = DemandedMask &
- APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+ APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);
- APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth));
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- if ((NewBits & DemandedMask) != 0)
+ if (DemandedMask.getActiveBits() > SrcBitWidth)
InputDemandedBits.setBit(SrcBitWidth-1);
- InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
- Known = Known.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1))
+ KnownBits InputKnown(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
return I;
- InputDemandedBits = InputDemandedBits.zext(BitWidth);
- Known = Known.zext(BitWidth);
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
// If the input sign bit is known zero, or if the NewBits are not demanded
// convert this into a zero extension.
- if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
- // Convert to ZExt cast
+ if (InputKnown.isNonNegative() ||
+ DemandedMask.getActiveBits() <= SrcBitWidth) {
+ // Convert to ZExt cast.
CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
return InsertNewInstWith(NewCast, *I);
- } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set
- Known.One |= NewBits;
- }
+ }
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ Known = InputKnown.sext(BitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case Instruction::Add:
@@ -467,7 +451,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShiftAmt;
Known.One <<= ShiftAmt;
// low bits known zero.
@@ -491,7 +475,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShiftAmt);
Known.One.lshrInPlace(ShiftAmt);
if (ShiftAmt)
@@ -535,7 +519,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
Known.Zero.lshrInPlace(ShiftAmt);
@@ -590,7 +574,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
Known.One |= ~LowBits;
- assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
}
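A worked example of the unified Trunc/ZExt handling above, assuming a zext from i8 to i32 (the widths and demanded mask are illustrative, not taken from the patch):

  unsigned BitWidth = 32, SrcBitWidth = 8;                          // zext i8 %x to i32
  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);          // caller wants bits 0..15
  APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);  // bits 0..7 of %x
  KnownBits InputKnown(SrcBitWidth);                                // filled by the recursion
  KnownBits Known = InputKnown.zextOrTrunc(BitWidth);               // widen what was learned
  if (BitWidth > SrcBitWidth)
    Known.Zero.setBitsFrom(SrcBitWidth);                            // bits 8..31 known zero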
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7ed9fd566b37..2730afc5c5b9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1963,6 +1963,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
// Give up the moment we see something we can't handle.
return false;
+ case Instruction::AddrSpaceCast:
case Instruction::BitCast:
case Instruction::GetElementPtr:
Users.emplace_back(I);
@@ -2064,7 +2065,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
replaceInstUsesWith(*C,
ConstantInt::get(Type::getInt1Ty(C->getContext()),
C->isFalseWhenEqual()));
- } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<AddrSpaceCastInst>(I)) {
replaceInstUsesWith(*I, UndefValue::get(I->getType()));
}
eraseInstFromFunction(*I);
@@ -2180,8 +2182,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
// There might be assume intrinsics dominating this return that completely
// determine the value. If so, constant fold it.
- KnownBits Known(VTy->getPrimitiveSizeInBits());
- computeKnownBits(ResultOp, Known, 0, &RI);
+ KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
if (Known.isConstant())
RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant()));
@@ -2242,9 +2243,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
return &SI;
}
- unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
- KnownBits Known(BitWidth);
- computeKnownBits(Cond, Known, 0, &SI);
+ KnownBits Known = computeKnownBits(Cond, 0, &SI);
unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
@@ -2257,12 +2256,12 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
}
- unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
+ unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
// Shrink the condition operand if the new type is smaller than the old type.
// This may produce a non-standard type for the switch, but that's ok because
// the backend should extend back to a legal type for the target.
- if (NewWidth > 0 && NewWidth < BitWidth) {
+ if (NewWidth > 0 && NewWidth < Known.getBitWidth()) {
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc");
@@ -2841,9 +2840,7 @@ bool InstCombiner::run() {
// a value even when the operands are not all constants.
Type *Ty = I->getType();
if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
- unsigned BitWidth = Ty->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(I, Known, /*Depth*/0, I);
+ KnownBits Known = computeKnownBits(I, /*Depth*/0, I);
if (Known.isConstant()) {
Constant *C = ConstantInt::get(Ty, Known.getConstant());
DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 990bcec109de..1e30dbf6b55a 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -180,7 +180,7 @@ static cl::opt<bool>
static cl::opt<bool>
PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
cl::desc("Use this option to turn on/off "
- "memory instrinsic size profiling."));
+ "memory intrinsic size profiling."));
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 4bc0a7133118..300085eccb0c 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -401,7 +401,10 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
if (Options.NoPrune || &F.getEntryBlock() == BB)
return true;
- return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT));
+ // Do not instrument full dominators, or full post-dominators with multiple
+ // predecessors.
+ return !isFullDominator(BB, DT)
+ && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
}
bool SanitizerCoverageModule::runOnFunction(Function &F) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 523390758769..f5196cc46181 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts
GuardWidening.cpp
GVN.cpp
GVNHoist.cpp
+ GVNSink.cpp
IVUsersPrinter.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index f62e111460ca..c3810366bf22 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -164,9 +164,9 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
/// \brief Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
-void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
- BasicBlock *Entry,
- SmallPtrSet<BasicBlock *, 8> &BBs) {
+static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
+ BasicBlock *Entry,
+ SmallPtrSet<BasicBlock *, 8> &BBs) {
assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
// Nodes on the current path to the root.
SmallPtrSet<BasicBlock *, 8> Path;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0490d93f6455..0d6e0538261d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
+ bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o) {}
+ Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
+ e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
+ e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
+ e.commutative = true;
return e;
}
@@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
+ if (PHINode *PN = dyn_cast<PHINode>(V))
+ NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) {
- e = nextValueNumber++;
- valueNumbering[C] = e;
- return e;
+ auto ValNum = assignExpNewValueNum(exp);
+ if (ValNum.second) {
+ valueNumbering[C] = ValNum.first;
+ return ValNum.first;
}
if (!MD) {
- e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
}
@@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
+ case Instruction::PHI:
+ valueNumbering[V] = nextValueNumber;
+ NumberingPhi[nextValueNumber] = cast<PHINode>(V);
+ return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
+ if (Verify) {
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+ return (VI != valueNumbering.end()) ? VI->second : 0;
}
/// Returns the value number of the given comparison,
@@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
- return e;
+ return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
+ NumberingPhi.clear();
+ PhiTranslateTable.clear();
+ BlockRPONumber.clear();
nextValueNumber = 1;
+ Expressions.clear();
+ ExprIdx.clear();
+ nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
+ uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
+ // If V is a PHINode, V <--> value number is a one-to-one mapping.
+ if (isa<PHINode>(V))
+ NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -1451,6 +1469,104 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+/// Return a pair whose first field is the value number of \p Exp and whose
+/// second field indicates whether that value number was newly created.
+std::pair<uint32_t, bool>
+GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+ uint32_t &e = expressionNumbering[Exp];
+ bool CreateNewValNum = !e;
+ if (CreateNewValNum) {
+ Expressions.push_back(Exp);
+ if (ExprIdx.size() < nextValueNumber + 1)
+ ExprIdx.resize(nextValueNumber * 2);
+ e = nextValueNumber;
+ ExprIdx[nextValueNumber++] = nextExprNumber++;
+ }
+ return {e, CreateNewValNum};
+}
+
+void GVN::ValueTable::assignBlockRPONumber(Function &F) {
+ uint32_t NextBlockNumber = 1;
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ for (BasicBlock *BB : RPOT)
+ BlockRPONumber[BB] = NextBlockNumber++;
+}
+
+/// Return whether all the values related to the same \p Num are
+/// defined in \p BB.
+bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVN &Gvn) {
+ LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+ while (Vals && Vals->BB == BB)
+ Vals = Vals->Next;
+ return !Vals;
+}
+
+/// Wrap phiTranslateImpl to provide caching functionality.
+uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, uint32_t Num,
+ GVN &Gvn) {
+ auto FindRes = PhiTranslateTable.find({Num, Pred});
+ if (FindRes != PhiTranslateTable.end())
+ return FindRes->second;
+ uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
+ PhiTranslateTable.insert({{Num, Pred}, NewNum});
+ return NewNum;
+}
+
+/// Translate value number \p Num using phis, so that it has the values of
+/// the phis in BB.
+uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVN &Gvn) {
+ if (PHINode *PN = NumberingPhi[Num]) {
+ if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock])
+ return Num;
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
+ if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
+ return TransVal;
+ }
+ return Num;
+ }
+
+ // If any value related to Num is defined in a BB other than PhiBlock, it
+ // cannot depend on a phi in PhiBlock without going through a backedge, so
+ // we can exit early in that case to save compile time.
+ if (!areAllValsInBB(Num, PhiBlock, Gvn))
+ return Num;
+
+ if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
+ return Num;
+ Expression Exp = Expressions[ExprIdx[Num]];
+
+ for (unsigned i = 0; i < Exp.varargs.size(); i++) {
+ // For InsertValue and ExtractValue, some varargs are index numbers
+ // instead of value numbers. Those index numbers should not be
+ // translated.
+ if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
+ (i > 0 && Exp.opcode == Instruction::ExtractValue))
+ continue;
+ Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
+ }
+
+ if (Exp.commutative) {
+ assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+ if (Exp.varargs[0] > Exp.varargs[1]) {
+ std::swap(Exp.varargs[0], Exp.varargs[1]);
+ uint32_t Opcode = Exp.opcode >> 8;
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
+ Exp.opcode = (Opcode << 8) |
+ CmpInst::getSwappedPredicate(
+ static_cast<CmpInst::Predicate>(Exp.opcode & 255));
+ }
+ }
+
+ if (uint32_t NewNum = expressionNumbering[Exp])
+ return NewNum;
+ return Num;
+}
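A hedged illustration of what the caching phiTranslate above is for (Pred, Merge, Num and Gvn are hypothetical names): if Merge contains P = phi [a, Pred], [b, Other] and Num is the value number of an expression using P, the translated number is the one that expression would have with a substituted for P, which is exactly the question PRE needs answered in Pred.

  uint32_t TranslatedNum = VN.phiTranslate(Pred, Merge, Num, Gvn);
  // TranslatedNum can then be handed to findLeader(Pred, TranslatedNum), as
  // the performScalarPRE hunks below do.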
+
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1856,6 +1972,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
+ VN.assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -1945,7 +2062,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ uint32_t TValNo =
+ VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this);
+ if (Value *V = findLeader(Pred, TValNo)) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -1962,10 +2081,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
- VN.add(Instr, ValNo);
+
+ unsigned Num = VN.lookupOrAdd(Instr);
+ VN.add(Instr, Num);
// Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, Instr, Pred);
+ addToLeaderTable(Num, Instr, Pred);
return true;
}
@@ -2014,7 +2135,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
break;
}
- Value *predV = findLeader(P, ValNo);
+ uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
+ Value *predV = findLeader(P, TValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
new file mode 100644
index 000000000000..5c75f39e381d
--- /dev/null
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -0,0 +1,872 @@
+//===- GVNSink.cpp - sink expressions into successors -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file GVNSink.cpp
+/// This pass attempts to sink instructions into successors, reducing static
+/// instruction count and enabling if-conversion.
+///
+/// We use a variant of global value numbering to decide what can be sunk.
+/// Consider:
+///
+/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
+/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
+/// \ /
+/// [ %e = phi i32 %a2, %c2 ]
+/// [ add i32 %e, 4 ]
+///
+///
+/// GVN would number %a1 and %c1 differently because they compute different
+/// results - the VN of an instruction is a function of its opcode and the
+/// transitive closure of its operands. This is the key property for hoisting
+/// and CSE.
+///
+/// What we want when sinking, however, is a numbering that is a function of
+/// the *uses* of an instruction, which allows us to answer the question "if I
+/// replace %a1 with %c1, will it contribute in an equivalent way to all
+/// successive instructions?". The PostValueTable class in GVN provides this
+/// mapping.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <unordered_set>
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-sink"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+
+static bool isMemoryInst(const Instruction *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I) ||
+ (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
+ (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
+}
+
+/// Iterates through instructions in a set of blocks in reverse order from the
+/// first non-terminator. For example (assume all blocks have size n):
+/// LockstepReverseIterator I([B1, B2, B3]);
+/// *I-- = [B1[n], B2[n], B3[n]];
+/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+/// ...
+///
+/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
+/// to determine which blocks are still going and the order they appear in the
+/// list returned by operator*.
+class LockstepReverseIterator {
+ ArrayRef<BasicBlock *> Blocks;
+ SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
+ SmallVector<Instruction *, 4> Insts;
+ bool Fail;
+
+public:
+ LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ ActiveBlocks.clear();
+ for (BasicBlock *BB : Blocks)
+ ActiveBlocks.insert(BB);
+ Insts.clear();
+ for (BasicBlock *BB : Blocks) {
+ if (BB->size() <= 1) {
+ // Block wasn't big enough - only contained a terminator.
+ ActiveBlocks.erase(BB);
+ continue;
+ }
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ }
+ if (Insts.empty())
+ Fail = true;
+ }
+
+ bool isValid() const { return !Fail; }
+ ArrayRef<Instruction *> operator*() const { return Insts; }
+ SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
+
+ void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ for (auto II = Insts.begin(); II != Insts.end();) {
+ if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
+ Blocks.end()) {
+ ActiveBlocks.erase((*II)->getParent());
+ II = Insts.erase(II);
+ } else {
+ ++II;
+ }
+ }
+ }
+
+ void operator--() {
+ if (Fail)
+ return;
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *Inst : Insts) {
+ if (Inst == &Inst->getParent()->front())
+ ActiveBlocks.erase(Inst->getParent());
+ else
+ NewInsts.push_back(Inst->getPrevNode());
+ }
+ if (NewInsts.empty()) {
+ Fail = true;
+ return;
+ }
+ Insts = NewInsts;
+ }
+};
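A brief usage sketch for the iterator above (Preds is a hypothetical list of the predecessor blocks; this is the loop shape the sinking analysis is built around, not code from the patch):

  SmallVector<BasicBlock *, 4> Preds;       // predecessors of the common successor
  LockstepReverseIterator LRI(Preds);
  while (LRI.isValid()) {
    ArrayRef<Instruction *> Insts = *LRI;   // one instruction per active block
    // ... score Insts as a sinking candidate ...
    --LRI;                                  // step every active block back by one
  }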
+
+//===----------------------------------------------------------------------===//
+
+/// Candidate solution for sinking. There may be different ways to
+/// sink instructions, differing in the number of instructions sunk,
+/// the number of predecessors sunk from and the number of PHIs
+/// required.
+struct SinkingInstructionCandidate {
+ unsigned NumBlocks;
+ unsigned NumInstructions;
+ unsigned NumPHIs;
+ unsigned NumMemoryInsts;
+ int Cost = -1;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+ void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
+ unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
+ unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
+ Cost = (NumInstructions * (NumBlocks - 1)) -
+ (NumExtraPHIs *
+ NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
+ - SplitEdgeCost;
+ }
+ bool operator>=(const SinkingInstructionCandidate &Other) const {
+ return Cost >= Other.Cost;
+ }
+};
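A worked example of calculateCost above (the numbers are illustrative): sinking 3 instructions from 2 predecessors while introducing 1 extra PHI, with no edge splitting required, yields a positive cost, so the candidate is worth pursuing.

  SinkingInstructionCandidate C;
  C.NumBlocks = 2;
  C.NumInstructions = 3;
  C.NumPHIs = 1;
  C.calculateCost(/*NumOrigPHIs=*/0, /*NumOrigBlocks=*/2);
  // Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2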
+
+#ifndef NDEBUG
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+/// Describes a PHI node that may or may not exist. These track the PHIs
+/// that would have to be created if we sank a sequence of instructions. It
+/// provides a hash function for efficient equality comparisons.
+class ModelledPHI {
+ SmallVector<Value *, 4> Values;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+public:
+ ModelledPHI() {}
+ ModelledPHI(const PHINode *PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
+ Blocks.push_back(PN->getIncomingBlock(I));
+ std::sort(Blocks.begin(), Blocks.end());
+
+ // This assumes the PHI is already well-formed and there aren't conflicting
+ // incoming values for the same block.
+ for (auto *B : Blocks)
+ Values.push_back(PN->getIncomingValueForBlock(B));
+ }
+ /// Create a dummy ModelledPHI that will compare unequal to any other
+ /// ModelledPHI without the same ID.
+ /// \note This is specifically for DenseMapInfo - do not use this!
+ static ModelledPHI createDummy(size_t ID) {
+ ModelledPHI M;
+ M.Values.push_back(reinterpret_cast<Value*>(ID));
+ return M;
+ }
+
+ /// Create a PHI from an array of incoming values and incoming blocks.
+ template <typename VArray, typename BArray>
+ ModelledPHI(const VArray &V, const BArray &B) {
+ std::copy(V.begin(), V.end(), std::back_inserter(Values));
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ }
+
+ /// Create a PHI from [I[OpNum] for I in Insts].
+ template <typename BArray>
+ ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ for (auto *I : Insts)
+ Values.push_back(I->getOperand(OpNum));
+ }
+
+ /// Restrict the PHI's contents down to only \c NewBlocks.
+ /// \c NewBlocks must be a subset of \c this->Blocks.
+ void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
+ auto BI = Blocks.begin();
+ auto VI = Values.begin();
+ while (BI != Blocks.end()) {
+ assert(VI != Values.end());
+ if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
+ NewBlocks.end()) {
+ BI = Blocks.erase(BI);
+ VI = Values.erase(VI);
+ } else {
+ ++BI;
+ ++VI;
+ }
+ }
+ assert(Blocks.size() == NewBlocks.size());
+ }
+
+ ArrayRef<Value *> getValues() const { return Values; }
+
+ bool areAllIncomingValuesSame() const {
+ return all_of(Values, [&](Value *V) { return V == Values[0]; });
+ }
+ bool areAllIncomingValuesSameType() const {
+ return all_of(
+ Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
+ }
+ bool areAnyIncomingValuesConstant() const {
+ return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
+ }
+ // Hash functor
+ unsigned hash() const {
+ return (unsigned)hash_combine_range(Values.begin(), Values.end());
+ }
+ bool operator==(const ModelledPHI &Other) const {
+ return Values == Other.Values && Blocks == Other.Blocks;
+ }
+};
+
+template <typename ModelledPHI> struct DenseMapInfo {
+ static inline ModelledPHI &getEmptyKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(0);
+ return Dummy;
+ }
+ static inline ModelledPHI &getTombstoneKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(1);
+ return Dummy;
+ }
+ static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
+ static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
+ return LHS == RHS;
+ }
+};
+
+typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
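The DenseMapInfo specialization above is what allows ModelledPHI to be a DenseSet key. A hedged sketch of the resulting de-duplication (Vals and Blocks are hypothetical arrays of matching length):

  ModelledPHISet NeededPHIs;
  ModelledPHI A(Vals, Blocks);
  ModelledPHI B(Vals, Blocks);                 // models the same PHI as A
  NeededPHIs.insert(A);
  bool WasNew = NeededPHIs.insert(B).second;   // false: A and B hash and compare equal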
+
+//===----------------------------------------------------------------------===//
+// ValueTable
+//===----------------------------------------------------------------------===//
+// This is a value number table where the value number is a function of the
+// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
+// that the program would be equivalent if we replaced A with PHI(A, B).
+//===----------------------------------------------------------------------===//
+
+/// A GVN expression describing how an instruction is used. The operands
+/// field of BasicExpression is used to store uses, not operands.
+///
+/// This class also contains fields for discriminators used when determining
+/// equivalence of instructions with side effects.
+class InstructionUseExpr : public GVNExpression::BasicExpression {
+ unsigned MemoryUseOrder = -1;
+ bool Volatile = false;
+
+public:
+ InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
+ BumpPtrAllocator &A)
+ : GVNExpression::BasicExpression(I->getNumUses()) {
+ allocateOperands(R, A);
+ setOpcode(I->getOpcode());
+ setType(I->getType());
+
+ for (auto &U : I->uses())
+ op_push_back(U.getUser());
+ std::sort(op_begin(), op_end());
+ }
+ void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
+ void setVolatile(bool V) { Volatile = V; }
+
+ virtual hash_code getHashValue() const {
+ return hash_combine(GVNExpression::BasicExpression::getHashValue(),
+ MemoryUseOrder, Volatile);
+ }
+
+ template <typename Function> hash_code getHashValue(Function MapFn) {
+ hash_code H =
+ hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
+ for (auto *V : operands())
+ H = hash_combine(H, MapFn(V));
+ return H;
+ }
+};
+
+class ValueTable {
+ DenseMap<Value *, uint32_t> ValueNumbering;
+ DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<size_t, uint32_t> HashNumbering;
+ BumpPtrAllocator Allocator;
+ ArrayRecycler<Value *> Recycler;
+ uint32_t nextValueNumber;
+
+ /// Create an expression for I based on its opcode and its uses. If I
+ /// touches or reads memory, the expression is also based upon its memory
+ /// order - see \c getMemoryUseOrder().
+ InstructionUseExpr *createExpr(Instruction *I) {
+ InstructionUseExpr *E =
+ new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
+ if (isMemoryInst(I))
+ E->setMemoryUseOrder(getMemoryUseOrder(I));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ CmpInst::Predicate Predicate = C->getPredicate();
+ E->setOpcode((C->getOpcode() << 8) | Predicate);
+ }
+ return E;
+ }
+
+ /// Helper to compute the value number for a memory instruction
+ /// (LoadInst/StoreInst), including checking the memory ordering and
+ /// volatility.
+ template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
+ if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
+ return nullptr;
+ InstructionUseExpr *E = createExpr(I);
+ E->setVolatile(I->isVolatile());
+ return E;
+ }
+
+public:
+ /// Returns the value number for the specified value, assigning
+ /// it a new number if it did not have one before.
+ uint32_t lookupOrAdd(Value *V) {
+ auto VI = ValueNumbering.find(V);
+ if (VI != ValueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ InstructionUseExpr *exp = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ exp = createMemoryExpr(cast<LoadInst>(I));
+ break;
+ case Instruction::Store:
+ exp = createMemoryExpr(cast<StoreInst>(I));
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = createExpr(I);
+ break;
+ default:
+ break;
+ }
+
+ if (!exp) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t e = ExpressionNumbering[exp];
+ if (!e) {
+ hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
+ auto I = HashNumbering.find(H);
+ if (I != HashNumbering.end()) {
+ e = I->second;
+ } else {
+ e = nextValueNumber++;
+ HashNumbering[H] = e;
+ ExpressionNumbering[exp] = e;
+ }
+ }
+ ValueNumbering[V] = e;
+ return e;
+ }
+
+ /// Returns the value number of the specified value. Fails if the value has
+ /// not yet been numbered.
+ uint32_t lookup(Value *V) const {
+ auto VI = ValueNumbering.find(V);
+ assert(VI != ValueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+
+ /// Removes all value numberings and resets the value table.
+ void clear() {
+ ValueNumbering.clear();
+ ExpressionNumbering.clear();
+ HashNumbering.clear();
+ Recycler.clear(Allocator);
+ nextValueNumber = 1;
+ }
+
+ ValueTable() : nextValueNumber(1) {}
+
+ /// \c Inst uses or touches memory. Return an ID describing the memory state
+ /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
+ /// the exact same memory operations happen after I1 and I2.
+ ///
+ /// This is a very hard problem in general, so we exploit domain-specific
+ /// knowledge: equivalence is only ever checked between blocks that share a
+ /// single common immediate successor, and when determining whether I1 == I2
+ /// we will already have determined that next(I1) == next(I2). This inductive
+ /// property allows us to simply return the value number of the next
+ /// instruction that defines memory.
+ uint32_t getMemoryUseOrder(Instruction *Inst) {
+ auto *BB = Inst->getParent();
+ for (auto I = std::next(Inst->getIterator()), E = BB->end();
+ I != E && !I->isTerminator(); ++I) {
+ if (!isMemoryInst(&*I))
+ continue;
+ if (isa<LoadInst>(&*I))
+ continue;
+ CallInst *CI = dyn_cast<CallInst>(&*I);
+ if (CI && CI->onlyReadsMemory())
+ continue;
+ InvokeInst *II = dyn_cast<InvokeInst>(&*I);
+ if (II && II->onlyReadsMemory())
+ continue;
+ return lookupOrAdd(&*I);
+ }
+ return 0;
+ }
+};
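A short usage sketch for the use-based ValueTable above (InstA and InstB are hypothetical Instruction pointers): equal value numbers mean the two instructions are used in equivalent ways, which is the property the sinking driver keys its candidates on.

  ValueTable VT;
  uint32_t NA = VT.lookupOrAdd(InstA);
  uint32_t NB = VT.lookupOrAdd(InstB);
  bool MayBeSinkable = (NA == NB);   // same user set, memory order and volatility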
+
+//===----------------------------------------------------------------------===//
+
+class GVNSink {
+public:
+ GVNSink() : VN() {}
+ bool run(Function &F) {
+ DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
+
+ unsigned NumSunk = 0;
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (auto *N : RPOT)
+ NumSunk += sinkBB(N);
+
+ return NumSunk > 0;
+ }
+
+private:
+ ValueTable VN;
+
+ bool isInstructionBlacklisted(Instruction *I) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return true;
+ return false;
+ }
+
+ /// The main heuristic function. Analyze the set of instructions pointed to by
+ /// LRI and return a candidate solution if these instructions can be sunk, or
+ /// None otherwise.
+ Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
+
+ /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
+ void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
+ SmallPtrSetImpl<Value *> &PHIContents) {
+ for (auto &I : *BB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ return;
+
+ auto MPHI = ModelledPHI(PN);
+ PHIs.insert(MPHI);
+ for (auto *V : MPHI.getValues())
+ PHIContents.insert(V);
+ }
+ }
+
+ /// The main instruction sinking driver. Set up state and try to sink
+ /// instructions into BBEnd from its predecessors.
+ unsigned sinkBB(BasicBlock *BBEnd);
+
+ /// Perform the actual mechanics of sinking an instruction from Blocks into
+ /// BBEnd, which is their only successor.
+ void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
+
+ /// Remove PHIs whose incoming values are all the same.
+ void foldPointlessPHINodes(BasicBlock *BB) {
+ auto I = BB->begin();
+ while (PHINode *PN = dyn_cast<PHINode>(I++)) {
+ if (!all_of(PN->incoming_values(),
+ [&](const Value *V) { return V == PN->getIncomingValue(0); }))
+ continue;
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+ }
+};
+
+Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
+ auto Insts = *LRI;
+ DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
+ : Insts) {
+ I->dump();
+ } dbgs() << " ]\n";);
+
+ DenseMap<uint32_t, unsigned> VNums;
+ for (auto *I : Insts) {
+ uint32_t N = VN.lookupOrAdd(I);
+ DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+ if (N == ~0U)
+ return None;
+ VNums[N]++;
+ }
+ unsigned VNumToSink =
+ std::max_element(VNums.begin(), VNums.end(),
+ [](const std::pair<uint32_t, unsigned> &I,
+ const std::pair<uint32_t, unsigned> &J) {
+ return I.second < J.second;
+ })
+ ->first;
+
+ if (VNums[VNumToSink] == 1)
+ // Can't sink anything!
+ return None;
+
+ // Now restrict the number of incoming blocks down to only those with
+ // VNumToSink.
+ auto &ActivePreds = LRI.getActiveBlocks();
+ unsigned InitialActivePredSize = ActivePreds.size();
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *I : Insts) {
+ if (VN.lookup(I) != VNumToSink)
+ ActivePreds.erase(I->getParent());
+ else
+ NewInsts.push_back(I);
+ }
+ for (auto *I : NewInsts)
+ if (isInstructionBlacklisted(I))
+ return None;
+
+ // If we've restricted the incoming blocks, restrict all needed PHIs also
+ // to that set.
+ bool RecomputePHIContents = false;
+ if (ActivePreds.size() != InitialActivePredSize) {
+ ModelledPHISet NewNeededPHIs;
+ for (auto P : NeededPHIs) {
+ P.restrictToBlocks(ActivePreds);
+ NewNeededPHIs.insert(P);
+ }
+ NeededPHIs = NewNeededPHIs;
+ LRI.restrictToBlocks(ActivePreds);
+ RecomputePHIContents = true;
+ }
+
+ // The sunk instruction's results.
+ ModelledPHI NewPHI(NewInsts, ActivePreds);
+
+ // Does sinking this instruction render previous PHIs redundant?
+ if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
+ NeededPHIs.erase(NewPHI);
+ RecomputePHIContents = true;
+ }
+
+ if (RecomputePHIContents) {
+ // The needed PHIs have changed, so recompute the set of all needed
+ // values.
+ PHIContents.clear();
+ for (auto &PHI : NeededPHIs)
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ // Is this instruction required by a later PHI that doesn't match this PHI?
+ // If so, we can't sink this instruction.
+ for (auto *V : NewPHI.getValues())
+ if (PHIContents.count(V))
+ // V exists in this PHI, but the whole PHI is different from NewPHI
+ // (else it would have been removed earlier). We cannot continue
+ // because this isn't representable.
+ return None;
+
+ // Which operands need PHIs?
+ // FIXME: If any of these fail, we should partition up the candidates to
+ // try and continue making progress.
+ Instruction *I0 = NewInsts[0];
+ for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
+ ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
+ if (PHI.areAllIncomingValuesSame())
+ continue;
+ if (!canReplaceOperandWithVariable(I0, OpNum))
+ // We can't create a PHI from this instruction!
+ return None;
+ if (NeededPHIs.count(PHI))
+ continue;
+ if (!PHI.areAllIncomingValuesSameType())
+ return None;
+ // Don't create indirect calls! The called value is the final operand.
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
+ PHI.areAnyIncomingValuesConstant())
+ return None;
+
+ NeededPHIs.reserve(NeededPHIs.size());
+ NeededPHIs.insert(PHI);
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ if (isMemoryInst(NewInsts[0]))
+ ++MemoryInstNum;
+
+ SinkingInstructionCandidate Cand;
+ Cand.NumInstructions = ++InstNum;
+ Cand.NumMemoryInsts = MemoryInstNum;
+ Cand.NumBlocks = ActivePreds.size();
+ Cand.NumPHIs = NeededPHIs.size();
+ for (auto *C : ActivePreds)
+ Cand.Blocks.push_back(C);
+
+ return Cand;
+}
+
+unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
+ DEBUG(dbgs() << "GVNSink: running on basic block ";
+ BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
+ SmallVector<BasicBlock *, 4> Preds;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) || isa<SwitchInst>(T))
+ Preds.push_back(B);
+ else
+ return 0;
+ }
+ if (Preds.size() < 2)
+ return 0;
+ std::sort(Preds.begin(), Preds.end());
+
+ unsigned NumOrigPreds = Preds.size();
+ // We can only sink instructions through unconditional branches.
+ for (auto I = Preds.begin(); I != Preds.end();) {
+ if ((*I)->getTerminator()->getNumSuccessors() != 1)
+ I = Preds.erase(I);
+ else
+ ++I;
+ }
+
+ LockstepReverseIterator LRI(Preds);
+ SmallVector<SinkingInstructionCandidate, 4> Candidates;
+ unsigned InstNum = 0, MemoryInstNum = 0;
+ ModelledPHISet NeededPHIs;
+ SmallPtrSet<Value *, 4> PHIContents;
+ analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
+ unsigned NumOrigPHIs = NeededPHIs.size();
+
+ while (LRI.isValid()) {
+ auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
+ NeededPHIs, PHIContents);
+ if (!Cand)
+ break;
+ Cand->calculateCost(NumOrigPHIs, Preds.size());
+ Candidates.emplace_back(*Cand);
+ --LRI;
+ }
+
+ std::stable_sort(
+ Candidates.begin(), Candidates.end(),
+ [](const SinkingInstructionCandidate &A,
+ const SinkingInstructionCandidate &B) { return A >= B; });
+ DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
+ : Candidates) dbgs()
+ << " " << C << "\n";);
+
+ // Pick the top candidate, as long as its cost is positive!
+ if (Candidates.empty() || Candidates.front().Cost <= 0)
+ return 0;
+ auto C = Candidates.front();
+
+ DEBUG(dbgs() << " -- Sinking: " << C << "\n");
+ BasicBlock *InsertBB = BBEnd;
+ if (C.Blocks.size() < NumOrigPreds) {
+ DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
+ dbgs() << "\n");
+ InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
+ if (!InsertBB) {
+ DEBUG(dbgs() << " -- FAILED to split edge!\n");
+ // Edge couldn't be split.
+ return 0;
+ }
+ }
+
+ for (unsigned I = 0; I < C.NumInstructions; ++I)
+ sinkLastInstruction(C.Blocks, InsertBB);
+
+ return C.NumInstructions;
+}
+
+void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+ BasicBlock *BBEnd) {
+ SmallVector<Instruction *, 4> Insts;
+ for (BasicBlock *BB : Blocks)
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ Instruction *I0 = Insts.front();
+
+ SmallVector<Value *, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
+ }
+
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // Update metadata and IR flags.
+ for (auto *I : Insts)
+ if (I != I0) {
+ combineMetadataForCSE(I0, I);
+ I0->andIRFlags(I);
+ }
+
+ for (auto *I : Insts)
+ if (I != I0)
+ I->replaceAllUsesWith(I0);
+ foldPointlessPHINodes(BBEnd);
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+
+ NumRemoved += Insts.size() - 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Pass machinery / boilerplate
+
+class GVNSinkLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNSinkLegacyPass() : FunctionPass(ID) {
+ initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ GVNSink G;
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
+ GVNSink G;
+ if (!G.run(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+char GVNSinkLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+
+FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
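For illustration, a minimal sketch (not part of the patch itself) of driving the new pass from C++ through the legacy pass manager. It assumes createGVNSinkPass() is declared in llvm/Transforms/Scalar.h and that M is an already-materialized Module; the legacy pass should also be reachable by the name registered above, e.g. `opt -gvn-sink`.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h" // assumed home of createGVNSinkPass()

static void runGVNSinkOn(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createGVNSinkPass()); // function pass introduced by this change
  PM.run(M);                         // per function: value-number and sink common
                                     // instructions into shared successors
}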
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 198d2b2b024f..65a2cd955672 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -537,9 +537,7 @@ bool GuardWideningImpl::parseRangeChecks(
Changed = true;
} else if (match(Check.getBase(),
m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
- unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(OpLHS, Known, DL);
+ KnownBits Known = computeKnownBits(OpLHS, DL);
if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
Check.setBase(OpLHS);
APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
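The same simplification recurs in the Local.cpp, SimplifyCFG.cpp and SimplifyLibCalls.cpp hunks further down: the value-returning computeKnownBits overload sizes its KnownBits result from the value's type, so call sites no longer pre-compute a bit width or pre-construct the object. A minimal sketch of the updated call-site shape (the helper name and its parameters are stand-ins, not code from the patch):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/KnownBits.h"

static unsigned knownTrailingZeros(const llvm::Value *V, const llvm::DataLayout &DL,
                                   llvm::AssumptionCache *AC,
                                   const llvm::Instruction *CxtI,
                                   const llvm::DominatorTree *DT) {
  // The overload derives the width from V's type and returns the result,
  // replacing the old KnownBits(BitWidth) + computeKnownBits(V, Known, ...) pair.
  llvm::KnownBits Known = llvm::computeKnownBits(V, DL, /*Depth=*/0, AC, CxtI, DT);
  return Known.countMinTrailingZeros(); // low bits proven zero, e.g. for alignment
}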
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 85db6e5e1105..e21b0feb7c5a 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1228,7 +1228,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
ValueToValueMapTy &VM) {
- Loop &New = LPM.addLoop(Parent);
+ Loop &New = *new Loop();
+ if (Parent)
+ Parent->addChildLoop(&New);
+ else
+ LI.addTopLevelLoop(&New);
+ LPM.addLoop(New);
// Add all of the blocks in Original to the new loop.
for (auto *BB : Original->blocks())
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ada22ae38eb8..2ef8f8563bb9 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -253,6 +253,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}
+// Replace uses of Cond with ToVal when safe to do so. If all uses are
+// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
+// because we may incorrectly replace uses when guards/assumes are uses of
+// `Cond` and we use the guards/assumes to reason about the `Cond` value
+// at the end of the block. RAUW unconditionally replaces all uses
+// including the guards/assumes themselves and the uses before the
+// guard/assume.
+static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
+ assert(Cond->getType() == ToVal->getType());
+ auto *BB = Cond->getParent();
+ // We can unconditionally replace all uses in non-local blocks (i.e. uses
+ // strictly dominated by BB), since LVI information is true from the
+ // terminator of BB.
+ replaceNonLocalUsesWith(Cond, ToVal);
+ for (Instruction &I : reverse(*BB)) {
+ // Reached the Cond whose uses we are trying to replace, so there are no
+ // more uses.
+ if (&I == Cond)
+ break;
+ // We only replace uses in instructions that are guaranteed to reach the end
+ // of BB, where we know Cond is ToVal.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ I.replaceUsesOfWith(Cond, ToVal);
+ }
+ if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+ Cond->eraseFromParent();
+}
+
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
@@ -833,13 +862,19 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
- // TODO: We can safely replace *some* uses of the CondInst if it has
+ // We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
// is because we use the guards/assume to reason about the `Cond` value
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
+ else if (CondCmp->getParent() == BB) {
+ auto *CI = Ret == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ ReplaceFoldableUses(CondCmp, CI);
+ }
return true;
}
@@ -1325,13 +1360,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
CondInst->eraseFromParent();
- // TODO: We can safely replace *some* uses of the CondInst if it has
+ // We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
// is because we use the guards/assume to reason about the `Cond` value
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
+ else if (OnlyVal && OnlyVal != MultipleVal &&
+ CondInst->getParent() == BB)
+ ReplaceFoldableUses(CondInst, OnlyVal);
}
return true;
}
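A worked example, illustrative only and with hypothetical IR names, of what the new ReplaceFoldableUses helper does once LVI has proven %cond true at the end of %bb:

// bb:
//   %cond = icmp eq i32 %x, 0
//   %keep = select i1 %cond, i32 1, i32 2  ; NOT rewritten: the call below may not
//                                          ; transfer execution to the end of %bb
//   call void @may_throw()
//   %fold = select i1 %cond, i32 3, i32 4  ; rewritten to use i1 true
//   br i1 %cond, label %t, label %f        ; rewritten to use i1 true
//
// Uses of %cond outside %bb are rewritten first via replaceNonLocalUsesWith; the
// reverse walk then rewrites in-block uses until it reaches an instruction that
// is not guaranteed to transfer execution to its successor. If no uses remain
// and %cond has no side effects, %cond itself is erased.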
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 97337ea5ba62..c6a05ecbd0b1 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1035,6 +1035,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
return nullptr;
}
+// Check if the recurrence variable `VarX` is in the right form to create
+// the idiom. Returns the value coerced to a PHINode if so.
+static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
+ BasicBlock *LoopEntry) {
+ auto *PhiX = dyn_cast<PHINode>(VarX);
+ if (PhiX && PhiX->getParent() == LoopEntry &&
+ (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
+ return PhiX;
+ return nullptr;
+}
+
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
@@ -1110,13 +1121,9 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
}
// step 3: Check the recurrence of variable X
- {
- PhiX = dyn_cast<PHINode>(VarX1);
- if (!PhiX ||
- (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
- return false;
- }
- }
+ PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
+ if (!PhiX)
+ return false;
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
@@ -1132,8 +1139,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
+ PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ if (!Phi)
continue;
// Check if the result of the instruction is live of the loop.
@@ -1227,8 +1234,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
- PhiX = dyn_cast<PHINode>(VarX);
- if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX))
+ PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+ if (!PhiX)
return false;
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
@@ -1248,8 +1255,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
+ PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ if (!Phi)
continue;
CntInst = Inst;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 6ef1464e9338..19daebd0613a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop &New = LPM->addLoop(PL);
+ Loop &New = *new Loop();
+ if (PL)
+ PL->addChildLoop(&New);
+ else
+ LI->addTopLevelLoop(&New);
+ LPM->addLoop(New);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5cfbf6baeaa9..67abc3116988 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -858,7 +858,14 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
// Filter out unreachable phi operands.
auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
- return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
+ if (*U == PN)
+ return false;
+ if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}))
+ return false;
+ // Things in TOPClass are equivalent to everything.
+ if (ValueToClass.lookup(*U) == TOPClass)
+ return false;
+ return true;
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
@@ -866,14 +873,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
OriginalOpsConstant =
OriginalOpsConstant && isa<Constant>(*U);
- // Use nullptr to distinguish between things that were
- // originally self-defined and those that have an operand
- // leader that is self-defined.
- if (*U == PN)
- return nullptr;
- // Things in TOPClass are equivalent to everything.
- if (ValueToClass.lookup(*U) == TOPClass)
- return nullptr;
return lookupOperandLeader(*U);
});
return E;
@@ -955,6 +954,10 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC && CC->getDefiningExpr()) {
+ // If we simplified to something else, we need to communicate
+ // that we're users of the value we simplified to.
+ if (I != V)
+ addAdditionalUsers(V, I);
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " expression " << *CC->getDefiningExpr() << "\n");
@@ -1581,6 +1584,30 @@ bool NewGVN::isCycleFree(const Instruction *I) const {
// Evaluate PHI nodes symbolically, and create an expression result.
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
+ // Resolve irreducible and reducible phi cycles.
+ // FIXME: This is hopefully a temporary solution while we resolve the issues
+ // with fixpointing self-cycles. It currently should be "guaranteed" to be
+ // correct, but non-optimal. The SCCFinder does not, for example, take
+ // reachability of arguments into account, etc.
+ SCCFinder.Start(I);
+ bool CanOptimize = true;
+ SmallPtrSet<Value *, 8> OuterOps;
+
+ auto &Component = SCCFinder.getComponentFor(I);
+ for (auto *Member : Component) {
+ if (!isa<PHINode>(Member)) {
+ CanOptimize = false;
+ break;
+ }
+ for (auto &PHIOp : cast<PHINode>(Member)->operands())
+ if (!isa<PHINode>(PHIOp) || !Component.count(cast<PHINode>(PHIOp)))
+ OuterOps.insert(PHIOp);
+ }
+ if (CanOptimize && OuterOps.size() == 1) {
+ DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin())
+ << "\n");
+ return createVariableOrConstant(*OuterOps.begin());
+ }
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
@@ -1594,17 +1621,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
- bool CycleFree = isCycleFree(I);
auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) {
- if (Arg == nullptr)
- return false;
- // Original self-operands are already eliminated during expression creation.
- // We can only eliminate value-wise self-operands if it's cycle
- // free. Otherwise, eliminating the operand can cause our value to change,
- // which can cause us to not eliminate the operand, which changes the value
- // back to what it was before, cycling forever.
- if (CycleFree && Arg == I)
- return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
return false;
@@ -1613,6 +1630,14 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
});
// If we are left with no operands, it's dead.
if (Filtered.begin() == Filtered.end()) {
+ // If it has undef at this point, it means there are no non-undef arguments,
+ // and thus, the value of the phi node must be undef.
+ if (HasUndef) {
+ DEBUG(dbgs() << "PHI Node " << *I
+ << " has no non-undef arguments, valuing it as undef\n");
+ return createConstantExpression(UndefValue::get(I->getType()));
+ }
+
DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
return createDeadExpression();
@@ -1642,7 +1667,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
- !isa<UndefValue>(AllSameValue) && !CycleFree)
+ !isa<UndefValue>(AllSameValue) && !isCycleFree(I))
return E;
// Only have to check for instructions
@@ -3556,6 +3581,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Map to store the use counts
DenseMap<const Value *, unsigned int> UseCounts;
for (auto *CC : reverse(CongruenceClasses)) {
+ DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n");
// Track the equivalent store info so we can decide whether to try
// dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3602,8 +3628,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
CC->swap(MembersLeft);
} else {
- DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
- << "\n");
// If this is a singleton, we can skip it.
if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
@@ -3846,6 +3870,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
}
+namespace {
class NewGVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
@@ -3865,6 +3890,7 @@ private:
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
+} // namespace
bool NewGVNLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 8908dae2f545..1d0e8396f6a2 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1779,8 +1779,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
if (F.hasLocalLinkage()) {
- if (AddressIsTaken(&F))
+ if (F.hasAddressTaken()) {
AddressTakenFunctions.insert(&F);
+ }
else {
Solver.AddArgumentTrackedFunction(&F);
continue;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 24bd0a2b7bdf..6e113bccff94 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -326,7 +326,7 @@ private:
/// partition.
uint64_t BeginOffset, EndOffset;
- /// \brief The start end end iterators of this partition.
+ /// \brief The start and end iterators of this partition.
iterator SI, SJ;
/// \brief A collection of split slice tails overlapping the partition.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 52201d8f3e51..9fa43da99da9 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
+ initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b32a61a7e8f8..0f170e26ce5f 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -123,11 +123,62 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
// exit block.
DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
- // Blocks reachable from the unswitched block may need to change their IDom
- // as well.
+ // For everything that moves up the dominator tree, we need to examine the
+ // dominator frontier to see if it additionally should move up the dominator
+ // tree. This lambda appends the dominance frontier of a node to the
+ // worklist.
+ //
+ // Note that we don't currently use the IDFCalculator here for two reasons:
+ // 1) It computes dominator tree levels for the entire function on each run
+ // of 'compute'. While this isn't terrible, given that we expect to update
+ // relatively small subtrees of the domtree, it isn't necessarily the right
+ // tradeoff.
+ // 2) The interface doesn't fit this usage well. It doesn't operate in an
+ // append-only fashion, and it builds several sets that we don't need.
+ //
+ // FIXME: Neither of these issues are a big deal and could be addressed with
+ // some amount of refactoring of IDFCalculator. That would allow us to share
+ // the core logic here (which is solving the same core problem).
SmallSetVector<BasicBlock *, 4> Worklist;
- for (auto *SuccBB : successors(UnswitchedBB))
- Worklist.insert(SuccBB);
+ SmallVector<DomTreeNode *, 4> DomNodes;
+ SmallPtrSet<BasicBlock *, 4> DomSet;
+ auto AppendDomFrontier = [&](DomTreeNode *Node) {
+ assert(DomNodes.empty() && "Must start with no dominator nodes.");
+ assert(DomSet.empty() && "Must start with an empty dominator set.");
+
+ // First flatten this subtree into sequence of nodes by doing a pre-order
+ // walk.
+ DomNodes.push_back(Node);
+ // We intentionally re-evaluate the size as each node can add new children.
+ // Because this is a tree walk, this cannot add any duplicates.
+ for (int i = 0; i < (int)DomNodes.size(); ++i)
+ DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
+
+ // Now create a set of the basic blocks so we can quickly test for
+ // dominated successors. We could in theory use the DFS numbers of the
+ // dominator tree for this, but we want this to remain predictably fast
+ // even while we mutate the dominator tree in ways that would invalidate
+ // the DFS numbering.
+ for (DomTreeNode *InnerN : DomNodes)
+ DomSet.insert(InnerN->getBlock());
+
+ // Now re-walk the nodes, appending every successor of every node that isn't
+ // in the set. Note that we don't append the node itself, even though, if it
+ // is a successor, it does not strictly dominate itself and thus it would be
+ // part of the dominance frontier. The reason we don't append it is that
+ // the node passed in came *from* the worklist and so it has already been
+ // processed.
+ for (DomTreeNode *InnerN : DomNodes)
+ for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
+ if (!DomSet.count(SuccBB))
+ Worklist.insert(SuccBB);
+
+ DomNodes.clear();
+ DomSet.clear();
+ };
+
+ // Append the initial dom frontier nodes.
+ AppendDomFrontier(UnswitchedNode);
// Walk the worklist. We grow the list in the loop and so must recompute size.
for (int i = 0; i < (int)Worklist.size(); ++i) {
@@ -136,20 +187,17 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
DomTreeNode *Node = DT[BB];
assert(!DomChain.count(Node) &&
"Cannot be dominated by a block you can reach!");
- // If this block doesn't have an immediate dominator somewhere in the chain
- // we hoisted over, then its position in the domtree hasn't changed. Either
- // it is above the region hoisted and still valid, or it is below the
- // hoisted block and so was trivially updated. This also applies to
- // everything reachable from this block so we're completely done with the
- // it.
+
+ // If this block had an immediate dominator somewhere in the chain
+ // we hoisted over, then its position in the domtree needs to move as it is
+ // reachable from a node hoisted over this chain.
if (!DomChain.count(Node->getIDom()))
continue;
- // We need to change the IDom for this node but also walk its successors
- // which could have similar dominance position.
DT.changeImmediateDominator(Node, OldPHNode);
- for (auto *SuccBB : successors(BB))
- Worklist.insert(SuccBB);
+
+ // Now add this node's dominator frontier to the worklist as well.
+ AppendDomFrontier(Node);
}
}
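A simplified, self-contained model of the flatten-then-scan pattern that AppendDomFrontier uses above, written against plain C++ containers rather than the LLVM API (Node, Children and Succs are stand-ins for DomTreeNode children and CFG successors):

#include <cstddef>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Children; // dominator-tree children
  std::vector<Node *> Succs;    // CFG successors
};

// Collect every CFG successor that lies outside Root's dominator subtree;
// those blocks approximate the dominance frontier of Root.
static void appendFrontier(Node *Root, std::vector<Node *> &Worklist) {
  std::vector<Node *> Subtree{Root};
  // Pre-order flatten: the vector grows while it is indexed, and a tree walk
  // cannot introduce duplicates.
  for (std::size_t I = 0; I < Subtree.size(); ++I)
    Subtree.insert(Subtree.end(), Subtree[I]->Children.begin(),
                   Subtree[I]->Children.end());

  std::set<Node *> InSubtree(Subtree.begin(), Subtree.end());
  for (Node *N : Subtree)
    for (Node *S : N->Succs)
      if (!InSubtree.count(S))
        Worklist.push_back(S);
}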
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index bf2ab7c55be2..1ec3d0d49637 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -133,7 +133,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
auto *SP = cast<DISubprogram>(MD.second);
NewMD = DISubprogram::getDistinct(
NewFunc->getContext(), SP->getScope(), SP->getName(),
- NewFunc->getName(), SP->getFile(), SP->getLine(), SP->getType(),
+ SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(),
SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 73a0b2737e95..57468be9a2a8 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -76,12 +76,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
int FunctionComparator::cmpAttrs(const AttributeList L,
const AttributeList R) const {
- if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+ if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
return Res;
- for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
- AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
- RE = R.end(i);
+ for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
+ AttributeSet LAS = L.getAttributes(i);
+ AttributeSet RAS = R.getAttributes(i);
+ AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
+ AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
for (; LI != LE && RI != RE; ++LI, ++RI) {
Attribute LA = *LI;
Attribute RA = *RI;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 9cb4762b683c..0ca9f4c484e6 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1397,11 +1397,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const Optional<uint64_t> &CalleeEntryCount,
const Instruction *TheCall,
- ProfileSummaryInfo *PSI) {
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *CallerBFI) {
if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
return;
Optional<uint64_t> CallSiteCount =
- PSI ? PSI->getProfileCount(TheCall, nullptr) : None;
+ PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
uint64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
CalleeEntryCount.getValue());
@@ -1637,7 +1638,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->front());
updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
- IFI.PSI);
+ IFI.PSI, IFI.CallerBFI);
// Update the profile count of callee.
updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 1ca509472b5f..ebd528bc8ec1 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1037,17 +1037,15 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
- KnownBits Known(BitWidth);
- computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT);
unsigned TrailZ = Known.countMinTrailingZeros();
// Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
- unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+ unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ);
// LLVM doesn't support alignments larger than this currently.
Align = std::min(Align, +Value::MaximumAlignment);
@@ -1796,6 +1794,23 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
return Count;
}
+unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
+ assert(From->getType() == To->getType());
+ auto *BB = From->getParent();
+ unsigned Count = 0;
+
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *I = cast<Instruction>(U.getUser());
+ if (I->getParent() == BB)
+ continue;
+ U.set(To);
+ ++Count;
+ }
+ return Count;
+}
+
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlockEdge &Root) {
@@ -2094,3 +2109,48 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
!F->doesNotAccessMemory())
CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
}
+
+bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+ // We can't have a PHI with a metadata type.
+ if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+ return false;
+
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
+
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ // Many arithmetic intrinsics have no issue taking a
+ // variable; however, it's hard to distinguish these from
+ // specials such as @llvm.frameaddress that require a constant.
+ if (isa<IntrinsicInst>(I))
+ return false;
+
+ // Constant bundle operands may need to retain their constant-ness for
+ // correctness.
+ if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+ return false;
+ return true;
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::Alloca:
+ return false;
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = gep_type_begin(I);
+ for (auto E = std::next(It, OpIdx); It != E; ++It)
+ if (It.isStruct())
+ return false;
+ return true;
+ }
+}
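A worked example, illustrative only and with a hypothetical GEP, of how the hoisted canReplaceOperandWithVariable treats operands in the sinking code: indices that select struct fields must stay constant, while other operands may be turned into PHIs.

// Given  %p = getelementptr {i32, i32}, {i32, i32}* %s, i64 0, i32 1
//   canReplaceOperandWithVariable(%p, 0) -> true   (%s is not a constant; early exit)
//   canReplaceOperandWithVariable(%p, 1) -> true   (the i64 index steps over the
//                                                   pointer, not into a struct)
//   canReplaceOperandWithVariable(%p, 2) -> false  (selects a struct field, so it
//                                                   must remain constant)
// Likewise the shufflevector mask (operand 2) and every insertvalue/extractvalue
// operand other than the aggregate (operand 0) must stay constant.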
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 27f72fcd8bda..1b442a9a264d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1376,53 +1376,6 @@ HoistTerminator:
return true;
}
-// Is it legal to place a variable in operand \c OpIdx of \c I?
-// FIXME: This should be promoted to Instruction.
-static bool canReplaceOperandWithVariable(const Instruction *I,
- unsigned OpIdx) {
- // We can't have a PHI with a metadata type.
- if (I->getOperand(OpIdx)->getType()->isMetadataTy())
- return false;
-
- // Early exit.
- if (!isa<Constant>(I->getOperand(OpIdx)))
- return true;
-
- switch (I->getOpcode()) {
- default:
- return true;
- case Instruction::Call:
- case Instruction::Invoke:
- // FIXME: many arithmetic intrinsics have no issue taking a
- // variable, however it's hard to distingish these from
- // specials such as @llvm.frameaddress that require a constant.
- if (isa<IntrinsicInst>(I))
- return false;
-
- // Constant bundle operands may need to retain their constant-ness for
- // correctness.
- if (ImmutableCallSite(I).isBundleOperand(OpIdx))
- return false;
-
- return true;
-
- case Instruction::ShuffleVector:
- // Shufflevector masks are constant.
- return OpIdx != 2;
- case Instruction::ExtractValue:
- case Instruction::InsertValue:
- // All operands apart from the first are constant.
- return OpIdx == 0;
- case Instruction::Alloca:
- return false;
- case Instruction::GetElementPtr:
- if (OpIdx == 0)
- return true;
- gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1);
- return It.isSequential();
- }
-}
-
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
@@ -4368,8 +4321,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
- KnownBits Known(Bits);
- computeKnownBits(Cond, Known, DL, 0, AC, SI);
+ KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
// We can also eliminate cases by determining that their values are outside of
// the limited range of the condition based on how many significant (non-sign)
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 85c9464b5569..49effda5d833 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -466,9 +466,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
}
Value *Offset = GEP->getOperand(2);
- unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
- KnownBits Known(BitWidth);
- computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
+ KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
Known.Zero.flipAllBits();
uint64_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1dc554bede7e..3b036a6ac430 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2092,6 +2092,10 @@ private:
/// The data is collected per VF.
DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Scalars;
+ /// Holds the instructions (address computations) that are forced to be
+ /// scalarized.
+ DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> ForcedScalars;
+
/// Returns the expected difference in cost from scalarizing the expression
/// feeding a predicated instruction \p PredInst. The instructions to
/// scalarize and their scalar costs are collected in \p ScalarCosts. A
@@ -5086,12 +5090,18 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
bool LoopVectorizationLegality::canVectorize() {
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// FIXME: The code is currently dead, since the loop gets sent to
@@ -5101,21 +5111,30 @@ bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->empty()) {
ORE->emit(createMissedAnalysis("NotInnermostLoop")
<< "loop is not the innermost loop");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We only handle bottom-tested loops, i.e. loop in which the condition is
@@ -5124,7 +5143,10 @@ bool LoopVectorizationLegality::canVectorize() {
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// We need to have a loop header.
@@ -5135,28 +5157,28 @@ bool LoopVectorizationLegality::canVectorize() {
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
- return false;
- }
-
- // ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = PSE.getBackedgeTakenCount();
- if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
- ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations")
- << "could not determine number of loop iterations");
- DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// Check if we can vectorize the instructions and CFG in this loop.
if (!canVectorizeInstrs()) {
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
// Go over each instruction and look at memory deps.
if (!canVectorizeMemory()) {
DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
DEBUG(dbgs() << "LV: We can vectorize this loop"
@@ -5184,13 +5206,17 @@ bool LoopVectorizationLegality::canVectorize() {
<< "Too many SCEV assumptions need to be made and checked "
<< "at runtime");
DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
- return false;
+ if (ORE->allowExtraAnalysis())
+ Result = false;
+ else
+ return false;
}
- // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // Okay! We've done all the tests. If any have failed, return false. Otherwise
+ // we can vectorize, and at this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
- return true;
+ return Result;
}
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
@@ -5554,6 +5580,13 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n");
}
+ // Insert the forced scalars.
+ // FIXME: Currently widenPHIInstruction() often creates a dead vector
+ // induction variable when the PHI user is scalarized.
+ if (ForcedScalars.count(VF))
+ for (auto *I : ForcedScalars.find(VF)->second)
+ Worklist.insert(I);
+
// Expand the worklist by looking through any bitcasts and getelementptr
// instructions we've already identified as scalar. This is similar to the
// expansion step in collectLoopUniforms(); however, here we're only
@@ -7129,11 +7162,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (VF > 1 && isProfitableToScalarize(I, VF))
return VectorizationCostTy(InstsToScalarize[VF][I], false);
+ // Forced scalars do not have any scalarization overhead.
+ if (VF > 1 && ForcedScalars.count(VF) &&
+ ForcedScalars.find(VF)->second.count(I))
+ return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
+ // Note: Even if all instructions are scalarized, return true if any memory
+ // accesses appear in the loop to get benefits from address folding etc.
bool TypeNotScalarized =
- VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
+ VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
return VectorizationCostTy(C, TypeNotScalarized);
}
@@ -7208,6 +7248,62 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
setWideningDecision(&I, VF, Decision, Cost);
}
}
+
+ // Make sure that any load of address and any other address computation
+ // remains scalar unless there is gather/scatter support. This avoids
+ // inevitable extracts into address registers, and also has the benefit of
+ // activating LSR more, since that pass can't optimize vectorized
+ // addresses.
+ if (TTI.prefersVectorizedAddressing())
+ return;
+
+ // Start with all scalar pointer uses.
+ SmallPtrSet<Instruction *, 8> AddrDefs;
+ for (BasicBlock *BB : TheLoop->blocks())
+ for (Instruction &I : *BB) {
+ Instruction *PtrDef =
+ dyn_cast_or_null<Instruction>(getPointerOperand(&I));
+ if (PtrDef && TheLoop->contains(PtrDef) &&
+ getWideningDecision(&I, VF) != CM_GatherScatter)
+ AddrDefs.insert(PtrDef);
+ }
+
+ // Add all instructions used to generate the addresses.
+ SmallVector<Instruction *, 4> Worklist;
+ for (auto *I : AddrDefs)
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (auto &Op : I->operands())
+ if (auto *InstOp = dyn_cast<Instruction>(Op))
+ if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) &&
+ AddrDefs.insert(InstOp).second == true)
+ Worklist.push_back(InstOp);
+ }
+
+ for (auto *I : AddrDefs) {
+ if (isa<LoadInst>(I)) {
+ // Setting the desired widening decision should ideally be handled
+ // by cost functions, but since this involves the task of finding out
+ // if the loaded register is involved in an address computation, it is
+ // instead changed here when we know this is the case.
+ if (getWideningDecision(I, VF) == CM_Widen)
+ // Scalarize a widened load of address.
+ setWideningDecision(I, VF, CM_Scalarize,
+ (VF * getMemoryInstructionCost(I, 1)));
+ else if (auto Group = Legal->getInterleavedAccessGroup(I)) {
+ // Scalarize an interleave group of address loads.
+ for (unsigned I = 0; I < Group->getFactor(); ++I) {
+ if (Instruction *Member = Group->getMember(I))
+ setWideningDecision(Member, VF, CM_Scalarize,
+ (VF * getMemoryInstructionCost(Member, 1)));
+ }
+ }
+ } else
+ // Make sure I gets scalarized and is given a cost estimate without
+ // scalarization overhead.
+ ForcedScalars[VF].insert(I);
+ }
}
unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
@@ -7216,7 +7312,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
- VectorTy = ToVectorTy(RetTy, VF);
+ VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
@@ -7349,9 +7445,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
} else if (Legal->isUniform(Op2)) {
Op2VK = TargetTransformInfo::OK_UniformValue;
}
- SmallVector<const Value *, 4> Operands(I->operand_values());
- return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -7374,7 +7471,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
case Instruction::Store:
case Instruction::Load: {
- VectorTy = ToVectorTy(getMemInstValueType(I), VF);
+ unsigned Width = VF;
+ if (Width > 1) {
+ InstWidening Decision = getWideningDecision(I, Width);
+ assert(Decision != CM_Unknown &&
+ "CM decision should be taken at this point");
+ if (Decision == CM_Scalarize)
+ Width = 1;
+ }
+ VectorTy = ToVectorTy(getMemInstValueType(I), Width);
return getMemoryInstructionCost(I, VF);
}
case Instruction::ZExt: