aboutsummaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-07-05 14:21:36 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-07-05 14:21:36 +0000
commit1a82d4c088707c791c792f6822f611b47a12bdfe (patch)
tree7c411f9b5d807f7f204fdd16965d8925a82b6d18 /lib/Target
parent3a0822f094b578157263e04114075ad7df81db41 (diff)
downloadsrc-1a82d4c088707c791c792f6822f611b47a12bdfe.tar.gz
src-1a82d4c088707c791c792f6822f611b47a12bdfe.zip
Vendor import of llvm trunk r241361:vendor/llvm/llvm-trunk-r241361
Notes
Notes: svn path=/vendor/llvm/dist/; revision=285163 svn path=/vendor/llvm/llvm-trunk-r241361/; revision=285164; tag=vendor/llvm/llvm-trunk-r241361
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp2
-rw-r--r--lib/Target/AArch64/AArch64BranchRelaxation.cpp2
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.h2
-rw-r--r--lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp2
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp33
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp157
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h9
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td49
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp22
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td8
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp3
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.h2
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.cpp2
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.h2
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h2
-rw-r--r--lib/Target/AArch64/AArch64StorePairSuppress.cpp2
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h2
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp4
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp20
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h5
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp31
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp14
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h22
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td13
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp143
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h20
-rw-r--r--lib/Target/AMDGPU/AMDKernelCodeT.h121
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp314
-rw-r--r--lib/Target/AMDGPU/AsmParser/LLVMBuild.txt2
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/LLVMBuild.txt4
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp12
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp11
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h4
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp22
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h5
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp297
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h77
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/Makefile2
-rw-r--r--lib/Target/AMDGPU/Processors.td15
-rw-r--r--lib/Target/AMDGPU/R600Defines.h4
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h2
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp11
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.h2
-rw-r--r--lib/Target/AMDGPU/R600MachineFunctionInfo.h2
-rw-r--r--lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp2
-rw-r--r--lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp2
-rw-r--r--lib/Target/AMDGPU/SIDefines.h31
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp11
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp27
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h2
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp60
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h34
-rw-r--r--lib/Target/AMDGPU/Utils/CMakeLists.txt3
-rw-r--r--lib/Target/AMDGPU/Utils/LLVMBuild.txt23
-rw-r--r--lib/Target/AMDGPU/Utils/Makefile16
-rw-r--r--lib/Target/ARM/ARM.h2
-rw-r--r--lib/Target/ARM/ARM.td4
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp10
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp14
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h2
-rw-r--r--lib/Target/ARM/ARMCallingConv.h2
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h4
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp4
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp2
-rw-r--r--lib/Target/ARM/ARMFeatures.h2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp163
-rw-r--r--lib/Target/ARM/ARMISelLowering.h13
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp231
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h2
-rw-r--r--lib/Target/ARM/ARMOptimizeBarriersPass.cpp2
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h2
-rw-r--r--lib/Target/ARM/ARMSubtarget.h2
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp4
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp25
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h5
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp20
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp55
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp2
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h2
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp2
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp2
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.h2
-rw-r--r--lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--lib/Target/BPF/BPFFrameLowering.h2
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp2
-rw-r--r--lib/Target/BPF/BPFISelLowering.h2
-rw-r--r--lib/Target/BPF/BPFInstrInfo.h2
-rw-r--r--lib/Target/BPF/BPFMCInstLower.h2
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.h2
-rw-r--r--lib/Target/BPF/BPFSubtarget.h2
-rw-r--r--lib/Target/BPF/BPFTargetMachine.cpp2
-rw-r--r--lib/Target/BPF/BPFTargetMachine.h2
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.h2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h2
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp3
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h2
-rw-r--r--lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp2
-rw-r--r--lib/Target/Hexagon/Hexagon.h2
-rwxr-xr-xlib/Target/Hexagon/HexagonAsmPrinter.h2
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h2
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h2
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h2
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h2
-rw-r--r--lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetStreamer.h2
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp19
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp10
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp11
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h5
-rw-r--r--lib/Target/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h2
-rw-r--r--lib/Target/MSP430/MSP430.h4
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h2
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h2
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.h2
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h2
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.h2
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h2
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp435
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp19
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIInfo.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp53
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrFormats.td47
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrInfo.td23
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td4
-rw-r--r--lib/Target/Mips/Mips.h2
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h2
-rw-r--r--lib/Target/Mips/Mips16HardFloat.cpp2
-rw-r--r--lib/Target/Mips/Mips16HardFloatInfo.cpp4
-rw-r--r--lib/Target/Mips/Mips16HardFloatInfo.h4
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.h2
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp2
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h2
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h2
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td16
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.h2
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp3
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--lib/Target/Mips/MipsCCState.h2
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp13
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h2
-rw-r--r--lib/Target/Mips/MipsISelLowering.h4
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td42
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp6
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--lib/Target/Mips/MipsModuleISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h2
-rw-r--r--lib/Target/Mips/MipsOs16.cpp2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td16
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp2
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h2
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h2
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h2
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.h2
-rw-r--r--lib/Target/Mips/MipsSubtarget.h2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h2
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h38
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h4
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp5
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h2
-rw-r--r--lib/Target/NVPTX/NVPTX.h11
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp46
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h2
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXMachineFunctionInfo.h2
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp154
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td2
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h2
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--lib/Target/PowerPC/PPC.h2
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.h2
-rw-r--r--lib/Target/PowerPC/PPCEarlyReturn.cpp2
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp6
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp25
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp107
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h11
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td50
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h2
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp23
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td24
-rw-r--r--lib/Target/PowerPC/PPCLoopDataPrefetch.cpp2
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp4
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTOCRegDeps.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--lib/Target/PowerPC/PPCVSXCopy.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXFMAMutate.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp69
-rw-r--r--lib/Target/Sparc/Disassembler/SparcDisassembler.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h2
-rw-r--r--lib/Target/Sparc/Sparc.h4
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td148
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h2
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp13
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp12
-rw-r--r--lib/Target/WebAssembly/CMakeLists.txt24
-rw-r--r--lib/Target/WebAssembly/InstPrinter/CMakeLists.txt3
-rw-r--r--lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/WebAssembly/InstPrinter/Makefile16
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp43
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h38
-rw-r--r--lib/Target/WebAssembly/LLVMBuild.txt32
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt4
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp53
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h32
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp56
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h53
-rw-r--r--lib/Target/WebAssembly/Makefile19
-rw-r--r--lib/Target/WebAssembly/README.txt15
-rw-r--r--lib/Target/WebAssembly/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/WebAssembly/TargetInfo/Makefile15
-rw-r--r--lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp30
-rw-r--r--lib/Target/WebAssembly/WebAssembly.h31
-rw-r--r--lib/Target/WebAssembly/WebAssembly.td62
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp74
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.h48
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp73
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp63
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.h49
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrAtomics.td46
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFormats.td28
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp28
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.h37
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td46
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrSIMD.td15
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp19
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h37
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp33
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.h35
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.td28
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp23
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h31
-rw-r--r--lib/Target/WebAssembly/WebAssemblySubtarget.cpp48
-rw-r--r--lib/Target/WebAssembly/WebAssemblySubtarget.h79
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp173
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.h51
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h67
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp28
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h87
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp2
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h2
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h16
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp10
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp39
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp32
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h12
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86FixupKinds.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp16
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp2
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp2
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h2
-rw-r--r--lib/Target/X86/X86.h2
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp69
-rw-r--r--lib/Target/X86/X86AsmPrinter.h2
-rw-r--r--lib/Target/X86/X86CallFrameOptimization.cpp119
-rw-r--r--lib/Target/X86/X86CallingConv.h2
-rw-r--r--lib/Target/X86/X86FastISel.cpp7
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp2
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp6
-rw-r--r--lib/Target/X86/X86FrameLowering.h2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp45
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp708
-rw-r--r--lib/Target/X86/X86ISelLowering.h17
-rw-r--r--lib/Target/X86/X86InstrAVX512.td794
-rw-r--r--lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--lib/Target/X86/X86InstrCompiler.td16
-rw-r--r--lib/Target/X86/X86InstrFPStack.td16
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td58
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp352
-rw-r--r--lib/Target/X86/X86InstrInfo.h4
-rw-r--r--lib/Target/X86/X86InstrInfo.td10
-rw-r--r--lib/Target/X86/X86InstrSSE.td9
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h355
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp10
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h2
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp2
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp24
-rw-r--r--lib/Target/X86/X86RegisterInfo.h13
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h2
-rw-r--r--lib/Target/X86/X86Subtarget.h2
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp15
-rw-r--r--lib/Target/X86/X86TargetMachine.h2
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp64
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp15
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h2
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--lib/Target/X86/X86WinEHState.cpp17
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp2
-rw-r--r--lib/Target/XCore/XCore.h2
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h2
-rw-r--r--lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp2
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp25
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h2
-rw-r--r--lib/Target/XCore/XCoreLowerThreadLocal.cpp2
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.h2
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h2
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h2
-rw-r--r--lib/Target/XCore/XCoreTargetStreamer.h2
420 files changed, 6777 insertions, 2259 deletions
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 6c5a083b393d..bffd9e6e8c76 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -148,7 +148,7 @@ private:
Color getColor(unsigned Register);
Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
};
-} // namespace
+}
char AArch64A57FPLoadBalancing::ID = 0;
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 176403ce124a..d973234dd86a 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -102,7 +102,7 @@ public:
}
};
char AArch64BranchRelaxation::ID = 0;
-} // namespace
+}
/// verify - check BBOffsets, BBSizes, alignment of islands
void AArch64BranchRelaxation::verify() {
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index efc328a37e5f..1e2d1c3b93bd 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -136,6 +136,6 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
}
-} // namespace
+}
#endif
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 11eefc4ff63d..06ff9af37fd7 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -135,7 +135,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char LDTLSCleanup::ID = 0;
FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index acb35251fc6d..c2470f747a38 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -43,7 +43,7 @@ private:
unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
-} // namespace
+}
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index d1523e8548e2..c19fcdc4bb18 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -1678,7 +1679,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
bool WantZExt, MachineMemOperand *MMO) {
- if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ if (!TLI.allowsMisalignedMemoryAccesses(VT))
return 0;
// Simplify this down to something we can handle.
@@ -1965,7 +1966,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO) {
- if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ if (!TLI.allowsMisalignedMemoryAccesses(VT))
return false;
// Simplify this down to something we can handle.
@@ -3070,9 +3071,9 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ MCSymbol *Symbol = CLI.Symbol;
- if (!Callee && !SymName)
+ if (!Callee && !Symbol)
return false;
// Allow SelectionDAG isel to handle tail calls.
@@ -3134,8 +3135,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CM == CodeModel::Small) {
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
- if (SymName)
- MIB.addExternalSymbol(SymName, 0);
+ if (Symbol)
+ MIB.addSym(Symbol, 0);
else if (Addr.getGlobalValue())
MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
else if (Addr.getReg()) {
@@ -3145,18 +3146,18 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
} else {
unsigned CallReg = 0;
- if (SymName) {
+ if (Symbol) {
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+ .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
CallReg = createResultReg(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
- CallReg)
- .addReg(ADRPReg)
- .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::LDRXui), CallReg)
+ .addReg(ADRPReg)
+ .addSym(Symbol,
+ AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
} else if (Addr.getGlobalValue())
CallReg = materializeGV(Addr.getGlobalValue());
else if (Addr.getReg())
@@ -3460,7 +3461,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
CallLoweringInfo CLI;
- CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
+ MCContext &Ctx = MF->getContext();
+ CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
TLI.getLibcallName(LC), std::move(Args));
if (!lowerCallTo(CLI))
return false;
@@ -4734,7 +4736,8 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
}
CallLoweringInfo CLI;
- CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
+ MCContext &Ctx = MF->getContext();
+ CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
TLI.getLibcallName(LC), std::move(Args));
if (!lowerCallTo(CLI))
return false;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 11227eeaf3d7..b496fccba349 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -63,6 +63,6 @@ public:
RegScavenger *RS) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0165ef9c49c0..f3242cdd971d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1777,8 +1777,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- SDNode *Elt = N->getOperand(i).getNode();
+ for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
unsigned HalfSize = EltSize / 2;
@@ -6689,6 +6688,160 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
return NumBits == 32 || NumBits == 64;
}
+/// \brief Lower an interleaved load into a ldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
+/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
+///
+/// Into:
+/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
+/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
+/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
+bool AArch64TargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+ assert(!Shuffles.empty() && "Empty shufflevector input");
+ assert(Shuffles.size() == Indices.size() &&
+ "Unmatched number of shufflevectors and indices");
+
+ const DataLayout *DL = getDataLayout();
+
+ VectorType *VecTy = Shuffles[0]->getType();
+ unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+
+ // Skip illegal vector types.
+ if (VecSize != 64 && VecSize != 128)
+ return false;
+
+ // A pointer vector can not be the return type of the ldN intrinsics. Need to
+ // load integer vectors first and then convert to pointer vectors.
+ Type *EltTy = VecTy->getVectorElementType();
+ if (EltTy->isPointerTy())
+ VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+ VecTy->getVectorNumElements());
+
+ Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *Tys[2] = {VecTy, PtrTy};
+ static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
+ Intrinsic::aarch64_neon_ld3,
+ Intrinsic::aarch64_neon_ld4};
+ Function *LdNFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+ IRBuilder<> Builder(LI);
+ Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+
+ CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SVI = Shuffles[i];
+ unsigned Index = Indices[i];
+
+ Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+
+ SVI->replaceAllUsesWith(SubVec);
+ }
+
+ return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumElts) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumElts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a stN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// st3 instruction in CodeGen.
+bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+
+ VectorType *VecTy = SVI->getType();
+ assert(VecTy->getVectorNumElements() % Factor == 0 &&
+ "Invalid interleaved store");
+
+ unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ Type *EltTy = VecTy->getVectorElementType();
+ VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+ const DataLayout *DL = getDataLayout();
+ unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+
+ // Skip illegal vector types.
+ if (SubVecSize != 64 && SubVecSize != 128)
+ return false;
+
+ Value *Op0 = SVI->getOperand(0);
+ Value *Op1 = SVI->getOperand(1);
+ IRBuilder<> Builder(SI);
+
+ // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+ // vectors to integer vectors.
+ if (EltTy->isPointerTy()) {
+ Type *IntTy = DL->getIntPtrType(EltTy);
+ unsigned NumOpElts =
+ dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
+
+ // Convert to the corresponding integer vector.
+ Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
+ Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+ Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+ SubVecTy = VectorType::get(IntTy, NumSubElts);
+ }
+
+ Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
+ Type *Tys[2] = {SubVecTy, PtrTy};
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
+ Intrinsic::aarch64_neon_st3,
+ Intrinsic::aarch64_neon_st4};
+ Function *StNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ SmallVector<Value *, 5> Ops;
+
+ // Split the shufflevector operands into sub vectors for the new stN call.
+ for (unsigned i = 0; i < Factor; i++)
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+ Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
+ Builder.CreateCall(StNFunc, Ops);
+ return true;
+}
+
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
unsigned AlignCheck) {
return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index da42376ac250..46298c0e7de1 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -305,6 +305,15 @@ public:
unsigned &RequiredAligment) const override;
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
+ unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+ bool lowerInterleavedLoad(LoadInst *LI,
+ ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices,
+ unsigned Factor) const override;
+ bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ unsigned Factor) const override;
+
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 2c52f340d6d1..3f2e772a90c4 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -614,10 +614,15 @@ def move_vec_shift : Operand<i32> {
let ParserMatchClass = MoveVecShifterOperand;
}
-def AddSubImmOperand : AsmOperandClass {
- let Name = "AddSubImm";
- let ParserMethod = "tryParseAddSubImm";
- let DiagnosticType = "AddSubSecondSource";
+let DiagnosticType = "AddSubSecondSource" in {
+ def AddSubImmOperand : AsmOperandClass {
+ let Name = "AddSubImm";
+ let ParserMethod = "tryParseAddSubImm";
+ }
+ def AddSubImmNegOperand : AsmOperandClass {
+ let Name = "AddSubImmNeg";
+ let ParserMethod = "tryParseAddSubImm";
+ }
}
// An ADD/SUB immediate shifter operand:
// second operand:
@@ -631,8 +636,17 @@ class addsub_shifted_imm<ValueType Ty>
let MIOperandInfo = (ops i32imm, i32imm);
}
+class addsub_shifted_imm_neg<ValueType Ty>
+ : Operand<Ty> {
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmNegOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
+def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
+def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
class neg_addsub_shifted_imm<ValueType Ty>
: Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
@@ -1633,7 +1647,7 @@ class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
(inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
shiftExt)>;
-multiclass AddSub<bit isSub, string mnemonic,
+multiclass AddSub<bit isSub, string mnemonic, string alias,
SDPatternOperator OpNode = null_frag> {
let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
@@ -1686,6 +1700,14 @@ multiclass AddSub<bit isSub, string mnemonic,
let Inst{31} = 1;
}
+ // add Rd, Rb, -imm -> sub Rd, Rn, imm
+ def : InstAlias<alias#" $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
+ addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstAlias<alias#" $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
+ addsub_shifted_imm64_neg:$imm), 0>;
+
// Register/register aliases with no shift when SP is not used.
def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
GPR32, GPR32, GPR32, 0>;
@@ -1706,7 +1728,8 @@ multiclass AddSub<bit isSub, string mnemonic,
GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0
}
-multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
+multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
+ string alias, string cmpAlias> {
let isCompare = 1, Defs = [NZCV] in {
// Add/Subtract immediate
def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
@@ -1752,6 +1775,14 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
}
} // Defs = [NZCV]
+ // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
+ def : InstAlias<alias#" $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
+ addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstAlias<alias#" $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
+ addsub_shifted_imm64_neg:$imm), 0>;
+
// Compare aliases
def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
@@ -1768,6 +1799,12 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
+ // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
+ def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
+
// Compare shorthands
def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs")
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8d8864cfe65f..c0b3f2c60916 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -96,15 +96,10 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
+
if (!isUnpredicatedTerminator(I))
return false;
@@ -224,15 +219,10 @@ bool AArch64InstrInfo::ReverseBranchCondition(
}
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
+
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 653f80286b25..b73e0958df90 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -567,8 +567,8 @@ def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
// Add/subtract
-defm ADD : AddSub<0, "add", add>;
-defm SUB : AddSub<1, "sub">;
+defm ADD : AddSub<0, "add", "sub", add>;
+defm SUB : AddSub<1, "sub", "add">;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
@@ -579,8 +579,8 @@ def : InstAlias<"mov $dst, $src",
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
-defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
-defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;
+defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
+defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e55ae991b635..580427ab3cc1 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -187,6 +187,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
+ case MachineOperand::MO_MCSymbol:
+ MCOp = LowerSymbolOperand(MO, MO.getMCSymbol());
+ break;
case MachineOperand::MO_JumpTableIndex:
MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
break;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 908f66f8e296..1e29b80c2d62 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -47,6 +47,6 @@ public:
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 2a0f0a47b05c..536a8d0f97a0 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -158,6 +158,6 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index bab84631f2b1..5394875a6bc1 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -154,7 +154,7 @@ bool haveSameParity(unsigned reg1, unsigned reg2) {
return isOdd(reg1) == isOdd(reg2);
}
-} // namespace
+}
bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
unsigned Ra) {
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index c83aea452513..4f656f94ea12 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -33,6 +33,6 @@ private:
// Add constraints between existing chains
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
};
-} // namespace llvm
+}
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index a993b6059131..11932d2b1c22 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -28,6 +28,6 @@ public:
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index e8165a8e4085..1c6b15790ea9 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -57,7 +57,7 @@ private:
}
};
char AArch64StorePairSuppress::ID = 0;
-} // namespace
+} // anonymous
FunctionPass *llvm::createAArch64StorePairSuppressPass() {
return new AArch64StorePairSuppress();
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index c9b54cc3819c..6bb069423060 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -151,6 +151,6 @@ public:
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 5496a50f6b6e..db6e244337a7 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -225,6 +225,10 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
+ // Match interleaved memory accesses to ldN/stN intrinsics.
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createInterleavedAccessPass(TM));
+
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ed27cf84bbba..fc91c94351cc 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -407,6 +407,26 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
+unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace) {
+ assert(Factor >= 2 && "Invalid interleave factor");
+ assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ unsigned NumElts = VecTy->getVectorNumElements();
+ Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+
+ // ldN/stN only support legal vector types of size 64 or 128 in bits.
+ if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+ return Factor;
+ }
+
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
+
unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
unsigned Cost = 0;
for (auto *I : Tys) {
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 25c22bcd58ec..4dabdadd8eeb 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -139,6 +139,11 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 063c053ffe8a..38e8b4d9a938 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -699,6 +699,25 @@ public:
const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
}
+ bool isAddSubImmNeg() const {
+ if (!isShiftedImm() && !isImm())
+ return false;
+
+ const MCExpr *Expr;
+
+ // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
+ if (isShiftedImm()) {
+ unsigned Shift = ShiftedImm.ShiftAmount;
+ Expr = ShiftedImm.Val;
+ if (Shift != 0 && Shift != 12)
+ return false;
+ } else
+ Expr = getImm();
+
+ // Otherwise it should be a real negative immediate in range:
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
+ }
bool isCondCode() const { return Kind == k_CondCode; }
bool isSIMDImmType10() const {
if (!isImm())
@@ -1219,6 +1238,18 @@ public:
}
}
+ void addAddSubImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ const MCExpr *MCE = isShiftedImm() ? getShiftedImmVal() : getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(MCE);
+ int64_t Val = -CE->getValue();
+ unsigned ShiftAmt = isShiftedImm() ? ShiftedImm.ShiftAmount : 0;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ Inst.addOperand(MCOperand::createImm(ShiftAmt));
+ }
+
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(getCondCode()));
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 19544ac600d6..15dee978e229 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -181,6 +181,6 @@ public:
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 3e982ee03986..7624c7240d68 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -293,7 +293,7 @@ enum CompactUnwindEncodings {
UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800
};
-} // namespace CU
+} // end CU namespace
// FIXME: This should be in a separate file.
class DarwinAArch64AsmBackend : public AArch64AsmBackend {
@@ -517,7 +517,7 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
}
-} // namespace
+}
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 807679fb1a21..1f516d1db896 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -34,7 +34,7 @@ protected:
private:
};
-} // namespace
+}
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian)
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index bbcbf514069c..b5b1d1f9e19c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -156,22 +156,12 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().createTempSymbol();
- EmitLabel(Start);
-
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
-
- getAssembler().registerSymbol(*Symbol);
+ EmitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
Symbol->setExternal(false);
- auto Sec = getCurrentSection().first;
- assert(Sec && "need a section");
- Symbol->setSection(*Sec);
-
- const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
- Symbol->setVariableValue(Value);
}
int64_t MappingSymbolCounter;
@@ -213,4 +203,4 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new AArch64TargetELFStreamer(S);
return nullptr;
}
-} // namespace llvm
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index ca56f6393c41..342384437c6a 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -65,7 +65,7 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index b2f5bf3cf4b5..741b273073e4 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -38,7 +38,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-} // namespace
+}
bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 40071f6b6bb7..7e42f8e3601e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -346,7 +346,7 @@ namespace AArch64AT {
ATMapper();
};
-} // namespace AArch64AT
+}
namespace AArch64DB {
enum DBValues {
Invalid = -1,
@@ -369,7 +369,7 @@ namespace AArch64DB {
DBarrierMapper();
};
-} // namespace AArch64DB
+}
namespace AArch64DC {
enum DCValues {
@@ -390,7 +390,7 @@ namespace AArch64DC {
DCMapper();
};
-} // namespace AArch64DC
+}
namespace AArch64IC {
enum ICValues {
@@ -410,7 +410,7 @@ namespace AArch64IC {
static inline bool NeedsRegister(ICValues Val) {
return Val == IVAU;
}
-} // namespace AArch64IC
+}
namespace AArch64ISB {
enum ISBValues {
@@ -422,7 +422,7 @@ namespace AArch64ISB {
ISBMapper();
};
-} // namespace AArch64ISB
+}
namespace AArch64PRFM {
enum PRFMValues {
@@ -452,7 +452,7 @@ namespace AArch64PRFM {
PRFMMapper();
};
-} // namespace AArch64PRFM
+}
namespace AArch64PState {
enum PStateValues {
@@ -471,7 +471,7 @@ namespace AArch64PState {
PStateMapper();
};
-} // namespace AArch64PState
+}
namespace AArch64SE {
enum ShiftExtSpecifiers {
@@ -492,7 +492,7 @@ namespace AArch64SE {
SXTW,
SXTX
};
-} // namespace AArch64SE
+}
namespace AArch64Layout {
enum VectorLayout {
@@ -514,7 +514,7 @@ namespace AArch64Layout {
VL_S,
VL_D
};
-} // namespace AArch64Layout
+}
inline static const char *
AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) {
@@ -1221,7 +1221,7 @@ namespace AArch64SysReg {
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
-} // namespace AArch64SysReg
+}
namespace AArch64TLBI {
enum TLBIValues {
@@ -1283,7 +1283,7 @@ namespace AArch64TLBI {
return true;
}
}
-} // namespace AArch64TLBI
+}
namespace AArch64II {
/// Target Operand Flag enum.
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 2e7e39a54d33..569ad3844b25 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -141,6 +141,19 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
+ : SubtargetFeature <
+ "isaver"#Major#"."#Minor#"."#Stepping,
+ "IsaVersion",
+ "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+ "Instruction set version number"
+>;
+
+def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
+def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
+def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
+def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
+
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index afc6bcb52bb8..709d7531d38b 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,7 +17,9 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -89,6 +91,15 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
+void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+ const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+ SIProgramInfo KernelInfo;
+ if (STM.isAmdHsaOS()) {
+ getSIProgramInfo(KernelInfo, *MF);
+ EmitAmdKernelCodeT(*MF, KernelInfo);
+ }
+}
+
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
// This label is used to mark the end of the .text section.
@@ -113,13 +124,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdHsaOS()) {
- getSIProgramInfo(KernelInfo, MF);
- EmitAmdKernelCodeT(MF, KernelInfo);
- OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
- } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- getSIProgramInfo(KernelInfo, MF);
- EmitProgramInfoSI(MF, KernelInfo);
+ if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (!STM.isAmdHsaOS()) {
+ getSIProgramInfo(KernelInfo, MF);
+ EmitProgramInfoSI(MF, KernelInfo);
+ }
+ // Emit directives
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitDirectiveHSACodeObjectVersion(1, 0);
+ AMDGPU::IsaVersion ISA = STM.getIsaVersion();
+ TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
+ "AMD", "AMDGPU");
} else {
EmitProgramInfoR600(MF);
}
@@ -459,125 +475,28 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
+ const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
- memset(&header, 0, sizeof(header));
-
- header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
- header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
-
- header.struct_byte_size = sizeof(amd_kernel_code_t);
-
- header.target_chip = STM.getAmdKernelCodeChipID();
-
- header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+ AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
+ header.code_properties =
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+ AMD_CODE_PROPERTY_IS_PTR64;
- // Code Properties:
- header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
-
- if (KernelInfo.FlatUsed)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- if (KernelInfo.ScratchBlocks)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
-
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-
- // MFI->ABIArgOffset is the number of bytes for the kernel arguments
- // plus 36. 36 is the number of bytes reserved at the begining of the
- // input buffer to store work-group size information.
- // FIXME: We should be adding the size of the implicit arguments
- // to this value.
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
-
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
- // FIXME: What values do I put for these alignments
- header.kernarg_segment_alignment = 0;
- header.group_segment_alignment = 0;
- header.private_segment_alignment = 0;
-
- header.code_type = 1; // HSA_EXT_CODE_KERNEL
-
- header.wavefront_size = STM.getWavefrontSize();
-
- MCSectionELF *VersionSection =
- OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(VersionSection);
- OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-
- if (isVerbose()) {
- OutStreamer->emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer->emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer->emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer->emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
- false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
- false);
- OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer->emitRawComment("private_element_size = 2 ", false);
- OutStreamer->emitRawComment("is_ptr64 = " +
- Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer->emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer->emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer->emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer->emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer->emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer->emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer->emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer->emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer->emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
- }
- OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 92072512e6b5..345af9b85e15 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -97,6 +97,8 @@ public:
/// Implemented in AMDGPUMCInstLower.cpp
void EmitInstruction(const MachineInstr *MI) override;
+ void EmitFunctionBodyStart() override;
+
void EmitEndOfAsmFile(Module &M) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -108,6 +110,6 @@ protected:
size_t DisasmLineMaxLen;
};
-} // namespace llvm
+} // End anonymous llvm
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 570473d85585..d56838ec2019 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -68,7 +68,7 @@ public:
};
int DiagnosticInfoUnsupported::KindID = 0;
-} // namespace
+}
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 31ae9a3c7760..86d3962b3856 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -198,7 +198,7 @@ namespace AMDGPU {
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
} // End namespace AMDGPU
-} // namespace llvm
+} // End llvm namespace
#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index e17b41ad5f21..f5e4694e76f6 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -41,5 +41,5 @@ public:
bool IsKernel;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 605ccd0e1361..0779d1d786b2 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -72,6 +72,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
+ IsaVersion(ISAVersion0_0_0),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
@@ -109,6 +110,10 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
}
}
+AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
+ return AMDGPU::getIsaVersion(getFeatureBits());
+}
+
bool AMDGPUSubtarget::isVGPRSpillingEnabled(
const SIMachineFunctionInfo *MFI) const {
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
@@ -131,3 +136,4 @@ void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.OnlyBottomUp = false;
}
}
+
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0d40d14f8203..30f50eb1d2f3 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -20,6 +20,8 @@
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600ISelLowering.h"
+#include "AMDKernelCodeT.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -48,6 +50,14 @@ public:
FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
};
+ enum {
+ ISAVersion0_0_0,
+ ISAVersion7_0_0,
+ ISAVersion7_0_1,
+ ISAVersion8_0_0,
+ ISAVersion8_0_1
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -77,6 +87,7 @@ private:
bool CIInsts;
bool FeatureDisable;
int LDSBankCount;
+ unsigned IsaVersion;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -236,6 +247,8 @@ public:
unsigned getAmdKernelCodeChipID() const;
+ AMDGPU::IsaVersion getIsaVersion() const;
+
bool enableMachineScheduler() const override {
return true;
}
@@ -275,6 +288,13 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \brief Returns the offset in bytes from the start of the input buffer
+ /// of the first explicit kernel argument.
+ unsigned getExplicitKernelArgOffset() const {
+ return isAmdHsaOS() ? 0 : 36;
+ }
+
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h
index eaffb854793c..a9ba60c8cbad 100644
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -12,9 +12,12 @@
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
+#include "llvm/MC/SubtargetFeature.h"
+
#include <cstddef>
#include <cstdint>
+#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
@@ -142,7 +145,7 @@ enum amd_code_property_mask_t {
/// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
/// is generally DWORD.
///
- /// Use values from the amd_element_byte_size_t enum.
+ /// uSE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
@@ -171,7 +174,11 @@ enum amd_code_property_mask_t {
/// Indicate if code generated has support for debugging.
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
};
/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
@@ -369,7 +376,7 @@ typedef struct hsa_ext_control_directives_s {
/// Scratch Wave Offset must be added by the kernel code and moved to
/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
///
-/// The second SGPR is 32 bit byte size of a single work-item’s scratch
+/// The second SGPR is 32 bit byte size of a single work-item's scratch
/// memory usage. This is directly loaded from the dispatch packet Private
/// Segment Byte Size and rounded up to a multiple of DWORD.
///
@@ -385,7 +392,7 @@ typedef struct hsa_ext_control_directives_s {
///
/// Private Segment Size (enable_sgpr_private_segment_size):
/// Number of User SGPR registers: 1. The 32 bit byte size of a single
-/// work-item’s scratch memory allocation. This is the value from the dispatch
+/// work-item's scratch memory allocation. This is the value from the dispatch
/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
///
/// \todo [Does CP need to round this to >4 byte alignment?]
@@ -433,7 +440,7 @@ typedef struct hsa_ext_control_directives_s {
/// present
///
/// Work-Group Info (enable_sgpr_workgroup_info):
-/// Number of System SGPR registers: 1. {first_wave, 14’b0000,
+/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
///
/// Private Segment Wave Byte Offset
@@ -499,25 +506,14 @@ typedef struct hsa_ext_control_directives_s {
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
/// the kernarg segment is constant for the duration of the kernel execution.
///
-typedef struct amd_kernel_code_s {
- /// The AMD major version of the Code Object. Must be the value
- /// AMD_CODE_VERSION_MAJOR.
- amd_code_version32_t amd_code_version_major;
- /// The AMD minor version of the Code Object. Minor versions must be
- /// backward compatible. Must be the value
- /// AMD_CODE_VERSION_MINOR.
- amd_code_version32_t amd_code_version_minor;
-
- /// The byte size of this struct. Must be set to
- /// sizeof(amd_kernel_code_t). Used for backward
- /// compatibility.
- uint32_t struct_byte_size;
-
- /// The target chip instruction set for which code has been
- /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
- /// in sc/Interface/SCCommon.h.
- uint32_t target_chip;
+typedef struct amd_kernel_code_s {
+ uint32_t amd_kernel_code_version_major;
+ uint32_t amd_kernel_code_version_minor;
+ uint16_t amd_machine_kind;
+ uint16_t amd_machine_version_major;
+ uint16_t amd_machine_version_minor;
+ uint16_t amd_machine_version_stepping;
/// Byte offset (possibly negative) from start of amd_kernel_code_t
/// object to kernel's entry point instruction. The actual code for
@@ -535,10 +531,6 @@ typedef struct amd_kernel_code_s {
/// and size. The offset is from the start (possibly negative) of
/// amd_kernel_code_t object. Set both to 0 if no prefetch
/// information is available.
- ///
- /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
- /// not make the size a uint64_t as prefetching more than 4GiB seems
- /// excessive.
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
@@ -553,11 +545,11 @@ typedef struct amd_kernel_code_s {
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
/// COMPUTE_PGM_RSRC2 registers.
- amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+ uint64_t compute_pgm_resource_registers;
/// Code properties. See amd_code_property_mask_t for a full list of
/// properties.
- amd_code_property32_t code_properties;
+ uint32_t code_properties;
/// The amount of memory required for the combined private, spill
/// and arg segments for a work-item in bytes. If
@@ -629,76 +621,21 @@ typedef struct amd_kernel_code_s {
/// The maximum byte alignment of variables used by the kernel in
/// the specified memory segment. Expressed as a power of two. Must
/// be at least HSA_POWERTWO_16.
- hsa_powertwo8_t kernarg_segment_alignment;
- hsa_powertwo8_t group_segment_alignment;
- hsa_powertwo8_t private_segment_alignment;
-
- uint8_t reserved3;
-
- /// Type of code object.
- hsa_ext_code_kind32_t code_type;
-
- /// Reserved for code properties if any are defined in the future.
- /// There are currently no code properties so this field must be 0.
- uint32_t reserved4;
+ uint8_t kernarg_segment_alignment;
+ uint8_t group_segment_alignment;
+ uint8_t private_segment_alignment;
/// Wavefront size expressed as a power of two. Must be a power of 2
/// in range 1..64 inclusive. Used to support runtime query that
/// obtains wavefront size, which may be used by application to
/// allocated dynamic group memory and set the dispatch work-group
/// size.
- hsa_powertwo8_t wavefront_size;
-
- /// The optimization level specified when the kernel was
- /// finalized.
- uint8_t optimization_level;
-
- /// The HSAIL profile defines which features are used. This
- /// information is from the HSAIL version directive. If this
- /// amd_kernel_code_t is not generated from an HSAIL compilation
- /// unit then must be 0.
- hsa_ext_brig_profile8_t hsail_profile;
-
- /// The HSAIL machine model gives the address sizes used by the
- /// code. This information is from the HSAIL version directive. If
- /// not generated from an HSAIL compilation unit then must still
- /// indicate for what machine mode the code is generated.
- hsa_ext_brig_machine_model8_t hsail_machine_model;
-
- /// The HSAIL major version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_major;
-
- /// The HSAIL minor version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_minor;
-
- /// Reserved for HSAIL target options if any are defined in the
- /// future. There are currently no target options so this field
- /// must be 0.
- uint16_t reserved5;
-
- /// Reserved. Must be 0.
- uint16_t reserved6;
-
- /// The values should be the actually values used by the finalizer
- /// in generating the code. This may be the union of values
- /// specified as finalizer arguments and explicit HSAIL control
- /// directives. If the finalizer chooses to ignore a control
- /// directive, and not generate constrained code, then the control
- /// directive should not be marked as enabled even though it was
- /// present in the HSAIL or finalizer argument. The values are
- /// intended to reflect the constraints that the code actually
- /// requires to correctly execute, not the values that were
- /// actually specified at finalize time.
- hsa_ext_control_directives_t control_directive;
-
- /// The code can immediately follow the amd_kernel_code_t, or can
- /// come after subsequent amd_kernel_code_t structs when there are
- /// multiple kernels in the compilation unit.
+ uint8_t wavefront_size;
+ int32_t call_convention;
+ uint8_t reserved3[12];
+ uint64_t runtime_loader_kernel_symbol;
+ uint64_t control_directives[16];
} amd_kernel_code_t;
#endif // AMDKERNELCODET_H
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 80081d40d089..2018983bc306 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
@@ -314,6 +317,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
+private:
+ bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
+ bool ParseDirectiveHSACodeObjectVersion();
+ bool ParseDirectiveHSACodeObjectISA();
+ bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseDirectiveAMDKernelCodeT();
+
public:
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
@@ -329,6 +339,11 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
+ AMDGPUTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<AMDGPUTargetStreamer &>(TS);
+ }
+
unsigned getForcedEncodingSize() const {
return ForcedEncodingSize;
}
@@ -403,7 +418,7 @@ struct OptionalOperand {
bool (*ConvertResult)(int64_t&);
};
-} // namespace
+}
static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
if (IsVgpr) {
@@ -581,7 +596,304 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
+bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
+ uint32_t &Minor) {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid major version");
+
+ Major = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("minor version number required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid minor version");
+
+ Minor = getLexer().getTok().getIntVal();
+ Lex();
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
+
+ uint32_t Major;
+ uint32_t Minor;
+
+ if (ParseDirectiveMajorMinor(Major, Minor))
+ return true;
+
+ getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
+
+ uint32_t Major;
+ uint32_t Minor;
+ uint32_t Stepping;
+ StringRef VendorName;
+ StringRef ArchName;
+
+ // If this directive has no arguments, then use the ISA version for the
+ // targeted GPU.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits());
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
+ Isa.Stepping,
+ "AMD", "AMDGPU");
+ return false;
+ }
+
+
+ if (ParseDirectiveMajorMinor(Major, Minor))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("stepping version number required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid stepping version");
+
+ Stepping = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("vendor name required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("invalid vendor name");
+
+ VendorName = getLexer().getTok().getStringContents();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("arch name required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("invalid arch name");
+
+ ArchName = getLexer().getTok().getStringContents();
+ Lex();
+
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
+ VendorName, ArchName);
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+ amd_kernel_code_t &Header) {
+
+ if (getLexer().isNot(AsmToken::Equal))
+ return TokError("expected '='");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("amd_kernel_code_t values must be integers");
+
+ uint64_t Value = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (ID == "kernel_code_version_major")
+ Header.amd_kernel_code_version_major = Value;
+ else if (ID == "kernel_code_version_minor")
+ Header.amd_kernel_code_version_minor = Value;
+ else if (ID == "machine_kind")
+ Header.amd_machine_kind = Value;
+ else if (ID == "machine_version_major")
+ Header.amd_machine_version_major = Value;
+ else if (ID == "machine_version_minor")
+ Header.amd_machine_version_minor = Value;
+ else if (ID == "machine_version_stepping")
+ Header.amd_machine_version_stepping = Value;
+ else if (ID == "kernel_code_entry_byte_offset")
+ Header.kernel_code_entry_byte_offset = Value;
+ else if (ID == "kernel_code_prefetch_byte_size")
+ Header.kernel_code_prefetch_byte_size = Value;
+ else if (ID == "max_scratch_backing_memory_byte_size")
+ Header.max_scratch_backing_memory_byte_size = Value;
+ else if (ID == "compute_pgm_rsrc1_vgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_sgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_priority")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
+ else if (ID == "compute_pgm_rsrc1_float_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_priv")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
+ else if (ID == "compute_pgm_rsrc1_dx10_clamp")
+ Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
+ else if (ID == "compute_pgm_rsrc1_debug_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_ieee_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
+ else if (ID == "compute_pgm_rsrc2_scratch_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_user_sgpr")
+ Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_x_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_y_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_z_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tg_size_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en_msb")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_EXCP_EN_MSB(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_lds_size")
+ Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
+ else if (ID == "compute_pgm_resource_registers")
+ Header.compute_pgm_resource_registers = Value;
+ else if (ID == "enable_sgpr_private_segment_buffer")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT);
+ else if (ID == "enable_sgpr_queue_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT);
+ else if (ID == "enable_sgpr_kernarg_segment_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_id")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT);
+ else if (ID == "enable_sgpr_flat_scratch_init")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT);
+ else if (ID == "enable_sgpr_private_segment_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_x")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_y")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_z")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT);
+ else if (ID == "enable_ordered_append_gds")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT);
+ else if (ID == "private_element_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);
+ else if (ID == "is_ptr64")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT);
+ else if (ID == "is_dynamic_callstack")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT);
+ else if (ID == "is_debug_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT);
+ else if (ID == "is_xnack_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT);
+ else if (ID == "workitem_private_segment_byte_size")
+ Header.workitem_private_segment_byte_size = Value;
+ else if (ID == "workgroup_group_segment_byte_size")
+ Header.workgroup_group_segment_byte_size = Value;
+ else if (ID == "gds_segment_byte_size")
+ Header.gds_segment_byte_size = Value;
+ else if (ID == "kernarg_segment_byte_size")
+ Header.kernarg_segment_byte_size = Value;
+ else if (ID == "workgroup_fbarrier_count")
+ Header.workgroup_fbarrier_count = Value;
+ else if (ID == "wavefront_sgpr_count")
+ Header.wavefront_sgpr_count = Value;
+ else if (ID == "workitem_vgpr_count")
+ Header.workitem_vgpr_count = Value;
+ else if (ID == "reserved_vgpr_first")
+ Header.reserved_vgpr_first = Value;
+ else if (ID == "reserved_vgpr_count")
+ Header.reserved_vgpr_count = Value;
+ else if (ID == "reserved_sgpr_first")
+ Header.reserved_sgpr_first = Value;
+ else if (ID == "reserved_sgpr_count")
+ Header.reserved_sgpr_count = Value;
+ else if (ID == "debug_wavefront_private_segment_offset_sgpr")
+ Header.debug_wavefront_private_segment_offset_sgpr = Value;
+ else if (ID == "debug_private_segment_buffer_sgpr")
+ Header.debug_private_segment_buffer_sgpr = Value;
+ else if (ID == "kernarg_segment_alignment")
+ Header.kernarg_segment_alignment = Value;
+ else if (ID == "group_segment_alignment")
+ Header.group_segment_alignment = Value;
+ else if (ID == "private_segment_alignment")
+ Header.private_segment_alignment = Value;
+ else if (ID == "wavefront_size")
+ Header.wavefront_size = Value;
+ else if (ID == "call_convention")
+ Header.call_convention = Value;
+ else if (ID == "runtime_loader_kernel_symbol")
+ Header.runtime_loader_kernel_symbol = Value;
+ else
+ return TokError("amd_kernel_code_t value not recognized.");
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
+
+ amd_kernel_code_t Header;
+ AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+
+ while (true) {
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("amd_kernel_code_t values must begin on a new line");
+
+ // Lex EndOfStatement. This is in a while loop, because lexing a comment
+ // will set the current token to EndOfStatement.
+ while(getLexer().is(AsmToken::EndOfStatement))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected value identifier or .end_amd_kernel_code_t");
+
+ StringRef ID = getLexer().getTok().getIdentifier();
+ Lex();
+
+ if (ID == ".end_amd_kernel_code_t")
+ break;
+
+ if (ParseAMDKernelCodeTValue(ID, Header))
+ return true;
+ }
+
+ getTargetStreamer().EmitAMDKernelCodeT(Header);
+
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getString();
+
+ if (IDVal == ".hsa_code_object_version")
+ return ParseDirectiveHSACodeObjectVersion();
+
+ if (IDVal == ".hsa_code_object_isa")
+ return ParseDirectiveHSACodeObjectISA();
+
+ if (IDVal == ".amd_kernel_code_t")
+ return ParseDirectiveAMDKernelCodeT();
+
return true;
}
diff --git a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
index 63d44d1e06f1..dab0c6f585af 100644
--- a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = AMDGPUAsmParser
parent = AMDGPU
-required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo Support
+required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo AMDGPUUtils Support
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 3e5ff1f3c6d4..9460bf6b9338 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -62,3 +62,4 @@ add_subdirectory(AsmParser)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
+add_subdirectory(Utils)
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
index c6861df91ed6..38c5489586f1 100644
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo Utils
[component_0]
type = TargetGroup
@@ -29,5 +29,5 @@ has_asmprinter = 1
type = Library
name = AMDGPUCodeGen
parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 8bed2deef4cd..468563c44982 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -127,11 +127,14 @@ bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+ bool Is64Bit;
+
public:
- ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+ ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) :
+ AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { }
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- return createAMDGPUELFObjectWriter(OS);
+ return createAMDGPUELFObjectWriter(Is64Bit, OS);
}
};
@@ -140,5 +143,8 @@ public:
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU) {
- return new ELFAMDGPUAsmBackend(T);
+ Triple TargetTriple(TT);
+
+ // Use 64-bit ELF for amdgcn
+ return new ELFAMDGPUAsmBackend(T, TargetTriple.getArch() == Triple::amdgcn);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 59f45ff02d88..820f17df8960 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -18,7 +18,7 @@ namespace {
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
- AMDGPUELFObjectWriter();
+ AMDGPUELFObjectWriter(bool Is64Bit);
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override {
@@ -30,10 +30,11 @@ protected:
} // End anonymous namespace
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
- : MCELFObjectTargetWriter(false, 0, 0, false) { }
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit)
+ : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA,
+ ELF::EM_AMDGPU, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
- MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) {
+ MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit);
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index fa3b3c3d9489..01021d67ffd9 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace AMDGPU
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index a7d3dd1345f9..7172e4bb9335 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUMCAsmInfo.h"
+#include "AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
@@ -72,6 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
+static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AMDGPUTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer * createAMDGPUObjectTargetStreamer(
+ MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUTargetELFStreamer(S);
+}
+
extern "C" void LLVMInitializeAMDGPUTargetMC() {
for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
@@ -84,7 +98,15 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
}
+ // R600 specific registration
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
createR600MCCodeEmitter);
+
+ // GCN specific registration
TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
+
+ TargetRegistry::RegisterAsmTargetStreamer(TheGCNTarget,
+ createAMDGPUAsmTargetStreamer);
+ TargetRegistry::RegisterObjectTargetStreamer(TheGCNTarget,
+ createAMDGPUObjectTargetStreamer);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index ac611b862a1a..5d1b86b8c0c2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -46,8 +46,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
-} // namespace llvm
+MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
+ raw_pwrite_stream &OS);
+} // End llvm namespace
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
new file mode 100644
index 000000000000..09e6cb1f1ffc
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -0,0 +1,297 @@
+//===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDGPU specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetAsmStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS)
+ : AMDGPUTargetStreamer(S), OS(OS) { }
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) {
+ OS << "\t.hsa_code_object_version " <<
+ Twine(Major) << "," << Twine(Minor) << '\n';
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
+ OS << "\t.hsa_code_object_isa " <<
+ Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
+ ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
+
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+ uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
+ bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ bool EnableSGPRDispatchPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ bool EnableSGPRQueuePtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ bool EnableSGPRDispatchID = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ bool EnableSGPRFlatScratchInit = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
+ bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
+ bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
+ bool EnableOrderedAppendGDS = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
+ uint32_t PrivateElementSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
+ bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
+ bool IsDynamicCallstack = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
+ bool IsDebugEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
+ bool IsXNackEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
+
+ OS << "\t.amd_kernel_code_t\n" <<
+ "\t\tkernel_code_version_major = " <<
+ Header.amd_kernel_code_version_major << '\n' <<
+ "\t\tkernel_code_version_minor = " <<
+ Header.amd_kernel_code_version_minor << '\n' <<
+ "\t\tmachine_kind = " <<
+ Header.amd_machine_kind << '\n' <<
+ "\t\tmachine_version_major = " <<
+ Header.amd_machine_version_major << '\n' <<
+ "\t\tmachine_version_minor = " <<
+ Header.amd_machine_version_minor << '\n' <<
+ "\t\tmachine_version_stepping = " <<
+ Header.amd_machine_version_stepping << '\n' <<
+ "\t\tkernel_code_entry_byte_offset = " <<
+ Header.kernel_code_entry_byte_offset << '\n' <<
+ "\t\tkernel_code_prefetch_byte_size = " <<
+ Header.kernel_code_prefetch_byte_size << '\n' <<
+ "\t\tmax_scratch_backing_memory_byte_size = " <<
+ Header.max_scratch_backing_memory_byte_size << '\n' <<
+ "\t\tcompute_pgm_rsrc1_vgprs = " <<
+ G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_sgprs = " <<
+ G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priority = " <<
+ G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_float_mode = " <<
+ G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priv = " <<
+ G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
+ G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_debug_mode = " <<
+ G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
+ G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_scratch_en = " <<
+ G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
+ G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
+ G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
+ G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
+ G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
+ G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
+ G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
+ G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_lds_size = " <<
+ G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en = " <<
+ G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
+
+ "\t\tenable_sgpr_private_segment_buffer = " <<
+ EnableSGPRPrivateSegmentBuffer << '\n' <<
+ "\t\tenable_sgpr_dispatch_ptr = " <<
+ EnableSGPRDispatchPtr << '\n' <<
+ "\t\tenable_sgpr_queue_ptr = " <<
+ EnableSGPRQueuePtr << '\n' <<
+ "\t\tenable_sgpr_kernarg_segment_ptr = " <<
+ EnableSGPRKernargSegmentPtr << '\n' <<
+ "\t\tenable_sgpr_dispatch_id = " <<
+ EnableSGPRDispatchID << '\n' <<
+ "\t\tenable_sgpr_flat_scratch_init = " <<
+ EnableSGPRFlatScratchInit << '\n' <<
+ "\t\tenable_sgpr_private_segment_size = " <<
+ EnableSGPRPrivateSegmentSize << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_x = " <<
+ EnableSGPRGridWorkgroupCountX << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_y = " <<
+ EnableSGPRGridWorkgroupCountY << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_z = " <<
+ EnableSGPRGridWorkgroupCountZ << '\n' <<
+ "\t\tenable_ordered_append_gds = " <<
+ EnableOrderedAppendGDS << '\n' <<
+ "\t\tprivate_element_size = " <<
+ PrivateElementSize << '\n' <<
+ "\t\tis_ptr64 = " <<
+ IsPtr64 << '\n' <<
+ "\t\tis_dynamic_callstack = " <<
+ IsDynamicCallstack << '\n' <<
+ "\t\tis_debug_enabled = " <<
+ IsDebugEnabled << '\n' <<
+ "\t\tis_xnack_enabled = " <<
+ IsXNackEnabled << '\n' <<
+ "\t\tworkitem_private_segment_byte_size = " <<
+ Header.workitem_private_segment_byte_size << '\n' <<
+ "\t\tworkgroup_group_segment_byte_size = " <<
+ Header.workgroup_group_segment_byte_size << '\n' <<
+ "\t\tgds_segment_byte_size = " <<
+ Header.gds_segment_byte_size << '\n' <<
+ "\t\tkernarg_segment_byte_size = " <<
+ Header.kernarg_segment_byte_size << '\n' <<
+ "\t\tworkgroup_fbarrier_count = " <<
+ Header.workgroup_fbarrier_count << '\n' <<
+ "\t\twavefront_sgpr_count = " <<
+ Header.wavefront_sgpr_count << '\n' <<
+ "\t\tworkitem_vgpr_count = " <<
+ Header.workitem_vgpr_count << '\n' <<
+ "\t\treserved_vgpr_first = " <<
+ Header.reserved_vgpr_first << '\n' <<
+ "\t\treserved_vgpr_count = " <<
+ Header.reserved_vgpr_count << '\n' <<
+ "\t\treserved_sgpr_first = " <<
+ Header.reserved_sgpr_first << '\n' <<
+ "\t\treserved_sgpr_count = " <<
+ Header.reserved_sgpr_count << '\n' <<
+ "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
+ Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
+ "\t\tdebug_private_segment_buffer_sgpr = " <<
+ Header.debug_private_segment_buffer_sgpr << '\n' <<
+ "\t\tkernarg_segment_alignment = " <<
+ (uint32_t)Header.kernarg_segment_alignment << '\n' <<
+ "\t\tgroup_segment_alignment = " <<
+ (uint32_t)Header.group_segment_alignment << '\n' <<
+ "\t\tprivate_segment_alignment = " <<
+ (uint32_t)Header.private_segment_alignment << '\n' <<
+ "\t\twavefront_size = " <<
+ (uint32_t)Header.wavefront_size << '\n' <<
+ "\t\tcall_convention = " <<
+ Header.call_convention << '\n' <<
+ "\t\truntime_loader_kernel_symbol = " <<
+ Header.runtime_loader_kernel_symbol << '\n' <<
+ // TODO: control_directives
+ "\t.end_amd_kernel_code_t\n";
+
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetELFStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
+ : AMDGPUTargetStreamer(S), Streamer(S) { }
+
+MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) {
+ MCStreamer &OS = getStreamer();
+ MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+ unsigned NameSZ = 4;
+
+ OS.PushSection();
+ OS.SwitchSection(Note);
+ OS.EmitIntValue(NameSZ, 4); // namesz
+ OS.EmitIntValue(8, 4); // descz
+ OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
+ OS.EmitBytes(StringRef("AMD", NameSZ)); // name
+ OS.EmitIntValue(Major, 4); // desc
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitValueToAlignment(4);
+ OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
+ MCStreamer &OS = getStreamer();
+ MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+ unsigned NameSZ = 4;
+ uint16_t VendorNameSize = VendorName.size() + 1;
+ uint16_t ArchNameSize = ArchName.size() + 1;
+ unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
+ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
+ VendorNameSize + ArchNameSize;
+
+ OS.PushSection();
+ OS.SwitchSection(Note);
+ OS.EmitIntValue(NameSZ, 4); // namesz
+ OS.EmitIntValue(DescSZ, 4); // descsz
+ OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
+ OS.EmitBytes(StringRef("AMD", 4)); // name
+ OS.EmitIntValue(VendorNameSize, 2); // desc
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+ OS.EmitIntValue(0, 1); // NULL terminte ArchName
+ OS.EmitValueToAlignment(4);
+ OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+
+ MCStreamer &OS = getStreamer();
+ OS.PushSection();
+ OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+ OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ OS.PopSection();
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
new file mode 100644
index 000000000000..d37677c6b863
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -0,0 +1,77 @@
+//===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDKernelCodeT.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+namespace llvm {
+
+class MCELFStreamer;
+
+class AMDGPUTargetStreamer : public MCTargetStreamer {
+public:
+ AMDGPUTargetStreamer(MCStreamer &S);
+ virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) = 0;
+
+ virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) = 0;
+
+ virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+};
+
+class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+ formatted_raw_ostream &OS;
+public:
+ AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) override;
+
+ void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+};
+
+class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+
+ enum NoteType {
+ NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
+ NT_AMDGPU_HSA_HSAIL = 2,
+ NT_AMDGPU_HSA_ISA = 3,
+ NT_AMDGPU_HSA_PRODUCER = 4,
+ NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
+ NT_AMDGPU_HSA_EXTENSION = 6,
+ NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
+ NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
+ };
+
+ MCStreamer &Streamer;
+
+public:
+ AMDGPUTargetELFStreamer(MCStreamer &S);
+
+ MCELFStreamer &getStreamer();
+
+ void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) override;
+
+ void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+};
+
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 151d0d5f83de..8306a051ff98 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMAMDGPUDesc
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUMCAsmInfo.cpp
+ AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
SIMCCodeEmitter.cpp
)
diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile
index 2e2de5020867..219f34daa24f 100644
--- a/lib/Target/AMDGPU/Makefile
+++ b/lib/Target/AMDGPU/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc
-DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc Utils
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index c0ffede51999..69efb8b8bc43 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -104,7 +104,7 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
//===----------------------------------------------------------------------===//
def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel,
@@ -112,11 +112,12 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel,
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"hawaii", SIFullSpeedModel,
- [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32,
+ FeatureISAVersion7_0_1]
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel,
@@ -127,11 +128,13 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel,
//===----------------------------------------------------------------------===//
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
>;
-def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+>;
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 6ff0a2204cfa..51d87eda31d1 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -48,7 +48,7 @@ namespace R600_InstFlag {
IS_EXPORT = (1 << 17),
LDS_1A2D = (1 << 18)
};
-} // namespace R600_InstFlag
+}
#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
@@ -138,7 +138,7 @@ namespace OpName {
VEC_COUNT
};
-} // namespace OpName
+}
//===----------------------------------------------------------------------===//
// Config register definitions
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index c25287806988..c06d3c4fd309 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -75,6 +75,6 @@ private:
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
-} // namespace llvm
+} // End namespace llvm;
#endif
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5ef883cbcadd..855fa9fe45b2 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -697,15 +697,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
+
// AMDGPU::BRANCH* instructions are only available after isel and are not
// handled
if (isBranch(I->getOpcode()))
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index 9c5f76c882f1..dee4c2b9ae31 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -298,6 +298,6 @@ int getLDSNoRetOp(uint16_t Opcode);
} //End namespace AMDGPU
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index f5556c1e81fc..263561edd30d 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -29,6 +29,6 @@ public:
unsigned StackSize;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index a1a1b4043429..0c06ccc736d0 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -375,7 +375,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
return false;
}
-} // namespace
+}
llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
return new R600VectorRegMerger(tm);
diff --git a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
index 93bcf680a022..2fc7b02f673f 100644
--- a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
@@ -296,7 +296,7 @@ public:
char R600TextureIntrinsicsReplacer::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
return new R600TextureIntrinsicsReplacer();
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index f1b4ba1ac07d..4c3263911c40 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -39,7 +39,7 @@ enum {
WQM = 1 << 20,
VGPRSpill = 1 << 21
};
-} // namespace SIInstrFlags
+}
namespace llvm {
namespace AMDGPU {
@@ -74,7 +74,7 @@ namespace SIInstrFlags {
P_NORMAL = 1 << 8, // Positive normal
P_INFINITY = 1 << 9 // Positive infinity
};
-} // namespace SIInstrFlags
+}
namespace SISrcMods {
enum {
@@ -100,16 +100,41 @@ namespace SIOutMods {
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
+#define C_00B84C_SCRATCH_EN 0xFFFFFFFE
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
+#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
+#define C_00B84C_USER_SGPR 0xFFFFFFC1
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
+#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
+#define C_00B84C_TGID_X_EN 0xFFFFFF7F
#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
+#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
+#define C_00B84C_TGID_Y_EN 0xFFFFFEFF
#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
+#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
+#define C_00B84C_TGID_Z_EN 0xFFFFFDFF
#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
+#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
+#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF
#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
-
+#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
+#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
+/* CIK */
+#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
+#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
+#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
+/* */
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
+#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
+#define C_00B84C_LDS_SIZE 0xFF007FFF
+#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
+#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
+#define C_00B84C_EXCP_EN
+
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 12d08cf4c7f5..ead1a3743473 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -583,7 +583,8 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = Splits[i].VT;
- const unsigned Offset = 36 + VA.getLocMemOffset();
+ const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+ VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
@@ -2211,8 +2212,9 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ const std::string &Constraint_,
MVT VT) const {
+ StringRef Constraint(Constraint_);
if (Constraint == "r") {
switch(VT.SimpleTy) {
default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
@@ -2232,8 +2234,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
if (RC) {
- unsigned Idx = std::atoi(Constraint.substr(2).c_str());
- if (Idx < RC->getNumRegs())
+ uint32_t Idx;
+ bool Failed = Constraint.substr(2).getAsInteger(10, Idx);
+ if (!Failed && Idx < RC->getNumRegs())
return std::make_pair(RC->getRegister(Idx), RC);
}
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47bc17823b3f..eb96bd0227b2 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
int NewOpc;
// Try to map original to commuted opcode
NewOpc = AMDGPU::getCommuteRev(Opcode);
- // Check if the commuted (REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
+ if (NewOpc != -1)
+ // Check if the commuted (REV) opcode exists on the target.
+ return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
// Try to map commuted to original opcode
NewOpc = AMDGPU::getCommuteOrig(Opcode);
- // Check if the original (non-REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
+ if (NewOpc != -1)
+ // Check if the original (non-REV) opcode exists on the target.
+ return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
return Opcode;
}
@@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
if (MI->getNumOperands() < 3)
return nullptr;
+ int CommutedOpcode = commuteOpcode(*MI);
+ if (CommutedOpcode == -1)
+ return nullptr;
+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
assert(Src0Idx != -1 && "Should always have src0 operand");
@@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(*MI)));
+ MI->setDesc(get(CommutedOpcode));
return MI;
}
@@ -2716,8 +2720,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
- if (ST.isAmdHsaOS())
+ if (ST.isAmdHsaOS()) {
RsrcDataFormat |= (1ULL << 56);
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ // Set MTYPE = 2
+ RsrcDataFormat |= (2ULL << 59);
+ }
+
return RsrcDataFormat;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 6fafb945c993..0382272068d2 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -117,7 +117,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(const MachineInstr &MI) const;
+ int commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 93e4ca74ec38..fcb58d5da3b0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1740,7 +1740,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
ClampMod:$clamp,
omod:$omod),
- " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+ "$dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
new file mode 100644
index 000000000000..b76b4007003f
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -0,0 +1,60 @@
+//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUBaseInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
+
+namespace llvm {
+namespace AMDGPU {
+
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+
+ if (Features.test(FeatureISAVersion7_0_0))
+ return {7, 0, 0};
+
+ if (Features.test(FeatureISAVersion7_0_1))
+ return {7, 0, 1};
+
+ if (Features.test(FeatureISAVersion8_0_0))
+ return {8, 0, 0};
+
+ if (Features.test(FeatureISAVersion8_0_1))
+ return {8, 0, 1};
+
+ return {0, 0, 0};
+}
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features) {
+
+ IsaVersion ISA = getIsaVersion(Features);
+
+ memset(&Header, 0, sizeof(Header));
+
+ Header.amd_kernel_code_version_major = 1;
+ Header.amd_kernel_code_version_minor = 0;
+ Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+ Header.amd_machine_version_major = ISA.Major;
+ Header.amd_machine_version_minor = ISA.Minor;
+ Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.kernel_code_entry_byte_offset = sizeof(Header);
+ // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
+ Header.wavefront_size = 6;
+ // These alignment values are specified in powers of two, so alignment =
+ // 2^n. The minimum alignment is 2^4 = 16.
+ Header.kernarg_segment_alignment = 4;
+ Header.group_segment_alignment = 4;
+ Header.private_segment_alignment = 4;
+}
+
+} // End namespace AMDGPU
+} // End namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
new file mode 100644
index 000000000000..f57028cc5bfd
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -0,0 +1,34 @@
+//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+
+#include "AMDKernelCodeT.h"
+
+namespace llvm {
+
+class FeatureBitset;
+
+namespace AMDGPU {
+
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
+IsaVersion getIsaVersion(const FeatureBitset &Features);
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features);
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/Utils/CMakeLists.txt b/lib/Target/AMDGPU/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..2c07aeab7dd3
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMAMDGPUUtils
+ AMDGPUBaseInfo.cpp
+ )
diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..dec5360e3bc7
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDGPU/Utils/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUUtils
+parent = AMDGPU
+required_libraries = Support
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/Utils/Makefile b/lib/Target/AMDGPU/Utils/Makefile
new file mode 100644
index 000000000000..1019e726d50e
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/Utils/Makefile --------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUUtils
+
+# Hack: we need to include 'main' AMDGPU target directory to grab private
+# headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index d554fe5d4465..9550a3a3cad1 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -46,6 +46,6 @@ FunctionPass *createThumb2SizeReductionPass(
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index c7ea18a17fef..96b4742da2bb 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -410,13 +410,13 @@ def : ProcessorModel<"cortex-r4", CortexA8Model,
def : ProcessorModel<"cortex-r4f", CortexA8Model,
[ProcR4,
FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+ FeatureVFP3, FeatureD16]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureHasRAS,
FeatureD16, FeatureRClass]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4530e4155ae2..738ddedccdac 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() {
} else if (STI.hasVFP4())
ATS.emitFPU(ARM::FK_NEON_VFPV4);
else
- ATS.emitFPU(ARM::FK_NEON);
+ ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
@@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() {
? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
: ARM::FK_VFPV4);
else if (STI.hasVFP3())
- ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
+ ATS.emitFPU(STI.hasD16()
+ // +d16
+ ? (STI.isFPOnlySP()
+ ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
+ : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
else if (STI.hasVFP2())
ATS.emitFPU(ARM::FK_VFPV2);
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f2b7a6419be3..b1a11d626bda 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -367,14 +367,10 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
@@ -594,7 +590,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
// all definitions of CPSR are dead
return true;
}
-} // namespace llvm
+}
/// GetInstSize - Return the size of the specified MachineInstr.
///
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 6fc0edd101b9..b4706e348933 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -493,6 +493,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 2edb96adba42..d687568d7eb9 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -281,6 +281,6 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
return true;
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index cb4eeb5fc43d..f4ec8c67c977 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -335,7 +335,7 @@ namespace {
}
};
char ARMConstantIslands::ID = 0;
-} // namespace
+}
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index b429bed9ff25..36f63e239a9e 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -44,7 +44,7 @@ namespace ARMCP {
GOTTPOFF,
TPOFF
};
-} // namespace ARMCP
+}
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC-relative displacement between the address of the load
@@ -254,6 +254,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 963b46c98e00..4438f50758dc 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,7 +69,7 @@ namespace {
MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
-} // namespace
+}
/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
/// the instructions created from the expansion.
@@ -129,7 +129,7 @@ namespace {
return PseudoOpc < TE.PseudoOpc;
}
};
-} // namespace
+}
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index cead18f97d74..4175b4af86e6 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2898,7 +2898,7 @@ const struct FoldableLoadExtendsStruct {
{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
};
-} // namespace
+}
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 5b4a44c72030..0c910ab6130f 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -92,6 +92,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) {
}
}
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 091086d3c429..a52e49780e27 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -221,7 +221,7 @@ struct StackAdjustingInsts {
}
}
};
-} // namespace
+}
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 98313e60e234..d763d17a506f 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -78,6 +78,6 @@ public:
MachineBasicBlock::iterator MI) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 575a9d930675..50afb192b331 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -279,7 +279,7 @@ private:
SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
bool is64BitVector);
};
-} // namespace
+}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 94a026bf2cc8..4b2105b7442f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -83,7 +83,7 @@ namespace {
CallOrPrologue = PC;
}
};
-} // namespace
+}
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
@@ -11404,6 +11404,167 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+/// \brief Lower an interleaved load into a vldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
+/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
+///
+/// Into:
+/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
+/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
+/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
+bool ARMTargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+ assert(!Shuffles.empty() && "Empty shufflevector input");
+ assert(Shuffles.size() == Indices.size() &&
+ "Unmatched number of shufflevectors and indices");
+
+ VectorType *VecTy = Shuffles[0]->getType();
+ Type *EltTy = VecTy->getVectorElementType();
+
+ const DataLayout *DL = getDataLayout();
+ unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+ bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+ // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
+ // support i64/f64 element).
+ if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ return false;
+
+ // A pointer vector can not be the return type of the ldN intrinsics. Need to
+ // load integer vectors first and then convert to pointer vectors.
+ if (EltTy->isPointerTy())
+ VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+ VecTy->getVectorNumElements());
+
+ static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
+ Intrinsic::arm_neon_vld3,
+ Intrinsic::arm_neon_vld4};
+
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
+
+ IRBuilder<> Builder(LI);
+ SmallVector<Value *, 2> Ops;
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
+
+ CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SV = Shuffles[i];
+ unsigned Index = Indices[i];
+
+ Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+
+ SV->replaceAllUsesWith(SubVec);
+ }
+
+ return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumElts) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumElts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a vstN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
+///
+/// Into:
+/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// vst3 instruction in CodeGen.
+bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+
+ VectorType *VecTy = SVI->getType();
+ assert(VecTy->getVectorNumElements() % Factor == 0 &&
+ "Invalid interleaved store");
+
+ unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ Type *EltTy = VecTy->getVectorElementType();
+ VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+ const DataLayout *DL = getDataLayout();
+ unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+ // Skip illegal sub vector types and vector types of i64/f64 element (vstN
+ // doesn't support i64/f64 element).
+ if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+ return false;
+
+ Value *Op0 = SVI->getOperand(0);
+ Value *Op1 = SVI->getOperand(1);
+ IRBuilder<> Builder(SI);
+
+ // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+ // vectors to integer vectors.
+ if (EltTy->isPointerTy()) {
+ Type *IntTy = DL->getIntPtrType(EltTy);
+
+ // Convert to the corresponding integer vector.
+ Type *IntVecTy =
+ VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
+ Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+ Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+ SubVecTy = VectorType::get(IntTy, NumSubElts);
+ }
+
+ static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], SubVecTy);
+
+ SmallVector<Value *, 6> Ops;
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+
+ // Split the shufflevector operands into sub vectors for the new vstN call.
+ for (unsigned i = 0; i < Factor; i++)
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ return true;
+}
+
enum HABaseType {
HA_UNKNOWN = 0,
HA_FLOAT,
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 71a47a2cb81b..74396392f8e3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -215,7 +215,7 @@ namespace llvm {
VST3LN_UPD,
VST4LN_UPD
};
- } // namespace ARMISD
+ }
/// Define some predicates that are used for node matching.
namespace ARM {
@@ -433,6 +433,15 @@ namespace llvm {
Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
+ unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+ bool lowerInterleavedLoad(LoadInst *LI,
+ ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices,
+ unsigned Factor) const override;
+ bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ unsigned Factor) const override;
+
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicRMWExpansionKind
@@ -638,6 +647,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-} // namespace llvm
+}
#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 59e1535a6fe6..84f95be30991 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -198,7 +198,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char ARMCGBR::ID = 0;
FunctionPass*
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 9e5700a256bd..90f34ea08401 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -43,6 +43,6 @@ private:
Reloc::Model RM) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 50e2292b8b6e..245c9e869bf6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -142,7 +142,7 @@ namespace {
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
-} // namespace
+}
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
@@ -444,7 +444,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
return;
}
- if (MBBI->killsRegister(Base))
+ if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
// Register got killed. Stop updating.
return;
}
@@ -743,6 +743,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
}
}
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
+ if (TransferOp.isUse() && TransferOp.getReg() == Base)
+ BaseKill = false;
+ }
+
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
SmallVector<MachineOperand *, 8> UsesOfImpDefs;
@@ -1464,119 +1470,124 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
- const MachineOperand &BaseOp = MI->getOperand(2);
- unsigned BaseReg = BaseOp.getReg();
- unsigned EvenReg = MI->getOperand(0).getReg();
- unsigned OddReg = MI->getOperand(1).getReg();
- unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
- unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
- // ARM errata 602117: LDRD with base in list may result in incorrect base
- // register when interrupted or faulted.
- bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
- if (!Errata602117 &&
- ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
- return false;
+ if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
+ return false;
- MachineBasicBlock::iterator NewBBI = MBBI;
- bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
- bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
- bool EvenDeadKill = isLd ?
- MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
- bool EvenUndef = MI->getOperand(0).isUndef();
- bool OddDeadKill = isLd ?
- MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
- bool OddUndef = MI->getOperand(1).isUndef();
- bool BaseKill = BaseOp.isKill();
- bool BaseUndef = BaseOp.isUndef();
- bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
- bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
- int OffImm = getMemoryOpOffset(MI);
- unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
-
- if (OddRegNum > EvenRegNum && OffImm == 0) {
- // Ascending register numbers and no offset. It's safe to change it to a
- // ldm or stm.
- unsigned NewOpc = (isLd)
- ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
- : (isT2 ? ARM::t2STMIA : ARM::STMIA);
- if (isLd) {
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
- .addReg(BaseReg, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg)
- .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
- .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
- ++NumLDRD2LDM;
- } else {
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
- .addReg(BaseReg, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg)
- .addReg(EvenReg,
- getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
- .addReg(OddReg,
- getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
- ++NumSTRD2STM;
- }
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg &&
+ (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
+ // ARM LDRD/STRD needs consecutive registers.
+ bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
+ (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
+
+ if (!Errata602117 && !NonConsecutiveRegs)
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = std::prev(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
NewBBI = std::prev(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
} else {
- // Split into two instructions.
- unsigned NewOpc = (isLd)
- ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
- : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
- // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
- // so adjust and use t2LDRi12 here for that.
- unsigned NewOpc2 = (isLd)
- ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
- : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
- DebugLoc dl = MBBI->getDebugLoc();
- // If this is a load and base register is killed, it may have been
- // re-defed by the load, make sure the first load does not clobber it.
- if (isLd &&
- (BaseKill || OffKill) &&
- (TRI->regsOverlap(EvenReg, BaseReg))) {
- assert(!TRI->regsOverlap(OddReg, BaseReg));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
- OddReg, OddDeadKill, false,
- BaseReg, false, BaseUndef, false, OffUndef,
- Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
- EvenReg, EvenDeadKill, false,
- BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
- Pred, PredReg, TII, isT2);
- } else {
- if (OddReg == EvenReg && EvenDeadKill) {
- // If the two source operands are the same, the kill marker is
- // probably on the first one. e.g.
- // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
- EvenDeadKill = false;
- OddDeadKill = true;
- }
- // Never kill the base register in the first instruction.
- if (EvenReg == BaseReg)
- EvenDeadKill = false;
- InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
- EvenReg, EvenDeadKill, EvenUndef,
- BaseReg, false, BaseUndef, false, OffUndef,
- Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
- OddReg, OddDeadKill, OddUndef,
- BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
- Pred, PredReg, TII, isT2);
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
}
- if (isLd)
- ++NumLDRD2LDR;
- else
- ++NumSTRD2STR;
+ // Never kill the base register in the first instruction.
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = std::prev(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
}
-
- MBB.erase(MI);
- MBBI = NewBBI;
- return true;
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
}
- return false;
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
@@ -1859,7 +1870,7 @@ namespace {
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
-} // namespace
+}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TD = Fn.getTarget().getDataLayout();
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8b1210268eb2..14dd9ef333af 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -229,6 +229,6 @@ public:
return It;
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 1c8e1f8b1412..30baf4263c11 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -32,7 +32,7 @@ public:
}
};
char ARMOptimizeBarriersPass::ID = 0;
-} // namespace
+}
// Returns whether the instruction can safely move past a DMB instruction
// The current implementation allows this iif MI does not have any possible
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 4563caae9ffe..1db190f41e1a 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -70,6 +70,6 @@ public:
RTLIB::Libcall LC) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f00594f82012..9909a6a6d198 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -453,6 +453,6 @@ public:
/// True if fast-isel is used.
bool useFastISel() const;
};
-} // namespace llvm
+} // End llvm namespace
#endif // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 104a34f97e5e..6e81bd2d349d 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -332,6 +332,10 @@ void ARMPassConfig::addIRPasses() {
}));
TargetPassConfig::addIRPasses();
+
+ // Match interleaved memory accesses to ldN/stN intrinsics.
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createInterleavedAccessPass(TM));
}
bool ARMPassConfig::addPreISel() {
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 4e1b371640bc..f4901fc24e44 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -478,3 +478,28 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
return LT.first;
}
+
+unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(Factor >= 2 && "Invalid interleave factor");
+ assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+ // vldN/vstN doesn't support vector types of i64/f64 element.
+ bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ unsigned NumElts = VecTy->getVectorNumElements();
+ Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+
+ // vldN/vstN only support legal vector types of size 64 or 128 in bits.
+ if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+ return Factor;
+ }
+
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 9479d7693ebf..f2e5db655ccf 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -126,6 +126,11 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 35387d3e6cf1..c2db74619871 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -9887,22 +9888,13 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
}
Lex();
+ MCSymbol *Sym;
const MCExpr *Value;
- if (Parser.parseExpression(Value)) {
- TokError("missing expression");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- TokError("unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
- Lex();
+ if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+ Parser, Sym, Value))
+ return true;
- MCSymbol *Alias = getContext().getOrCreateSymbol(Name);
- getTargetStreamer().emitThumbSet(Alias, Value);
+ getTargetStreamer().emitThumbSet(Sym, Value);
return false;
}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f973a8de8bcf..097ec04e7052 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -81,7 +81,7 @@ namespace {
private:
std::vector<unsigned char> ITStates;
};
-} // namespace
+}
namespace {
/// ARM disassembler for all ARM platforms.
@@ -118,7 +118,7 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
-} // namespace
+}
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index e28f6e097421..a6206e3d9585 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -29,6 +29,6 @@ public:
Subtype);
}
};
-} // namespace
+}
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 412feb8873ca..68b12edd089e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -23,6 +23,6 @@ public:
return createARMELFObjectWriter(OS, OSABI, isLittle());
}
};
-} // namespace
+}
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 1975bcaa234e..4289a73e9d6b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -114,7 +114,7 @@ namespace ARM_PROC {
case ID: return "id";
}
}
-} // namespace ARM_PROC
+}
namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
@@ -459,6 +459,6 @@ namespace ARMII {
} // end namespace ARMII
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 9fe27fbcff4a..804d3534096a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -40,7 +40,7 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-} // namespace
+}
ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index bbc0b37175df..4d12bfb5d60f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -563,20 +563,13 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().createTempSymbol();
- EmitLabel(Start);
-
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
+ EmitLabel(Symbol);
- getAssembler().registerSymbol(*Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
Symbol->setExternal(false);
- AssignSection(Symbol, getCurrentSection().first);
-
- const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
- Symbol->setVariableValue(Value);
}
void EmitThumbFunc(MCSymbol *Func) override {
@@ -804,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
+ case ARM::FK_VFPV3_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
case ARM::FK_VFPV3_D16:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3B,
/* OverwriteExisting= */ false);
break;
+ case ARM::FK_VFPV3_D16_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FK_VFPV3XD:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+ case ARM::FK_VFPV3XD_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
case ARM::FK_VFPV4:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv4A,
@@ -849,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
+ case ARM::FK_NEON_FP16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+ ARMBuildAttrs::AllowHPFP,
+ /* OverwriteExisting= */ false);
+ break;
+
case ARM::FK_NEON_VFPV4:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv4A,
@@ -1345,6 +1382,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-} // namespace llvm
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 23ef50132900..46ba57170db5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -104,7 +104,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace ARM
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 0fb395e473a6..fafe25ae5be5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -370,7 +370,7 @@ public:
}
};
-} // namespace
+}
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
return new ARMMCInstrAnalysis(Info);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c6f2d1341623..fd30623d79af 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -103,7 +103,7 @@ MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 6ac778e0cecd..95d7ea7c04a3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -56,7 +56,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-} // namespace
+}
static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
unsigned &Log2Size) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 32481e276b00..173cc93d44fb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -60,7 +60,7 @@ namespace {
EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
}
};
-} // namespace
+}
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 34b552f7a212..166c04b41a77 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -79,7 +79,7 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
}
-} // namespace
+}
namespace llvm {
MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 6515a650be59..b993b1be4847 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -35,7 +35,7 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
-} // namespace
+}
MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
MCAsmBackend &MAB,
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index ca98f696b7dd..ed2deeaa24c0 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -71,7 +71,7 @@ namespace {
bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
};
char MLxExpansion::ID = 0;
-} // namespace
+}
void MLxExpansion::clearStack() {
std::fill(LastMIs, LastMIs + 4, nullptr);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index e5e89fad3d71..31d57325ebd6 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -47,6 +47,6 @@ public:
MachineBasicBlock::iterator MI) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 31b4df2e5b0c..f3f493d89237 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -58,6 +58,6 @@ private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 7ce602d326cd..68736bc1decd 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -48,7 +48,7 @@ namespace {
bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
-} // namespace
+}
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index d186dfb2ec91..916ab06ec305 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -73,6 +73,6 @@ private:
ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0dd1b4c15ef8..d9ab824995c1 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -202,7 +202,7 @@ namespace {
std::function<bool(const Function &)> PredicateFtor;
};
char Thumb2SizeReduce::ID = 0;
-} // namespace
+}
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(Ftor) {
diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index e55f88f53aec..23aaff37f409 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -60,6 +60,6 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index 9d0aa7a98a64..10ec6587550b 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -44,7 +44,7 @@ public:
const char *Modifier = nullptr);
void EmitInstruction(const MachineInstr *MI) override;
};
-} // namespace
+}
void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index a6fe7c98115b..3b9fc443e053 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -37,5 +37,5 @@ public:
MBB.erase(MI);
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index b49de3a27083..d9e654c76428 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -51,7 +51,7 @@ private:
// Complex Pattern for address selection.
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
};
-} // namespace
+}
// ComplexPattern used on BPF Load/Store instructions
bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 21d160d49946..38c56bbef81e 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -86,7 +86,7 @@ public:
};
int DiagnosticInfoUnsupported::KindID = 0;
-} // namespace
+}
BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
const BPFSubtarget &STI)
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index b56bb39ca85d..ec71dca2faeb 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -85,6 +85,6 @@ private:
return true;
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h
index bd96f76a8075..ac60188804d2 100644
--- a/lib/Target/BPF/BPFInstrInfo.h
+++ b/lib/Target/BPF/BPFInstrInfo.h
@@ -54,6 +54,6 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index ba9189792cbb..054e89407db2 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -38,6 +38,6 @@ public:
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 44977a210959..7072dd0bde1a 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -35,6 +35,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/BPFSubtarget.h b/lib/Target/BPF/BPFSubtarget.h
index 701ac577dd74..5ad58db75395 100644
--- a/lib/Target/BPF/BPFSubtarget.h
+++ b/lib/Target/BPF/BPFSubtarget.h
@@ -59,6 +59,6 @@ public:
return &InstrInfo.getRegisterInfo();
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 5a888a955e33..06cba2252a25 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -60,7 +60,7 @@ public:
bool addInstSelector() override;
};
-} // namespace
+}
TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(this, PM);
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index c715fd5f0089..a0086df2d32c 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -38,6 +38,6 @@ public:
return TLOF.get();
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index cb074713cce5..adcaff686933 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -37,6 +37,6 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 33aecb7b8ec3..36f99262ed70 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -84,7 +84,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
}
-} // namespace
+}
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index ef4f05f3d810..05ba6183e322 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -25,7 +25,7 @@ protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-} // namespace
+}
BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 22376543bd05..d63bbf49294e 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -36,6 +36,6 @@ public:
HasDotTypeDotSizeDirective = false;
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index b579afd690e9..dc4ede30f191 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -58,7 +58,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
-} // namespace
+}
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 3d2583a11349..e2ae6526edc6 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -49,7 +49,7 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, bool IsLittleEndian);
-} // namespace llvm
+}
// Defines symbolic names for BPF registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 9c9c097b4c3d..bc5d7f65b2f6 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1678,9 +1678,8 @@ void CppWriter::printFunctionUses(const Function* F) {
consts.insert(GVar->getInitializer());
} else if (Constant* C = dyn_cast<Constant>(operand)) {
consts.insert(C);
- for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ for (Value* operand : C->operands()) {
// If the operand references a GVal or Constant, make a note of it
- Value* operand = C->getOperand(j);
printType(operand->getType());
if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
gvs.insert(GV);
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 0cd20daa12fa..ebf0635b12e4 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -37,7 +37,7 @@ public:
extern Target TheCppBackendTarget;
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 837838afc0f2..9cc1e944d359 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -53,7 +53,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-} // namespace
+}
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index b24d24a6d6f2..d360be2aa5b2 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -58,6 +58,6 @@ namespace llvm {
/// \brief Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index f09a5b91fe8b..792fc8b7af3a 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -53,6 +53,6 @@ namespace llvm {
static const char *getRegisterName(unsigned RegNo);
};
-} // namespace llvm
+} // end of llvm namespace
#endif
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index ff1a4fe30757..3753b745657b 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -228,7 +228,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
}
return true;
}
-} // namespace
+}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 33766dfb830c..37ed173a79cd 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -173,7 +173,7 @@ namespace {
bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
bool coalesceSegments(MachineFunction &MF);
};
-} // namespace
+}
char HexagonExpandCondsets::ID = 0;
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 1657d88a4f43..e4c8d8f7b28c 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -333,7 +333,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-} // namespace
+}
//===----------------------------------------------------------------------===//
// Public Constructor Functions
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 3ea77cdbb1f7..d0c7f9c8960f 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -67,7 +67,7 @@ namespace {
};
char HexagonFixupHwLoops::ID = 0;
-} // namespace
+}
INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
"Hexagon Hardware Loops Fixup", false, false)
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 9797134f41ad..868f87e18413 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -238,7 +238,7 @@ namespace {
return true;
return false;
}
-} // namespace
+}
/// Implements shrink-wrapping of the stack frame. By default, stack frame
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 767e13cbd6a6..89500cb85724 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -99,6 +99,6 @@ private:
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1a14c88f04fd..6e9e69f5a2c7 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -95,7 +95,7 @@ public:
unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
-} // namespace
+}
// Implement calling convention for Hexagon.
static bool
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index b9d18df05b54..b80e8477eb7b 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -86,7 +86,7 @@ bool isPositiveHalfWord(SDNode *N);
OP_END
};
- } // namespace HexagonISD
+ }
class HexagonSubtarget;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 91f508ee5ecf..d0b8a4631c1d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -229,6 +229,6 @@ public:
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 5681ae29831f..76723586c66e 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -80,6 +80,6 @@ public:
void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index fae16e2a0612..60343442e327 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -238,7 +238,7 @@ protected:
#endif
};
-} // namespace llvm
+} // namespace
#endif
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 94ec2e7ca6c1..93dcbe233b25 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -104,7 +104,7 @@ namespace {
private:
void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
};
-} // namespace
+}
char HexagonPeephole::ID = 0;
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index d586c395a9ad..7069ad36e21a 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -48,7 +48,7 @@ namespace {
FunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char HexagonRemoveExtendArgs::ID = 0;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index c72051ca1348..8ac2e43f9294 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -32,6 +32,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 61bb7c5139e4..d3eb56f4ba0f 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -156,7 +156,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-} // namespace
+}
//===----------------------------------------------------------------------===//
// Public Constructor Functions
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 90f1ced5420a..a173a8087832 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -77,7 +77,7 @@ namespace llvm {
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonNewValueJump();
-} // namespace llvm
+} // end namespace llvm;
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
diff --git a/lib/Target/Hexagon/HexagonTargetStreamer.h b/lib/Target/Hexagon/HexagonTargetStreamer.h
index 2b4a3ada506b..e19c404450e6 100644
--- a/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -26,6 +26,6 @@ public:
unsigned ByteAlign,
unsigned AccessGranularity){};
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 66fdd65b3ea7..b91a3f6f8c6c 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -170,7 +170,7 @@ namespace {
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
};
-} // namespace
+}
INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
@@ -272,9 +272,8 @@ static bool IsIndirectCall(MachineInstr* MI) {
// reservation fail.
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
+ MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -290,11 +289,10 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
- MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
+ MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+ MI->getDebugLoc());
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
- MF->DeleteMachineInstr(PseudoMI);
+ MF.DeleteMachineInstr(PseudoMI);
return CanReserve;
}
@@ -302,9 +300,8 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
// true, otherwise, return false.
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
+ MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 0f7cf0e7fcbd..da5d4d1da69b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -31,7 +31,7 @@ public:
unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
bool IsPCRel) const override;
};
-} // namespace
+}
HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
: MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 6f8cb90f18f9..9fc4e2aeaba6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -370,7 +370,7 @@ namespace {
return false;
}
}
-} // namespace
+}
unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
const MCOperand &MO,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 0d1f1e607e63..886f8db3bc63 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -174,7 +174,7 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return HexagonII::HCG_None;
}
-} // namespace
+}
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
namespace {
@@ -199,7 +199,7 @@ unsigned getCompoundOp(MCInst const &HMCI) {
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
}
}
-} // namespace
+}
namespace {
MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
@@ -331,7 +331,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
return CompoundInsn;
}
-} // namespace
+}
/// Non-Symmetrical. See if these two instructions are fit for compound pair.
namespace {
@@ -348,7 +348,7 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
}
-} // namespace
+}
namespace {
bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
@@ -396,7 +396,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
}
return false;
}
-} // namespace
+}
/// tryCompound - Given a bundle check for compound insns when one
/// is found update the contents fo the bundle with the compound insn.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index e69a52de5c77..48b15f85a783 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -461,4 +461,4 @@ void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
MCOperand &Operand = MCI.getOperand(0);
Operand.setImm(Operand.getImm() | outerLoopMask);
}
-} // namespace llvm
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 9f7562a20063..32d61a4a7be5 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -229,7 +229,7 @@ bool subInstWouldBeExtended(MCInst const &potentialDuplex);
// Attempt to find and replace compound pairs
void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-} // namespace HexagonMCInstrInfo
-} // namespace llvm
+}
+}
#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 9c0e3f2bbf6e..a21cce1fc240 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -60,6 +60,6 @@ bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &,
SmallVector<DuplexCandidate, 8>);
-} // namespace llvm
+}
#endif // HEXAGONMCSHUFFLER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 4a4f0c21afa2..83ce0abd835e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -102,7 +102,7 @@ public:
OS << "\n\t}" << PacketBundle.second;
}
};
-} // namespace
+}
namespace {
class HexagonTargetELFStreamer : public HexagonTargetStreamer {
@@ -137,7 +137,7 @@ public:
Symbol, Size, ByteAlignment, AccessSize);
}
};
-} // namespace
+}
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
@@ -172,9 +172,10 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
return nullptr;
}
-MCTargetStreamer *createMCAsmTargetStreamer(
- MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint,
- bool IsVerboseAsm) {
+static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool IsVerboseAsm) {
return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 89c3eb3cd65e..cb626503313f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -49,7 +49,7 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
-} // namespace llvm
+} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index feaaa4f780d5..41112ac0b46e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -81,6 +81,9 @@ unsigned HexagonResource::setWeight(unsigned s) {
const unsigned MaskWeight = SlotWeight - 1;
bool Key = (1 << s) & getUnits();
+ // TODO: Improve this API so that we can prevent misuse statically.
+ assert(SlotWeight * s < 32 && "Argument to setWeight too large.");
+
// Calculate relative weight of the insn for the given slot, weighing it the
// heavier the more restrictive the insn is and the lowest the slots that the
// insn may be executed in.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 53325f6edb7c..8b6c72ee25e6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -34,8 +34,7 @@ public:
HexagonResource(unsigned s) { setUnits(s); };
void setUnits(unsigned s) {
- Slots = s & ~(-1 << HEXAGON_PACKET_SIZE);
- setWeight(s);
+ Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
};
unsigned setWeight(unsigned s);
@@ -134,6 +133,6 @@ public:
void setError(unsigned Err) { Error = Err; };
unsigned getError() const { return (Error); };
};
-} // namespace llvm
+}
#endif // HEXAGONSHUFFLER_H
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index ab8232489282..f05d7a465252 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -31,6 +31,7 @@ subdirectories =
PowerPC
Sparc
SystemZ
+ WebAssembly
X86
XCore
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 80565aab180e..70141a998e4a 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -40,6 +40,6 @@ namespace llvm {
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 302012e1b148..796f25233123 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -30,7 +30,7 @@ namespace MSP430CC {
COND_INVALID = -1
};
-} // namespace MSP430CC
+}
namespace llvm {
class MSP430TargetMachine;
@@ -42,6 +42,6 @@ namespace llvm {
FunctionPass *createMSP430BranchSelectionPass();
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 2bc11c07f8ff..ffcf22216d4f 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -44,7 +44,7 @@ namespace {
}
};
char MSP430BSel::ID = 0;
-} // namespace
+}
/// createMSP430BranchSelectionPass - returns an instance of the Branch
/// Selection Pass
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 2f20bbd8ae15..48c4dc866a63 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -49,6 +49,6 @@ public:
RegScavenger *RS = nullptr) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index a60108df360c..5ce5013d898c 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -85,7 +85,7 @@ namespace {
errs() << " JT" << JT << " Align" << Align << '\n';
}
};
-} // namespace
+}
/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
/// instructions for SelectionDAG operations.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index b09060939ac5..80d3ae175fb1 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -64,7 +64,7 @@ namespace llvm {
/// SHL, SRA, SRL - Non-constant shifts.
SHL, SRA, SRL
};
- } // namespace MSP430ISD
+ }
class MSP430Subtarget;
class MSP430TargetLowering : public TargetLowering {
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index c6bad1eadd65..3cf3b1bb8ab2 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -38,7 +38,7 @@ namespace MSP430II {
Size4Bytes = 3 << SizeShift,
Size6Bytes = 4 << SizeShift
};
-} // namespace MSP430II
+}
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
@@ -87,6 +87,6 @@ public:
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index ebbc6e51286e..ebd639744bcc 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -42,6 +42,6 @@ public:
MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 3d1a245c4fea..fcc5f5b88600 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -49,6 +49,6 @@ public:
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index 95c929372a7f..61a6b19111db 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~MSP430SelectionDAGInfo();
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 958a5d39487d..81f6f027d45c 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -64,6 +64,6 @@ public:
return &TSInfo;
}
};
-} // namespace llvm
+} // End llvm namespace
#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 5b8d633554b8..f14156dbfa2b 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -113,6 +113,7 @@ class MipsAsmParser : public MCTargetAsmParser {
// nullptr, which indicates that no function is currently
// selected. This usually happens after an '.end func'
// directive.
+ bool IsLittleEndian;
// Print a warning along with its fix-it message at the given range.
void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
@@ -214,11 +215,17 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandCondBranches(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandUlhu(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ bool expandUlw(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg,
- SmallVectorImpl<MCInst> &Instructions);
+ bool Is64Bit, SmallVectorImpl<MCInst> &Instructions);
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -251,6 +258,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetMips16Directive();
bool parseSetNoMips16Directive();
bool parseSetFpDirective();
+ bool parseSetOddSPRegDirective();
+ bool parseSetNoOddSPRegDirective();
bool parseSetPopDirective();
bool parseSetPushDirective();
bool parseSetSoftFloatDirective();
@@ -352,6 +361,16 @@ class MipsAsmParser : public MCTargetAsmParser {
}
}
+ void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+ setFeatureBits(Feature, FeatureString);
+ AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ }
+
+ void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+ clearFeatureBits(Feature, FeatureString);
+ AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ }
+
public:
enum MipsMatchResultTy {
Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
@@ -387,6 +406,13 @@ public:
report_fatal_error("-mno-odd-spreg requires the O32 ABI");
CurrentFn = nullptr;
+
+ Triple TheTriple(sti.getTargetTriple());
+ if ((TheTriple.getArch() == Triple::mips) ||
+ (TheTriple.getArch() == Triple::mips64))
+ IsLittleEndian = false;
+ else
+ IsLittleEndian = true;
}
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
@@ -462,6 +488,8 @@ public:
void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc);
void warnIfNoMacro(SMLoc Loc);
+
+ bool isLittle() const { return IsLittleEndian; }
};
}
@@ -486,11 +514,11 @@ public:
RegKind_CCR = 128, /// CCR
RegKind_HWRegs = 256, /// HWRegs
RegKind_COP3 = 512, /// COP3
-
+ RegKind_COP0 = 1024, /// COP0
/// Potentially any (e.g. $1)
RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCC | RegKind_MSA128 |
RegKind_MSACtrl | RegKind_COP2 | RegKind_ACC |
- RegKind_CCR | RegKind_HWRegs | RegKind_COP3
+ RegKind_CCR | RegKind_HWRegs | RegKind_COP3 | RegKind_COP0
};
private:
@@ -652,6 +680,14 @@ private:
return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
}
+ /// Coerce the register to COP0 and return the real register for the
+ /// current target.
+ unsigned getCOP0Reg() const {
+ assert(isRegIdx() && (RegIdx.Kind & RegKind_COP0) && "Invalid access!");
+ unsigned ClassID = Mips::COP0RegClassID;
+ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+ }
+
/// Coerce the register to COP2 and return the real register for the
/// current target.
unsigned getCOP2Reg() const {
@@ -793,6 +829,11 @@ public:
Inst.addOperand(MCOperand::createReg(getMSACtrlReg()));
}
+ void addCOP0AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getCOP0Reg()));
+ }
+
void addCOP2AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getCOP2Reg()));
@@ -1168,6 +1209,9 @@ public:
bool isACCAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
}
+ bool isCOP0AsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_COP0 && RegIdx.Index <= 31;
+ }
bool isCOP2AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31;
}
@@ -1635,6 +1679,8 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
case Mips::BLEU:
case Mips::BGEU:
case Mips::BGTU:
+ case Mips::Ulhu:
+ case Mips::Ulw:
return true;
default:
return false;
@@ -1673,6 +1719,10 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::BGEU:
case Mips::BGTU:
return expandCondBranches(Inst, IDLoc, Instructions);
+ case Mips::Ulhu:
+ return expandUlhu(Inst, IDLoc, Instructions);
+ case Mips::Ulw:
+ return expandUlw(Inst, IDLoc, Instructions);
}
}
@@ -1774,6 +1824,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
MCInst tmpInst;
+ unsigned TmpReg = DstReg;
+ if (UseSrcReg && (DstReg == SrcReg)) {
+ // At this point we need AT to perform the expansions and we exit if it is
+ // not available.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ TmpReg = ATReg;
+ }
+
tmpInst.setLoc(IDLoc);
// FIXME: gas has a special case for values that are 000...1111, which
// becomes a li -1 and then a dsrl
@@ -1810,23 +1870,23 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
// For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
// upper 32 bits.
tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::createImm(Bits31To16));
tmpInst.setLoc(IDLoc);
Instructions.push_back(tmpInst);
// Move the value to the upper 16 bits by doing a 16-bit left shift.
- createLShiftOri<16>(0, DstReg, IDLoc, Instructions);
+ createLShiftOri<16>(0, TmpReg, IDLoc, Instructions);
} else {
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createImm(Bits31To16));
Instructions.push_back(tmpInst);
}
- createLShiftOri<0>(Bits15To0, DstReg, IDLoc, Instructions);
+ createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, DstReg, SrcReg, Instructions);
+ createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
} else if ((ImmValue & (0xffffLL << 48)) == 0) {
if (Is32BitImm) {
@@ -1853,14 +1913,14 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits15To0 = ImmValue & 0xffff;
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createImm(Bits47To32));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(Bits31To16, DstReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+ createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions);
+ createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, DstReg, SrcReg, Instructions);
+ createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
} else {
if (Is32BitImm) {
@@ -1889,22 +1949,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits15To0 = ImmValue & 0xffff;
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createImm(Bits63To48));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(Bits47To32, DstReg, IDLoc, Instructions);
+ createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions);
// When Bits31To16 is 0, do a left shift of 32 bits instead of doing
// two left shifts of 16 bits.
if (Bits31To16 == 0) {
- createLShiftOri<32>(Bits15To0, DstReg, IDLoc, Instructions);
+ createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions);
} else {
- createLShiftOri<16>(Bits31To16, DstReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+ createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions);
+ createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
}
if (UseSrcReg)
- createAddu(DstReg, DstReg, SrcReg, Instructions);
+ createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
}
return false;
}
@@ -1991,6 +2051,18 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
&Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ bool UseSrcReg = SrcReg != Mips::NoRegister;
+
+ unsigned TmpReg = DstReg;
+ if (UseSrcReg && (DstReg == SrcReg)) {
+ // At this point we need AT to perform the expansions and we exit if it is
+ // not available.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ TmpReg = ATReg;
+ }
+
if (!Is32BitSym) {
// If it's a 64-bit architecture, expand to:
// la d,sym => lui d,highest(sym)
@@ -2005,31 +2077,31 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
&Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(MCOperand::createExpr(HigherExpr), DstReg, SMLoc(),
+ createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(),
Instructions);
- createLShiftOri<16>(MCOperand::createExpr(HiExpr), DstReg, SMLoc(),
+ createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(),
Instructions);
- createLShiftOri<16>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
+ createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
Instructions);
} else {
// Otherwise, expand to:
// la d,sym => lui d,hi16(sym)
// ori d,d,lo16(sym)
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(TmpReg));
tmpInst.addOperand(MCOperand::createExpr(HiExpr));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
+ createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
Instructions);
}
- if (SrcReg != Mips::NoRegister)
- createAddu(DstReg, DstReg, SrcReg, Instructions);
+ if (UseSrcReg)
+ createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions);
return false;
}
@@ -2449,6 +2521,174 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
return false;
}
+bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ if (hasMips32r6() || hasMips64r6()) {
+ Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+ return false;
+ }
+
+ warnIfNoMacro(IDLoc);
+
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
+ const MCOperand &SrcRegOp = Inst.getOperand(1);
+ assert(SrcRegOp.isReg() && "expected register operand kind");
+
+ const MCOperand &OffsetImmOp = Inst.getOperand(2);
+ assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+
+ unsigned DstReg = DstRegOp.getReg();
+ unsigned SrcReg = SrcRegOp.getReg();
+ int64_t OffsetValue = OffsetImmOp.getImm();
+
+ // NOTE: We always need AT for ULHU, as it is always used as the source
+ // register for one of the LBu's.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ // When the value of offset+1 does not fit in 16 bits, we have to load the
+ // offset in AT, (D)ADDu the original source register (if there was one), and
+ // then use AT as the source register for the 2 generated LBu's.
+ bool LoadedOffsetInAT = false;
+ if (!isInt<16>(OffsetValue + 1) || !isInt<16>(OffsetValue)) {
+ LoadedOffsetInAT = true;
+
+ if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
+ IDLoc, Instructions))
+ return true;
+
+ // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
+ // because it will make our output more similar to GAS'. For example,
+ // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
+ // instead of just an "ori $1, $9, 32768".
+ // NOTE: If there is no source register specified in the ULHU, the parser
+ // will interpret it as $0.
+ if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
+ createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions);
+ }
+
+ unsigned FirstLbuDstReg = LoadedOffsetInAT ? DstReg : ATReg;
+ unsigned SecondLbuDstReg = LoadedOffsetInAT ? ATReg : DstReg;
+ unsigned LbuSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
+
+ int64_t FirstLbuOffset = 0, SecondLbuOffset = 0;
+ if (isLittle()) {
+ FirstLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
+ SecondLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+ } else {
+ FirstLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+ SecondLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
+ }
+
+ unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Mips::LBu);
+ TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg));
+ TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
+ TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset));
+ Instructions.push_back(TmpInst);
+
+ TmpInst.clear();
+ TmpInst.setOpcode(Mips::LBu);
+ TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg));
+ TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
+ TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset));
+ Instructions.push_back(TmpInst);
+
+ TmpInst.clear();
+ TmpInst.setOpcode(Mips::SLL);
+ TmpInst.addOperand(MCOperand::createReg(SllReg));
+ TmpInst.addOperand(MCOperand::createReg(SllReg));
+ TmpInst.addOperand(MCOperand::createImm(8));
+ Instructions.push_back(TmpInst);
+
+ TmpInst.clear();
+ TmpInst.setOpcode(Mips::OR);
+ TmpInst.addOperand(MCOperand::createReg(DstReg));
+ TmpInst.addOperand(MCOperand::createReg(DstReg));
+ TmpInst.addOperand(MCOperand::createReg(ATReg));
+ Instructions.push_back(TmpInst);
+
+ return false;
+}
+
+bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ if (hasMips32r6() || hasMips64r6()) {
+ Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+ return false;
+ }
+
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
+ const MCOperand &SrcRegOp = Inst.getOperand(1);
+ assert(SrcRegOp.isReg() && "expected register operand kind");
+
+ const MCOperand &OffsetImmOp = Inst.getOperand(2);
+ assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+
+ unsigned SrcReg = SrcRegOp.getReg();
+ int64_t OffsetValue = OffsetImmOp.getImm();
+ unsigned ATReg = 0;
+
+ // When the value of offset+3 does not fit in 16 bits, we have to load the
+ // offset in AT, (D)ADDu the original source register (if there was one), and
+ // then use AT as the source register for the generated LWL and LWR.
+ bool LoadedOffsetInAT = false;
+ if (!isInt<16>(OffsetValue + 3) || !isInt<16>(OffsetValue)) {
+ ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ LoadedOffsetInAT = true;
+
+ warnIfNoMacro(IDLoc);
+
+ if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
+ IDLoc, Instructions))
+ return true;
+
+ // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
+ // because it will make our output more similar to GAS'. For example,
+ // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
+ // instead of just an "ori $1, $9, 32768".
+ // NOTE: If there is no source register specified in the ULW, the parser
+ // will interpret it as $0.
+ if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
+ createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions);
+ }
+
+ unsigned FinalSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
+ int64_t LeftLoadOffset = 0, RightLoadOffset = 0;
+ if (isLittle()) {
+ LeftLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
+ RightLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+ } else {
+ LeftLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+ RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
+ }
+
+ MCInst LeftLoadInst;
+ LeftLoadInst.setOpcode(Mips::LWL);
+ LeftLoadInst.addOperand(DstRegOp);
+ LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
+ LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset));
+ Instructions.push_back(LeftLoadInst);
+
+ MCInst RightLoadInst;
+ RightLoadInst.setOpcode(Mips::LWR);
+ RightLoadInst.addOperand(DstRegOp);
+ RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
+ RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset ));
+ Instructions.push_back(RightLoadInst);
+
+ return false;
+}
+
void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst NopInst;
@@ -2466,10 +2706,10 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
}
void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
- unsigned TrgReg,
+ unsigned TrgReg, bool Is64Bit,
SmallVectorImpl<MCInst> &Instructions) {
MCInst AdduInst;
- AdduInst.setOpcode(Mips::ADDu);
+ AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu);
AdduInst.addOperand(MCOperand::createReg(DstReg));
AdduInst.addOperand(MCOperand::createReg(SrcReg));
AdduInst.addOperand(MCOperand::createReg(TrgReg));
@@ -2972,9 +3212,12 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
MCAsmParser &Parser = getParser();
SMLoc S;
bool Result = true;
+ unsigned NumOfLParen = 0;
- while (getLexer().getKind() == AsmToken::LParen)
+ while (getLexer().getKind() == AsmToken::LParen) {
Parser.Lex();
+ ++NumOfLParen;
+ }
switch (getLexer().getKind()) {
default:
@@ -2985,7 +3228,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
case AsmToken::Minus:
case AsmToken::Plus:
if (isParenExpr)
- Result = getParser().parseParenExpression(Res, S);
+ Result = getParser().parseParenExprOfDepth(NumOfLParen, Res, S);
else
Result = (getParser().parseExpression(Res));
while (getLexer().getKind() == AsmToken::RParen)
@@ -3867,6 +4110,34 @@ bool MipsAsmParser::parseSetFpDirective() {
return false;
}
+bool MipsAsmParser::parseSetOddSPRegDirective() {
+ MCAsmParser &Parser = getParser();
+
+ Parser.Lex(); // Eat "oddspreg".
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ getTargetStreamer().emitDirectiveSetOddSPReg();
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoOddSPRegDirective() {
+ MCAsmParser &Parser = getParser();
+
+ Parser.Lex(); // Eat "nooddspreg".
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ getTargetStreamer().emitDirectiveSetNoOddSPReg();
+ return false;
+}
+
bool MipsAsmParser::parseSetPopDirective() {
MCAsmParser &Parser = getParser();
SMLoc Loc = getLexer().getLoc();
@@ -4229,6 +4500,10 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetArchDirective();
} else if (Tok.getString() == "fp") {
return parseSetFpDirective();
+ } else if (Tok.getString() == "oddspreg") {
+ return parseSetOddSPRegDirective();
+ } else if (Tok.getString() == "nooddspreg") {
+ return parseSetNoOddSPRegDirective();
} else if (Tok.getString() == "pop") {
return parseSetPopDirective();
} else if (Tok.getString() == "push") {
@@ -4428,6 +4703,8 @@ bool MipsAsmParser::parseInsnDirective() {
/// ::= .module oddspreg
/// ::= .module nooddspreg
/// ::= .module fp=value
+/// ::= .module softfloat
+/// ::= .module hardfloat
bool MipsAsmParser::parseDirectiveModule() {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
@@ -4446,8 +4723,16 @@ bool MipsAsmParser::parseDirectiveModule() {
}
if (Option == "oddspreg") {
- getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
- clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ clearModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ // Synchronize the abiflags information with the FeatureBits information we
+ // changed above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated abiflags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted at the end).
+ getTargetStreamer().emitDirectiveModuleOddSPReg();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4462,8 +4747,16 @@ bool MipsAsmParser::parseDirectiveModule() {
return false;
}
- getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
- setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ setModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ // Synchronize the abiflags information with the FeatureBits information we
+ // changed above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated abiflags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted at the end).
+ getTargetStreamer().emitDirectiveModuleOddSPReg();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4474,6 +4767,44 @@ bool MipsAsmParser::parseDirectiveModule() {
return false; // parseDirectiveModule has finished successfully.
} else if (Option == "fp") {
return parseDirectiveModuleFP();
+ } else if (Option == "softfloat") {
+ setModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+
+ // Synchronize the ABI Flags information with the FeatureBits information we
+ // updated above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated ABI Flags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted later).
+ getTargetStreamer().emitDirectiveModuleSoftFloat();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "hardfloat") {
+ clearModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+
+ // Synchronize the ABI Flags information with the FeatureBits information we
+ // updated above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated ABI Flags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted later).
+ getTargetStreamer().emitDirectiveModuleHardFloat();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ return false; // parseDirectiveModule has finished successfully.
} else {
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
}
@@ -4502,8 +4833,15 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
return false;
}
- // Emit appropriate flags.
- getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32());
+ // Synchronize the abiflags information with the FeatureBits information we
+ // changed above.
+ getTargetStreamer().updateABIInfo(*this);
+
+ // If printing assembly, use the recently updated abiflags information.
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+ // emitted at the end).
+ getTargetStreamer().emitDirectiveModuleFP();
+
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
@@ -4512,6 +4850,7 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive) {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
+ bool ModuleLevelOptions = Directive == ".module";
if (Lexer.is(AsmToken::Identifier)) {
StringRef Value = Parser.getTok().getString();
@@ -4528,6 +4867,13 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
}
FpABI = MipsABIFlagsSection::FpABIKind::XX;
+ if (ModuleLevelOptions) {
+ setModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+ clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ } else {
+ setFeatureBits(Mips::FeatureFPXX, "fpxx");
+ clearFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ }
return true;
}
@@ -4547,8 +4893,23 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
}
FpABI = MipsABIFlagsSection::FpABIKind::S32;
- } else
+ if (ModuleLevelOptions) {
+ clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+ clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ } else {
+ clearFeatureBits(Mips::FeatureFPXX, "fpxx");
+ clearFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ }
+ } else {
FpABI = MipsABIFlagsSection::FpABIKind::S64;
+ if (ModuleLevelOptions) {
+ clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+ setModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ } else {
+ clearFeatureBits(Mips::FeatureFPXX, "fpxx");
+ setFeatureBits(Mips::FeatureFP64Bit, "fp64");
+ }
+ }
return true;
}
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index c8629b5d7bd2..a34ba3bd0ee8 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -178,6 +178,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -1564,6 +1569,18 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::COP0RegClassID, RegNo);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -1855,6 +1872,6 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::createImm(SignExtend32<23>(Insn) << 2));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<25>(Insn << 2)));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 725ea7f971eb..70b9cca8cf6e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -66,4 +66,4 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
return OS;
}
-} // namespace llvm
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index bf306ee4814b..b078cd30a87b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -186,6 +186,6 @@ public:
};
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index aa965e82a6bf..40c5681acc17 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -73,6 +73,6 @@ public:
unsigned GetEhDataReg(unsigned I) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 5c746b2894b2..328e71720cac 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -59,10 +59,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case Mips::fixup_MIPS_PCLO16:
break;
case Mips::fixup_Mips_PC16:
- // So far we are only using this type for branches.
- // For branches we start 1 instruction after the branch
- // so the displacement will be one instruction size less.
- Value -= 4;
// The displacement is then divided by 4 to give us an 18 bit
// address range. Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
@@ -135,7 +131,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup");
break;
case Mips::fixup_MIPS_PC21_S2:
- Value -= 4;
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
@@ -143,7 +138,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup");
break;
case Mips::fixup_MIPS_PC26_S2:
- Value -= 4;
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index fe84e4021d34..b3d5a4964f86 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -87,6 +87,6 @@ public:
}; // class MipsAsmBackend
-} // namespace llvm
+} // namespace
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a7d5a1e75e41..ff7779ec1e78 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -119,7 +119,7 @@ namespace MipsII {
FormMask = 15
};
-} // namespace MipsII
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index a45e2ad8cf16..9b2952720edd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -51,7 +51,7 @@ struct MipsRelocationEntry {
virtual void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) override;
};
-} // namespace
+}
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 93925bf8ca03..e36263d54ca4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -226,8 +226,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
- const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::create(0, Expr,
+ const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+ MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+ Fixups.push_back(MCFixup::create(0, FixupExpression,
MCFixupKind(Mips::fixup_Mips_PC16)));
return 0;
}
@@ -315,8 +316,9 @@ getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
assert(MO.isExpr() &&
"getBranchTarget21OpValue expects only expressions or immediates");
- const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::create(0, Expr,
+ const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+ MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+ Fixups.push_back(MCFixup::create(0, FixupExpression,
MCFixupKind(Mips::fixup_MIPS_PC21_S2)));
return 0;
}
@@ -337,8 +339,9 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
assert(MO.isExpr() &&
"getBranchTarget26OpValue expects only expressions or immediates");
- const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::create(0, Expr,
+ const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+ MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+ Fixups.push_back(MCFixup::create(0, FixupExpression,
MCFixupKind(Mips::fixup_MIPS_PC26_S2)));
return 0;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 81a0a987bc4e..687b800c2409 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -25,6 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 20358a0f9cf2..4069d7d184ed 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -62,7 +62,7 @@ namespace MIPS_MC {
StringRef selectMipsCPU(const Triple &TT, StringRef CPU);
}
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 537867503eda..aef9bd3a8e2a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -265,4 +265,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-} // namespace llvm
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 49116326139c..24b602810d6e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -79,6 +79,9 @@ void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg,
if (GPR32RegClass->contains(CurrentSubReg) ||
GPR64RegClass->contains(CurrentSubReg))
ri_gprmask |= Value;
+ else if (COP0RegClass->contains(CurrentSubReg))
+ ri_cprmask[0] |= Value;
+ // MIPS COP1 is the FPU.
else if (FGR32RegClass->contains(CurrentSubReg) ||
FGR64RegClass->contains(CurrentSubReg) ||
AFGR64RegClass->contains(CurrentSubReg) ||
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index a051f4c123fc..e4da2df75d47 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -92,15 +92,23 @@ void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) {
}
-void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
- bool IsO32ABI) {
- if (!Enabled && !IsO32ABI)
+
+void MipsTargetStreamer::emitDirectiveModuleFP() {}
+
+void MipsTargetStreamer::emitDirectiveModuleOddSPReg() {
+ if (!ABIFlagsSection.OddSPReg && !ABIFlagsSection.Is32BitABI)
report_fatal_error("+nooddspreg is only valid for O32");
}
+void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {}
+void MipsTargetStreamer::emitDirectiveModuleHardFloat() {}
void MipsTargetStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
forbidModuleDirective();
}
+void MipsTargetStreamer::emitDirectiveSetOddSPReg() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoOddSPReg() {
+ forbidModuleDirective();
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -369,12 +377,9 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
forbidModuleDirective();
}
-void MipsTargetAsmStreamer::emitDirectiveModuleFP(
- MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
- MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
-
+void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
OS << "\t.module\tfp=";
- OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+ OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
}
void MipsTargetAsmStreamer::emitDirectiveSetFp(
@@ -385,11 +390,28 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp(
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
}
-void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
- bool IsO32ABI) {
- MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg() {
+ MipsTargetStreamer::emitDirectiveModuleOddSPReg();
+
+ OS << "\t.module\t" << (ABIFlagsSection.OddSPReg ? "" : "no") << "oddspreg\n";
+}
- OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n";
+void MipsTargetAsmStreamer::emitDirectiveSetOddSPReg() {
+ MipsTargetStreamer::emitDirectiveSetOddSPReg();
+ OS << "\t.set\toddspreg\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoOddSPReg() {
+ MipsTargetStreamer::emitDirectiveSetNoOddSPReg();
+ OS << "\t.set\tnooddspreg\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleSoftFloat() {
+ OS << "\t.module\tsoftfloat\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() {
+ OS << "\t.module\thardfloat\n";
}
// This part is for ELF object output.
@@ -800,10 +822,3 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
OS << ABIFlagsSection;
}
-
-void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
- bool IsO32ABI) {
- MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
-
- ABIFlagsSection.OddSPReg = Enabled;
-}
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 78ba76d27cbb..187a022b2563 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -240,3 +240,50 @@ class ERETNC_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
let Inst{15-6} = 0x3cd;
let Inst{5-0} = 0x3c;
}
+
+class BREAK_MMR6_ENC<string instr_asm> : MMR6Arch<instr_asm> {
+ bits<10> code_1;
+ bits<10> code_2;
+ bits<32> Inst;
+ let Inst{31-26} = 0x0;
+ let Inst{25-16} = code_1;
+ let Inst{15-6} = code_2;
+ let Inst{5-0} = 0x07;
+}
+
+class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = 0x0;
+ let Inst{20-16} = 0x0;
+ let Inst{15-11} = op;
+ let Inst{10-6} = 0x0;
+ let Inst{5-0} = 0x0;
+}
+
+class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+ bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = 0x00;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<instr_asm> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = shamt;
+ let Inst{10} = rotate;
+ let Inst{9-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index ed71c3d9b5f6..53bde1379e29 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -29,6 +29,7 @@ class AUI_MMR6_ENC : AUI_FM_MMR6;
class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
+class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011111>;
class BGTZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b111000>;
@@ -40,6 +41,8 @@ class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>;
class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
+class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
+class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
@@ -60,6 +63,7 @@ class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>;
class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>;
class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>;
class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>;
+class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>;
class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>;
class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
@@ -144,6 +148,8 @@ class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
+class BRK_MMR6_DESC : BRK_FT<"break">;
+
class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs);
@@ -166,6 +172,9 @@ class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>;
class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
+class EHB_MMR6_DESC : Barrier<"ehb">;
+class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>;
+
class ERET_MMR6_DESC : ER_FT<"eret">;
class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
@@ -255,6 +264,7 @@ class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
+class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>;
class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>;
class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>;
class MOD_MMR6_DESC : ArithLogicR<"mod", GPR32Opnd>;
@@ -302,11 +312,14 @@ def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC,
ISA_MICROMIPS32R6;
def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC,
ISA_MICROMIPS32R6;
+def BREAK_MMR6 : StdMMR6Rel, BRK_MMR6_DESC, BRK_MMR6_ENC, ISA_MICROMIPS32R6;
def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6;
def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6;
def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6;
def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
+def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6;
+def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6;
def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC,
ISA_MICROMIPS32R6;
@@ -330,8 +343,18 @@ def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC,
ISA_MICROMIPS32R6;
def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC,
ISA_MICROMIPS32R6;
+def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6;
def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6;
def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
}
+
+//===----------------------------------------------------------------------===//
+//
+// MicroMips instruction aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 2aab739dbd2b..39393840c6f2 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -934,3 +934,7 @@ class UncondBranchMMPseudo<string opstr> :
def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
def : MipsInstAlias<"nop", (MOVE16_MM ZERO, ZERO), 1>;
}
+
+let Predicates = [InMicroMips] in {
+def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2;
+}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 604b6704c033..671d7a87cc3d 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -31,6 +31,6 @@ namespace llvm {
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 2c33cfb96530..f281c927c1c4 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -42,6 +42,6 @@ public:
RegScavenger *RS) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index f2831fd5d0f6..893fc7cdf473 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -62,7 +62,7 @@ namespace {
};
char Mips16HardFloat::ID = 0;
-} // namespace
+}
//
// Return types that matter for hard float are:
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index bf82108728de..2eb6e5ddd2d9 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -46,5 +46,5 @@ extern FuncSignature const *findFuncSignature(const char *name) {
}
return nullptr;
}
-} // namespace Mips16HardFloatInfo
-} // namespace llvm
+}
+}
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 8354c33d33bc..7295c287576d 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -44,7 +44,7 @@ struct FuncNameSignature {
extern const FuncNameSignature PredefinedFuncs[];
extern FuncSignature const *findFuncSignature(const char *name);
-} // namespace Mips16HardFloatInfo
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index ce6b3f8486a9..ae0e61e19d9d 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -48,6 +48,6 @@ private:
FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index c52ef2a4e195..846e3c964f44 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -54,7 +54,7 @@ struct Mips16IntrinsicHelperType{
return std::strcmp(Name, RHS.Name) == 0;
}
};
-} // namespace
+}
// Libcalls for which no helper is generated. Sorted by name for binary search.
static const Mips16Libcall HardFloatLibCalls[] = {
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 99d3cacca67a..d3b9f750f347 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -77,6 +77,6 @@ namespace llvm {
unsigned SltiOpc, unsigned SltiXOpc,
MachineInstr *MI, MachineBasicBlock *BB )const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 1132d8a0318d..6540b40bc9ab 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -123,6 +123,6 @@ private:
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 83781ff24ac5..c37cf95cadc3 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -427,10 +427,10 @@ def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>;
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
-def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
-def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
-def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
-def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
+def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd>, MFC3OP_FM<0x10, 1>, ISA_MIPS3;
+def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
+def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
}
//===----------------------------------------------------------------------===//
@@ -613,10 +613,10 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs",
ISA_MIPS3;
// Two operand (implicit 0 selector) versions:
-def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
+def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
let Predicates = [HasMips64, HasCnMips] in {
def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 6b5d02b7a7e0..ae3c38ced80b 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -58,6 +58,6 @@ namespace llvm {
unsigned ADDiu, ORi, SLL, LUi;
InstSeq Insts;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1c80021086bd..fdba064b5c5e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -747,8 +747,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// accept it. We therefore emit it when it contradicts the default or an
// option has changed the default (i.e. FPXX) and omit it otherwise.
if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX()))
- getTargetStreamer().emitDirectiveModuleOddSPReg(STI.useOddSPReg(),
- ABI.IsO32());
+ getTargetStreamer().emitDirectiveModuleOddSPReg();
}
void MipsAsmPrinter::emitInlineAsmStart() const {
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 3c2b843b8963..a7f3304a3da8 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -145,7 +145,7 @@ public:
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 04a9ef5ef051..081c393a09be 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -131,6 +131,6 @@ public:
bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 3d020abe2704..c2651b82d285 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -143,7 +144,7 @@ private:
unsigned materializeGV(const GlobalValue *GV, MVT VT);
unsigned materializeInt(const Constant *C, MVT VT);
unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
- unsigned materializeExternalCallSym(const char *SynName);
+ unsigned materializeExternalCallSym(MCSymbol *Syn);
MachineInstrBuilder emitInst(unsigned Opc) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
@@ -369,12 +370,12 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
return DestReg;
}
-unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) {
+unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
unsigned DestReg = createResultReg(RC);
emitInst(Mips::LW, DestReg)
.addReg(MFI->getGlobalBaseReg())
- .addExternalSymbol(SymName, MipsII::MO_GOT);
+ .addSym(Sym, MipsII::MO_GOT);
return DestReg;
}
@@ -1234,7 +1235,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ MCSymbol *Symbol = CLI.Symbol;
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
@@ -1286,8 +1287,8 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
unsigned DestAddress;
- if (SymName)
- DestAddress = materializeExternalCallSym(SymName);
+ if (Symbol)
+ DestAddress = materializeExternalCallSym(Symbol);
else
DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index dab9c055df6f..5eabd58e8686 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -49,6 +49,6 @@ protected:
const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 83be74f0d466..1426d0fbf516 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -129,6 +129,6 @@ private:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index e4f3cde0c804..bc9a1ce64097 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -204,7 +204,7 @@ namespace llvm {
SDL,
SDR
};
- } // namespace MipsISD
+ }
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
@@ -566,6 +566,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 3daff5fa5d36..08efc3509046 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -146,6 +146,6 @@ private:
const MipsInstrInfo *createMips16InstrInfo(const MipsSubtarget &STI);
const MipsInstrInfo *createMipsSEInstrInfo(const MipsSubtarget &STI);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 2a7949eb15eb..ab98c9054e74 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1050,8 +1050,12 @@ class SCBase<string opstr, RegisterOperand RO> :
let Constraints = "$rt = $dst";
}
-class MFC3OP<string asmstr, RegisterOperand RO> :
- InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins),
+class MFC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> :
+ InstSE<(outs RO:$rt), (ins RD:$rd, uimm16:$sel),
+ !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
+
+class MTC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> :
+ InstSE<(outs RO:$rd), (ins RD:$rt, uimm16:$sel),
!strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
class TrapBase<Instruction RealInst>
@@ -1278,7 +1282,9 @@ def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>,
def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
ISA_MIPS2_NOT_32R6_64R6;
-def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
+let AdditionalPredicates = [NotInMicroMips] in {
+def BREAK : MMRel, StdMMR6Rel, BRK_FT<"break">, BRK_FM<0xd>;
+}
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
@@ -1288,7 +1294,9 @@ def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
}
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
-def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+}
def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
@@ -1484,10 +1492,10 @@ def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
-def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32;
-def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32;
-def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
-def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
+def MFC0 : MFC3OP<"mfc0", GPR32Opnd, COP0Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32;
+def MTC0 : MTC3OP<"mtc0", COP0Opnd, GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32;
+def MFC2 : MFC3OP<"mfc2", GPR32Opnd, COP2Opnd>, MFC3OP_FM<0x12, 0>;
+def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>;
class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther, asmstr>;
@@ -1603,11 +1611,13 @@ def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
-def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+}
+def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 COP0Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
+def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, COP2Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 COP2Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
}
@@ -1623,7 +1633,9 @@ def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+}
def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
def : MipsInstAlias<"teq $rs, $rt",
@@ -1707,6 +1719,12 @@ def BLEU : CondBranchPseudo<"bleu">;
def BGEU : CondBranchPseudo<"bgeu">;
def BGTU : CondBranchPseudo<"bgtu">;
+def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+
+def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 6b2a44d7a893..80d9b75b85b7 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -88,6 +88,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Offset += MO.getOffset();
break;
+ case MachineOperand::MO_MCSymbol:
+ Symbol = MO.getMCSymbol();
+ Offset += MO.getOffset();
+ break;
+
case MachineOperand::MO_JumpTableIndex:
Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
break;
@@ -141,6 +146,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_MCSymbol:
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_BlockAddress:
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index a8bd1cd78d1d..1ce27e401850 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -45,6 +45,6 @@ private:
MCSymbolRefExpr::VariantKind Kind) const;
bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index 8568137ff374..b18a673912f8 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ namespace {
};
char MipsModuleDAGToDAGISel::ID = 0;
-} // namespace
+}
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 746feab1d9a8..23f0b7070d62 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -49,6 +49,7 @@ public:
FGR64RegClass = &(TRI->getRegClass(Mips::FGR64RegClassID));
AFGR64RegClass = &(TRI->getRegClass(Mips::AFGR64RegClassID));
MSA128BRegClass = &(TRI->getRegClass(Mips::MSA128BRegClassID));
+ COP0RegClass = &(TRI->getRegClass(Mips::COP0RegClassID));
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
@@ -66,6 +67,7 @@ private:
const MCRegisterClass *FGR64RegClass;
const MCRegisterClass *AFGR64RegClass;
const MCRegisterClass *MSA128BRegClass;
+ const MCRegisterClass *COP0RegClass;
const MCRegisterClass *COP2RegClass;
const MCRegisterClass *COP3RegClass;
uint32_t ri_gprmask;
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 5c71272e99be..b6cd79193cfc 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -43,7 +43,7 @@ namespace {
};
char MipsOs16::ID = 0;
-} // namespace
+}
// Figure out if we need float point based on the function signature.
// We need to move variables in and/or out of floating point
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 7497a2556738..02bcac5a3ddb 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -201,6 +201,10 @@ let Namespace = "Mips" in {
foreach I = 0-7 in
def FCC#I : MipsReg<#I, "fcc"#I>;
+ // COP0 registers.
+ foreach I = 0-31 in
+ def COP0#I : MipsReg<#I, ""#I>;
+
// COP2 registers.
foreach I = 0-31 in
def COP2#I : MipsReg<#I, ""#I>;
@@ -431,6 +435,10 @@ def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
+// Coprocessor 0 registers.
+def COP0 : RegisterClass<"Mips", [i32], 32, (sequence "COP0%u", 0, 31)>,
+ Unallocatable;
+
// Coprocessor 2 registers.
def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
Unallocatable;
@@ -559,6 +567,10 @@ def HWRegsAsmOperand : MipsAsmRegOperand {
let Name = "HWRegsAsmReg";
}
+def COP0AsmOperand : MipsAsmRegOperand {
+ let Name = "COP0AsmReg";
+}
+
def COP2AsmOperand : MipsAsmRegOperand {
let Name = "COP2AsmReg";
}
@@ -609,6 +621,10 @@ def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
let ParserMatchClass = ACC64DSPAsmOperand;
}
+def COP0Opnd : RegisterOperand<COP0> {
+ let ParserMatchClass = COP0AsmOperand;
+}
+
def COP2Opnd : RegisterOperand<COP2> {
let ParserMatchClass = COP2AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index a858f30b94a8..ec7bf314c641 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -75,7 +75,7 @@ private:
const MipsSEInstrInfo &TII;
const MipsRegisterInfo &RegInfo;
};
-} // namespace
+}
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_), MRI(MF.getRegInfo()),
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index ee56b8b8c8ff..2fcd6bbb9a15 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -39,6 +39,6 @@ public:
unsigned ehDataReg(unsigned I) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index fb2f04121556..a894034020e9 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -126,6 +126,6 @@ private:
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 623630a18078..d44f8d82ec3e 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -112,6 +112,6 @@ namespace llvm {
MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI,
MachineBasicBlock *BB) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index cdafe9f4d48b..bebbabf7b838 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -113,6 +113,6 @@ private:
MachineBasicBlock::iterator I) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index feddf9808264..061423fbeb86 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~MipsSelectionDAGInfo();
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index c8a2e4bd72c5..5f9296812e1c 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -292,6 +292,6 @@ public:
return &InstrItins;
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 976970ccbcc6..38b2ecff7d7f 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -90,6 +90,6 @@ public:
CodeGenOpt::Level OL);
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 39cadc1e0f83..6ce1be707d04 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -80,22 +80,15 @@ public:
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg);
- /// Emit a '.module fp=value' directive using the given values.
- /// Updates the .MIPS.abiflags section
- virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
- bool Is32BitABI) {
- ABIFlagsSection.setFpABI(Value, Is32BitABI);
- }
-
- /// Emit a '.module fp=value' directive using the current values of the
- /// .MIPS.abiflags section.
- void emitDirectiveModuleFP() {
- emitDirectiveModuleFP(ABIFlagsSection.getFpABI(),
- ABIFlagsSection.Is32BitABI);
- }
-
- virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
+ // FP abiflags directives
+ virtual void emitDirectiveModuleFP();
+ virtual void emitDirectiveModuleOddSPReg();
+ virtual void emitDirectiveModuleSoftFloat();
+ virtual void emitDirectiveModuleHardFloat();
virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
+ virtual void emitDirectiveSetOddSPReg();
+ virtual void emitDirectiveSetNoOddSPReg();
+
void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
@@ -198,11 +191,14 @@ public:
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
- // ABI Flags
- void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
- bool Is32BitABI) override;
- void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+ // FP abiflags directives
+ void emitDirectiveModuleFP() override;
+ void emitDirectiveModuleOddSPReg() override;
+ void emitDirectiveModuleSoftFloat() override;
+ void emitDirectiveModuleHardFloat() override;
void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
+ void emitDirectiveSetOddSPReg() override;
+ void emitDirectiveSetNoOddSPReg() override;
};
// This part is for ELF object output
@@ -244,9 +240,7 @@ public:
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
- // ABI Flags
- void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
void emitMipsAbiFlags();
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 99e950eba80f..05fe06dbc07c 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
NVPTXLowerAggrCopies.cpp
NVPTXLowerKernelArgs.cpp
NVPTXLowerAlloca.cpp
+ NVPTXPeephole.cpp
NVPTXMCExpr.cpp
NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 8144f3fde730..02c5a210d099 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -49,6 +49,6 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index b55664ed32a7..a72ae2ef53a7 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -94,7 +94,7 @@ enum {
IsSurfTexQueryFlag = 0x800,
IsTexModeUnifiedFlag = 0x1000
};
-} // namespace NVPTXII
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 8a28b089ce35..221d2f093aeb 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,7 +54,10 @@ createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->initMCCodeGenInfo(RM, CM, OL);
+
+ // The default relocation model is used regardless of what the client has
+ // specified, as it is the only relocation model currently supported.
+ X->initMCCodeGenInfo(Reloc::Default, CM, OL);
return X;
}
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index 1480b61afdbe..a2d670f8d39d 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -43,6 +43,6 @@ public:
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index d06d61f5e550..fe28214e9588 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -71,6 +71,7 @@ MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
+MachineFunctionPass *createNVPTXPeephole();
bool isImageOrSamplerVal(const Value *, const Module *);
@@ -133,7 +134,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-} // namespace PTXLdStInstCode
+}
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
@@ -152,7 +153,7 @@ enum CvtMode {
FTZ_FLAG = 0x10,
SAT_FLAG = 0x20
};
-} // namespace PTXCvtMode
+}
/// PTXCmpMode - Comparison mode enumeration
namespace PTXCmpMode {
@@ -180,9 +181,9 @@ enum CmpMode {
BASE_MASK = 0xFF,
FTZ_FLAG = 0x100
};
-} // namespace PTXCmpMode
-} // namespace NVPTX
-} // namespace llvm
+}
+}
+} // end namespace llvm;
// Defines symbolic names for NVPTX registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 1a1a8ca7c666..cadd7a46cd9d 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -109,7 +109,7 @@ void VisitGlobalVariableForEmission(
Visited.insert(GV);
Visiting.erase(GV);
}
-} // namespace
+}
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
@@ -826,7 +826,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler();
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1, STI);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 12d80a34a4e8..f6f7685e76f9 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -349,6 +349,6 @@ public:
DebugLoc prevDebugLoc;
void emitLineNumberAsDotLoc(const MachineInstr &);
};
-} // namespace llvm
+} // end of namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 2d5e74c4c4bf..7d4be8e809cf 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -38,7 +38,7 @@ public:
/// \brief Clean up the name to remove symbols invalid in PTX.
std::string cleanUpName(StringRef Name);
};
-} // namespace
+}
char NVPTXAssignValidGlobalNames::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 3eb7024ff08a..69a229e32f43 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -107,7 +107,7 @@ private:
/// Helper function for bitcasts.
Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
-} // namespace
+}
char NVPTXFavorNonGenericAddrSpaces::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 5503494fc3c8..9b34aef3fdec 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -35,35 +35,33 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
if (MF.getFrameInfo()->hasStackObjects()) {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- // Insert "mov.u32 %SP, %Depot"
- MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineInstr *MI = MBB.begin();
+ MachineRegisterInfo &MR = MF.getRegInfo();
+
// This instruction really occurs before first instruction
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // mov %SPL, %depot;
- // cvta.local %SP, %SPL;
- if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
- unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
- MachineInstr *MI =
- BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
- NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
- LocalReg).addImm(MF.getFunctionNumber());
- } else {
- unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
- MachineInstr *MI =
- BuildMI(MBB, MBBI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
- LocalReg).addImm(MF.getFunctionNumber());
+ // Emits
+ // mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
+ // for local address accesses in MF.
+ bool Is64Bit =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
+ unsigned CvtaLocalOpcode =
+ (Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
+ unsigned MovDepotOpcode =
+ (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
+ if (!MR.use_empty(NVPTX::VRFrame)) {
+ // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
+ MI = BuildMI(MBB, MI, dl,
+ MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
+ NVPTX::VRFrame)
+ .addReg(NVPTX::VRFrameLocal);
}
+ BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+ NVPTX::VRFrameLocal)
+ .addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 488edecc6e7b..14f8bb7b98fe 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -31,6 +31,6 @@ public:
MachineBasicBlock::iterator I) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 5879df31f8a6..fe20580c83a2 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -95,6 +95,6 @@ private:
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
};
-} // namespace
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b5af72ab855a..09e0bd5d3d88 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,7 +206,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
// Turn FP truncstore into trunc + store.
+ // FIXME: vector types should also be expanded
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 276851f872ea..ed94775b3002 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -427,7 +427,7 @@ enum NodeType : unsigned {
Suld3DV4I16Zero,
Suld3DV4I32Zero
};
-} // namespace NVPTXISD
+}
class NVPTXSubtarget;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index c86f861acd55..aa36b6be7250 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -42,7 +42,7 @@ private:
Value *cleanupValue(Value *V);
void replaceWith(Instruction *From, ConstantInt *To);
};
-} // namespace
+}
char NVPTXImageOptimizer::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index 24dcb122b94e..b533f316d8a9 100644
--- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -132,6 +132,10 @@ void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
assert(!Arg->hasByValAttr() &&
"byval params should be handled by handleByValParam");
+ // Do nothing if the argument already points to the global address space.
+ if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+ return;
+
Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
Instruction *ArgInGlobal = new AddrSpaceCastInst(
Arg, PointerType::get(Arg->getType()->getPointerElementType(),
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 4b9322c77a40..10f1135ad841 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -46,6 +46,6 @@ public:
return ImageHandleList[Idx].c_str();
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
new file mode 100644
index 000000000000..a61c291d233f
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -0,0 +1,154 @@
+//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning
+// of a MachineFunction.
+//
+// mov %SPL, %depot
+// cvta.local %SP, %SPL
+//
+// Because Frame Index is a generic address and alloca can only return generic
+// pointer, without this pass the instructions producing alloca'ed address will
+// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
+// this address with their .local versions, but this may introduce a lot of
+// cvta.to.local instructions. Performance can be improved if we avoid casting
+// address back and forth and directly calculate local address based on %SPL.
+// This peephole pass optimizes these cases, for example
+//
+// It will transform the following pattern
+// %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
+// %vreg1<def> = cvta_to_local_yes_64 %vreg0
+//
+// into
+// %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
+//
+// %VRFrameLocal is the virtual register name of %SPL
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-peephole"
+
+namespace llvm {
+void initializeNVPTXPeepholePass(PassRegistry &);
+}
+
+namespace {
+struct NVPTXPeephole : public MachineFunctionPass {
+ public:
+ static char ID;
+ NVPTXPeephole() : MachineFunctionPass(ID) {
+ initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "NVPTX optimize redundant cvta.to.local instruction";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char NVPTXPeephole::ID = 0;
+
+INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
+
+static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
+ auto &MBB = *Root.getParent();
+ auto &MF = *MBB.getParent();
+ // Check current instruction is cvta.to.local
+ if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
+ Root.getOpcode() != NVPTX::cvta_to_local_yes)
+ return false;
+
+ auto &Op = Root.getOperand(1);
+ const auto &MRI = MF.getRegInfo();
+ MachineInstr *GenericAddrDef = nullptr;
+ if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+ GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+ }
+
+ // Check the register operand is uniquely defined by LEA_ADDRi instruction
+ if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
+ (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
+ GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
+ return false;
+ }
+
+ // Check the LEA_ADDRi operand is Frame index
+ auto &BaseAddrOp = GenericAddrDef->getOperand(1);
+ if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
+ return true;
+ }
+
+ return false;
+}
+
+static void CombineCVTAToLocal(MachineInstr &Root) {
+ auto &MBB = *Root.getParent();
+ auto &MF = *MBB.getParent();
+ const auto &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
+ Root.getOperand(0).getReg())
+ .addReg(NVPTX::VRFrameLocal)
+ .addOperand(Prev.getOperand(2));
+
+ MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
+
+ // Check if MRI has only one non dbg use, which is Root
+ if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
+ Prev.eraseFromParentAndMarkDBGValuesForRemoval();
+ }
+ Root.eraseFromParentAndMarkDBGValuesForRemoval();
+}
+
+bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ // Loop over all of the basic blocks.
+ for (auto &MBB : MF) {
+ // Traverse the basic block.
+ auto BlockIter = MBB.begin();
+
+ while (BlockIter != MBB.end()) {
+ auto &MI = *BlockIter++;
+ if (isCVTAToLocalCombinationCandidate(MI)) {
+ CombineCVTAToLocal(MI);
+ Changed = true;
+ }
+ } // Instruction
+ } // Basic Block
+
+ // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
+ const auto &MRI = MF.getRegInfo();
+ if (MRI.use_empty(NVPTX::VRFrame)) {
+ if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
+ MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ }
+ }
+
+ return Changed;
+}
+
+MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index ea58f7787489..5fd69a6815a8 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -39,7 +39,7 @@ public:
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
-} // namespace
+}
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 3ef997b006fa..6e97f9efbc27 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -69,7 +69,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
return "";
}
-} // namespace llvm
+}
NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index efcee6b6f2bd..ff6ccc457db7 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -65,5 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot,
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot,
(sequence "ENVREG%u", 0, 31))>;
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index bb0adc59a3fd..e83f735a551e 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -45,7 +45,7 @@ private:
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
-} // namespace
+}
char NVPTXReplaceImageHandles::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index d4520451d37d..c7287719be5f 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -103,6 +103,6 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index c071ee82abc6..9d9072efc382 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -210,6 +210,10 @@ bool NVPTXPassConfig::addInstSelector() {
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
+ // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+ // index with VRFrame register. NVPTXPeephole need to be run after that and
+ // will replace VRFrame with VRFrameLocal when possible.
+ addPass(createNVPTXPeephole());
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 4d937c6a8bec..7e2ce73daaa3 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -91,6 +91,6 @@ void dumpInstRec(Value *v, std::set<Instruction *> *visited);
void dumpInstRec(Value *v);
void dumpParent(Value *v);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 1c2043069e1e..5e375b7852e4 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -75,7 +75,7 @@ private:
bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
-} // namespace
+}
ModulePass *llvm::createNVVMReflectPass() {
return new NVVMReflect();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 36119d5d7e46..992be5b966c1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-} // namespace
+}
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ad614f2ddf35..ae43e59d3cb1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -50,7 +50,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace PPC
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 489905b26fcc..5c38fe173d96 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -219,7 +219,7 @@ public:
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
-} // namespace
+}
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 18818a1c335e..77fe45882289 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -81,7 +81,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // namespace llvm
+} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
// undefine PPC here. PPC may be predefined on some hosts.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 9b5491f92491..9d7289658f0f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -51,7 +51,7 @@ public:
FixedValue);
}
};
-} // namespace
+}
/// computes the log2 of the size of the relocation,
/// used for relocation_info::r_length.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index ff9b059d906a..6075631a541f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -62,7 +62,7 @@ namespace PPC {
/// Assume the condition register is set by MI(a,b), return the predicate if
/// we modify the instructions such that condition register is set by MI(b,a).
Predicate getSwappedPredicate(Predicate Opcode);
-} // namespace PPC
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 49f77b538c1b..ae8d8b4f5dfe 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -98,6 +98,6 @@ namespace llvm {
};
} // end namespace PPCII
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 2b6030aea2b1..940d55ac1f36 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -51,7 +51,7 @@ namespace {
}
};
char PPCBSel::ID = 0;
-} // namespace
+}
INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
false, false)
diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h
index 550cac62927e..eb904a858592 100644
--- a/lib/Target/PowerPC/PPCCallingConv.h
+++ b/lib/Target/PowerPC/PPCCallingConv.h
@@ -29,7 +29,7 @@ inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 9cd9c2faa51f..fc89753ed94e 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -191,7 +191,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
"PowerPC Early-Return Creation", false, false)
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 82ff5307d0b7..fafcd76f9d18 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1448,9 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ const MCSymbol *Symbol = CLI.Symbol;
- if (!Callee && !SymName)
+ if (!Callee && !Symbol)
return false;
// Allow SelectionDAG isel to handle tail calls.
@@ -2347,4 +2347,4 @@ namespace llvm {
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
-} // namespace llvm
+}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index b232863c9614..28d074ecd79d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -93,6 +93,6 @@ public:
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 5f9f9f2e341f..c85c2610d2f5 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -234,7 +234,7 @@ private:
SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
-} // namespace
+}
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
@@ -2773,18 +2773,6 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else
DM[i] = 1;
- // For little endian, we must swap the input operands and adjust
- // the mask elements (reverse and invert them).
- if (PPCSubTarget->isLittleEndian()) {
- std::swap(Op1, Op2);
- unsigned tmp = DM[0];
- DM[0] = 1 - DM[1];
- DM[1] = 1 - tmp;
- }
-
- SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
- MVT::i32);
-
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<LoadSDNode>(Op1.getOperand(0))) {
@@ -2800,6 +2788,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
}
+ // For little endian, we must swap the input operands and adjust
+ // the mask elements (reverse and invert them).
+ if (PPCSubTarget->isLittleEndian()) {
+ std::swap(Op1, Op2);
+ unsigned tmp = DM[0];
+ DM[0] = 1 - DM[1];
+ DM[1] = 1 - tmp;
+ }
+
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
+ MVT::i32);
SDValue Ops[] = { Op1, Op2, DMV };
return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1cdfb4178544..594472bbb47b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
}
}
+/**
+ * \brief Common function used to match vmrgew and vmrgow shuffles
+ *
+ * The indexOffset determines whether to look for even or odd words in
+ * the shuffle mask. This is based on the of the endianness of the target
+ * machine.
+ * - Little Endian:
+ * - Use offset of 0 to check for odd elements
+ * - Use offset of 4 to check for even elements
+ * - Big Endian:
+ * - Use offset of 0 to check for even elements
+ * - Use offset of 4 to check for odd elements
+ * A detailed description of the vector element ordering for little endian and
+ * big endian can be found at
+ * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
+ * Targeting your applications - what little endian and big endian IBM XL C/C++
+ * compiler differences mean to you
+ *
+ * The mask to the shuffle vector instruction specifies the indices of the
+ * elements from the two input vectors to place in the result. The elements are
+ * numbered in array-access order, starting with the first vector. These vectors
+ * are always of type v16i8, thus each vector will contain 16 elements of size
+ * 8. More info on the shuffle vector can be found in the
+ * http://llvm.org/docs/LangRef.html#shufflevector-instruction
+ * Language Reference.
+ *
+ * The RHSStartValue indicates whether the same input vectors are used (unary)
+ * or two different input vectors are used, based on the following:
+ * - If the instruction uses the same vector for both inputs, the range of the
+ * indices will be 0 to 15. In this case, the RHSStart value passed should
+ * be 0.
+ * - If the instruction has two different vectors then the range of the
+ * indices will be 0 to 31. In this case, the RHSStart value passed should
+ * be 16 (indices 0-15 specify elements in the first vector while indices 16
+ * to 31 specify elements in the second vector).
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] IndexOffset Specifies whether to look for even or odd elements
+ * \param[in] RHSStartValue Specifies the starting index for the righthand input
+ * vector to the shuffle_vector instruction
+ * \return true iff this shuffle vector represents an even or odd word merge
+ */
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
+ unsigned RHSStartValue) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
+
+ for (unsigned i = 0; i < 2; ++i)
+ for (unsigned j = 0; j < 4; ++j)
+ if (!isConstantOrUndef(N->getMaskElt(i*4+j),
+ i*RHSStartValue+j+IndexOffset) ||
+ !isConstantOrUndef(N->getMaskElt(i*4+j+8),
+ i*RHSStartValue+j+IndexOffset+8))
+ return false;
+ return true;
+}
+
+/**
+ * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
+ * vmrgow instructions.
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
+ * \param[in] ShuffleKind Identify the type of merge:
+ * - 0 = big-endian merge with two different inputs;
+ * - 1 = either-endian merge with two identical inputs;
+ * - 2 = little-endian merge with two different inputs (inputs are swapped for
+ * little-endian merges).
+ * \param[in] DAG The current SelectionDAG
+ * \return true iff this shuffle mask
+ */
+bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ unsigned indexOffset = CheckEven ? 4 : 0;
+ if (ShuffleKind == 1) // Unary
+ return isVMerge(N, indexOffset, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, indexOffset, 16);
+ else
+ return false;
+ }
+ else {
+ unsigned indexOffset = CheckEven ? 0 : 4;
+ if (ShuffleKind == 1) // Unary
+ return isVMerge(N, indexOffset, 0);
+ else if (ShuffleKind == 0) // Normal
+ return isVMerge(N, indexOffset, 16);
+ else
+ return false;
+ }
+ return false;
+}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
@@ -3765,7 +3858,7 @@ struct TailCallArgumentInfo {
TailCallArgumentInfo() : FrameIdx(0) {}
};
-} // namespace
+}
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
@@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
return Op;
}
}
@@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -9863,7 +9960,9 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
case ISD::INTRINSIC_W_CHAIN: {
MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
Chain = Intrin->getChain();
- Base = Intrin->getBasePtr();
+ // Similarly to the store case below, Intrin->getBasePtr() doesn't get
+ // us what we want. Get operand 2 instead.
+ Base = Intrin->getOperand(2);
MMO = Intrin->getMemOperand();
break;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index c33d60565b79..02242b512a4f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -353,7 +353,7 @@ namespace llvm {
/// the last operand.
TOC_ENTRY
};
- } // namespace PPCISD
+ }
/// Define some predicates that are used for node matching.
namespace PPC {
@@ -382,6 +382,11 @@ namespace llvm {
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG);
+ /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGEW or VMRGOW instruction
+ bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
@@ -405,7 +410,7 @@ namespace llvm {
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
- } // namespace PPC
+ }
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
@@ -871,6 +876,6 @@ namespace llvm {
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-} // namespace llvm
+}
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 9ff604bbee9d..cb0271fe8d0c 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
}]>;
+def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
+}]>;
+def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
+}]>;
+def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
+}]>;
+def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
+}]>;
+def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
+}]>;
+def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
+}]>;
+
+
+
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
}]>;
@@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
+// Vector merge
+def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgew $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgow $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+// Match vmrgew(x,x) and vmrgow(x,x)
+def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
+ (VMRGEW $vA, $vA)>;
+def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
+ (VMRGOW $vA, $vA)>;
+
+// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be swapped for correct
+// semantics.w
+def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGEW $vB, $vA)>;
+def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGOW $vB, $vA)>;
+
+
// Vector shifts
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index ec94fa5580ff..cf71b1c59869 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -38,6 +38,6 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
return MIB.addFrameIndex(FI).addImm(Offset);
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d3bb7a63c622..696a83860e53 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -352,15 +352,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
bool isPPC64 = Subtarget.isPPC64();
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
+
if (!isUnpredicatedTerminator(I))
return false;
@@ -513,14 +508,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
}
unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 39bf4547733c..e2d6346aa532 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -237,6 +237,6 @@ public:
void getNoopForMachoTarget(MCInst &NopInst) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index d08b80871f3e..43ba4994fde6 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -457,22 +457,34 @@ let Uses = [RM] in {
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v2i64:$XT,
+ (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>;
defm XVCMPEQSP : XX3Form_Rcr<60, 67,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v4i32:$XT,
+ (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>;
defm XVCMPGEDP : XX3Form_Rcr<60, 115,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v2i64:$XT,
+ (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>;
defm XVCMPGESP : XX3Form_Rcr<60, 83,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v4i32:$XT,
+ (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>;
defm XVCMPGTDP : XX3Form_Rcr<60, 107,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v2i64:$XT,
+ (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>;
defm XVCMPGTSP : XX3Form_Rcr<60, 75,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ [(set v4i32:$XT,
+ (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>;
// Move Instructions
def XSABSDP : XX2Form<60, 345,
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index e783b5e65333..b4e1c099f190 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -88,7 +88,7 @@ namespace {
const TargetTransformInfo *TTI;
const DataLayout *DL;
};
-} // namespace
+}
char PPCLoopDataPrefetch::ID = 0;
INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 1891b6315c51..b6e7799402e1 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -87,7 +87,7 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
};
-} // namespace
+}
char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
@@ -113,7 +113,7 @@ namespace {
protected:
ScalarEvolution *SE;
};
-} // namespace
+}
static bool IsPtrInBounds(Value *BasePtr) {
Value *StrippedBasePtr = BasePtr;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index c44d5d70f8dc..76837ecb32de 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -57,7 +57,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
- Mang->getNameWithPrefix(Name, MO.getSymbolName());
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
} else {
const GlobalValue *GV = MO.getGlobal();
TM.getNameWithPrefix(Name, GV, *Mang);
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index d2eaeb42dbc4..2c1378d5670d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~PPCSelectionDAGInfo();
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ea17e1c189b8..e9cc3d4bd5bc 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -58,7 +58,7 @@ namespace PPC {
DIR_PWR8,
DIR_64
};
-} // namespace PPC
+}
class GlobalValue;
class TargetMachine;
@@ -286,6 +286,6 @@ public:
bool enableSubRegLiveness() const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 7a9db0fabb07..2dc0d825c80d 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -156,7 +156,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 61b963fe6da5..bf165c9edc6e 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -145,7 +145,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
"PowerPC TOC Register Dependencies", false, false)
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index a5c4c23c7901..dbe7617d3542 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -22,6 +22,6 @@ public:
virtual void emitAbiVersion(int AbiVersion) = 0;
virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 537db656fd60..5e3ae2a4471b 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -165,7 +165,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index a029ddf0bc08..f352fa647ace 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -317,7 +317,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
"PowerPC VSX FMA Mutation", false, false)
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 939293a5638e..e7ab71ac2106 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -79,7 +79,6 @@ struct PPCVSXSwapEntry {
unsigned int IsStore : 1;
unsigned int IsSwap : 1;
unsigned int MentionsPhysVR : 1;
- unsigned int HasImplicitSubreg : 1;
unsigned int IsSwappable : 1;
unsigned int SpecialHandling : 3;
unsigned int WebRejected : 1;
@@ -224,7 +223,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (MachineInstr &MI : MBB) {
bool RelevantInstr = false;
- bool ImplicitSubreg = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
@@ -232,8 +230,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
unsigned Reg = MO.getReg();
if (isVecReg(Reg)) {
RelevantInstr = true;
- if (MO.getSubReg() != 0)
- ImplicitSubreg = true;
break;
}
}
@@ -249,9 +245,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
PPCVSXSwapEntry SwapEntry{};
int VecIdx = addSwapEntry(&MI, SwapEntry);
- if (ImplicitSubreg)
- SwapVector[VecIdx].HasImplicitSubreg = 1;
-
switch(MI.getOpcode()) {
default:
// Unless noted otherwise, an instruction is considered
@@ -260,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// select, compare, etc.).
SwapVector[VecIdx].IsSwappable = 1;
break;
- case PPC::XXPERMDI:
+ case PPC::XXPERMDI: {
// This is a swap if it is of the form XXPERMDI t, s, s, 2.
// Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
// can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
@@ -268,9 +261,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// SUBREG_TO_REG to find the real source value for comparison.
// If the real source value is a physical register, then mark the
// XXPERMDI as mentioning a physical register.
- // Any other form of XXPERMDI is lane-sensitive and unsafe
- // for the optimization.
- if (MI.getOperand(3).getImm() == 2) {
+ int immed = MI.getOperand(3).getImm();
+ if (immed == 2) {
unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
VecIdx);
unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
@@ -278,7 +270,26 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (trueReg1 == trueReg2)
SwapVector[VecIdx].IsSwap = 1;
}
+ // This is a doubleword splat if it is of the form
+ // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
+ // must look through chains of copy-likes to find the source
+ // register. We turn off the marking for mention of a physical
+ // register, because splatting it is safe; the optimization
+ // will not swap the value in the physical register.
+ else if (immed == 0 || immed == 3) {
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2) {
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].MentionsPhysVR = 0;
+ }
+ }
+ // Any other form of XXPERMDI is lane-sensitive and unsafe
+ // for the optimization.
break;
+ }
case PPC::LVX:
// Non-permuting loads are currently unsafe. We can use special
// handling for this in the future. By not marking these as
@@ -307,14 +318,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
SwapVector[VecIdx].IsStore = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
- case PPC::SUBREG_TO_REG:
- // These are fine provided they are moving between full vector
- // register classes. For example, the VRs are a subset of the
- // VSRs, but each VR and each VSR is a full 128-bit register.
- if (isVecReg(MI.getOperand(0).getReg()) &&
- isVecReg(MI.getOperand(2).getReg()))
- SwapVector[VecIdx].IsSwappable = 1;
- break;
case PPC::COPY:
// These are fine provided they are moving between full vector
// register classes.
@@ -349,7 +352,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::LVSL:
case PPC::LVSR:
case PPC::LVXL:
- case PPC::LXVDSX:
case PPC::STVEBX:
case PPC::STVEHX:
case PPC::STVEWX:
@@ -457,23 +459,19 @@ int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search and
// flag the swap entry indicated by VecIdx (the original
-// XXPERMDI) as mentioning a physical register. Similarly
-// for implicit subregister mentions (which should never
-// happen).
+// XXPERMDI) as mentioning a physical register.
unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
unsigned VecIdx) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
- unsigned CopySrcReg, CopySrcSubreg;
- if (MI->isCopy()) {
+ unsigned CopySrcReg;
+ if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
- CopySrcSubreg = MI->getOperand(1).getSubReg();
- } else {
+ else {
assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
- CopySrcSubreg = MI->getOperand(2).getSubReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
@@ -481,11 +479,6 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
return CopySrcReg;
}
- if (CopySrcSubreg != 0) {
- SwapVector[VecIdx].HasImplicitSubreg = 1;
- return CopySrcReg;
- }
-
return lookThruCopyLike(CopySrcReg, VecIdx);
}
@@ -552,11 +545,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
- // Reject webs containing mentions of physical registers or implicit
- // subregs, or containing operations that we don't know how to handle
- // in a lane-permuted region.
+ // Reject webs containing mentions of physical registers, or containing
+ // operations that we don't know how to handle in a lane-permuted region.
if (SwapVector[EntryIdx].MentionsPhysVR ||
- SwapVector[EntryIdx].HasImplicitSubreg ||
!(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
SwapVector[Repr].WebRejected = 1;
@@ -765,8 +756,6 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "swap ");
if (SwapVector[EntryIdx].MentionsPhysVR)
DEBUG(dbgs() << "physreg ");
- if (SwapVector[EntryIdx].HasImplicitSubreg)
- DEBUG(dbgs() << "implsubreg ");
if (SwapVector[EntryIdx].IsSwappable) {
DEBUG(dbgs() << "swappable ");
@@ -809,7 +798,7 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "\n");
}
-} // namespace
+} // end default namespace
INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
"PowerPC VSX Swap Removal", false, false)
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 59f011aefe66..3e56b9e9b883 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -41,7 +41,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-} // namespace
+}
namespace llvm {
extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 800a5f254b8f..0be60fd7a051 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -36,7 +36,7 @@ namespace {
unsigned Type) const override;
};
-} // namespace
+}
unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 34c58da10d5d..8d79396d936e 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -91,7 +91,7 @@ namespace llvm {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
- } // namespace Sparc
-} // namespace llvm
+ }
+}
#endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 8f62de4a4fd2..a9c9f15454ec 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -41,7 +41,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
bool IsLIttleEndian, uint8_t OSABI);
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for Sparc registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 133af8694139..96378d522dc0 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -33,7 +33,7 @@ namespace llvm {
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
MCInst &OutMI,
AsmPrinter &AP);
-} // namespace llvm
+} // end namespace llvm;
namespace llvm {
// Enums corresponding to Sparc condition codes, both icc's and fcc's. These
@@ -74,7 +74,7 @@ namespace llvm {
FCC_ULE = 14+16, // Unordered or Less or Equal
FCC_O = 15+16 // Ordered
};
- } // namespace SPCC
+ }
inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
switch (CC) {
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 3d73bbd0d90c..bb3b78861cbd 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -55,6 +55,6 @@ private:
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index a4b9c79c3264..b6bc3d255713 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -49,7 +49,7 @@ namespace llvm {
TLS_LD,
TLS_CALL
};
- } // namespace SPISD
+ }
class SparcTargetLowering : public TargetLowering {
const SparcSubtarget *Subtarget;
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index b59dd896019c..15673f134d80 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -96,6 +96,6 @@ public:
unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index b1f795b81e8f..a02bae07a336 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -353,13 +353,6 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
-let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
- def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
-
-let rd = 0 in
- def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
- "unimp $imm22", []>;
-
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence. This has to handle all
// permutations of selection between i32/f32/f64 on ICC and FCC.
@@ -406,36 +399,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in {
[(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
-// JMPL Instruction.
-let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
- DecoderMethod = "DecodeJMPL" in {
- def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr),
- "jmpl $addr, $dst", []>;
- def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr),
- "jmpl $addr, $dst", []>;
-}
-
-// Section A.3 - Synthetic Instructions, p. 85
-// special cases of JMPL:
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
- isCodeGenOnly = 1 in {
- let rd = 0, rs1 = 15 in
- def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
- "jmp %o7+$val", [(retflag simm13:$val)]>;
-
- let rd = 0, rs1 = 31 in
- def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
- "jmp %i7+$val", []>;
-}
-
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
- isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in {
- def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr),
- "rett $addr", []>;
- def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr),
- "rett $addr", []>;
-}
-
// Section B.1 - Load Integer Instructions, p. 90
let DecoderMethod = "DecodeLoadInt" in {
defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>;
@@ -470,6 +433,24 @@ let DecoderMethod = "DecodeStoreQFP" in
defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
+// Section B.8 - SWAP Register with Memory Instruction
+// (Atomic swap)
+let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
+ def SWAPrr : F3_1<3, 0b001111,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val),
+ "swap [$addr], $dst",
+ [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>;
+ def SWAPri : F3_2<3, 0b001111,
+ (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val),
+ "swap [$addr], $dst",
+ [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
+ def SWAPArr : F3_1_asi<3, 0b011111,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val),
+ "swapa [$addr] $asi, $dst",
+ [/*FIXME: pattern?*/]>;
+}
+
+
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
(outs IntRegs:$rd), (ins i32imm:$imm22),
@@ -725,6 +706,56 @@ let Uses = [O6],
}
}
+// Section B.25 - Jump and Link Instruction
+
+// JMPL Instruction.
+let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+ DecoderMethod = "DecodeJMPL" in {
+ def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "jmpl $addr, $dst", []>;
+ def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr),
+ "jmpl $addr, $dst", []>;
+}
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in {
+ let rd = 0, rs1 = 15 in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+ let rd = 0, rs1 = 31 in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
+}
+
+// Section B.26 - Return from Trap Instruction
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
+ isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in {
+ def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr),
+ "rett $addr", []>;
+ def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr),
+ "rett $addr", []>;
+}
+
+
+// Section B.27 - Trap on Integer Condition Codes Instruction
+multiclass TRAP<string regStr> {
+ def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2,
+ CCOp:$cond),
+ !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>;
+ def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm,
+ CCOp:$cond),
+ !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>;
+}
+
+let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in
+ defm TICC : TRAP<"%icc">;
+
+let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
+ def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
+
// Section B.28 - Read State Register Instructions
let rs2 = 0 in
def RDASR : F3_1<2, 0b101000,
@@ -787,6 +818,18 @@ let Predicates = [HasNoV9] in {
}
}
+// Section B.30 - STBAR Instruction
+let hasSideEffects = 1, rd = 0, rs1 = 0b01111, rs2 = 0 in
+ def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>;
+
+
+// Section B.31 - Unimplmented Instruction
+let rd = 0 in
+ def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
+ "unimp $imm22", []>;
+
+// Section B.33 - Floating-point Operate (FPop) Instructions
+
// Convert Integer to Floating-point Instructions, p. 141
def FITOS : F3_3u<2, 0b110100, 0b011000100,
(outs FPRegs:$rd), (ins FPRegs:$rs2),
@@ -1168,29 +1211,10 @@ let rs1 = 0 in
def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
-// Atomic swap.
-let hasSideEffects =1, rd = 0, rs1 = 0b01111, rs2 = 0 in
- def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>;
-
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13),
"membar $simm13", []>;
-let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
- def SWAPrr : F3_1<3, 0b001111,
- (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val),
- "swap [$addr], $dst",
- [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>;
- def SWAPri : F3_2<3, 0b001111,
- (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val),
- "swap [$addr], $dst",
- [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
- def SWAPArr : F3_1_asi<3, 0b011111,
- (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val),
- "swapa [$addr] $asi, $dst",
- [/*FIXME: pattern?*/]>;
-}
-
// TODO: Should add a CASArr variant. In fact, the CAS instruction,
// unlike other instructions, only comes in a form which requires an
// ASI be provided. The ASI value hardcoded here is ASI_PRIMARY, the
@@ -1215,18 +1239,6 @@ let hasSideEffects = 1 in {
}
}
-multiclass TRAP<string regStr> {
- def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2,
- CCOp:$cond),
- !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>;
- def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm,
- CCOp:$cond),
- !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>;
-}
-
-let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in
- defm TICC : TRAP<"%icc">;
-
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 0471443f5961..104744279d9d 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -51,6 +51,6 @@ namespace llvm {
void setLeafProc(bool rhs) { IsLeafProc = rhs; }
bool isLeafProc() const { return IsLeafProc; }
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index 2ceae82c8cdb..6818291b30b4 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~SparcSelectionDAGInfo() override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 0eb3d6593fe6..75fd37f01a19 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -96,7 +96,10 @@ struct SystemZAddressingMode {
// Return a mask with Count low bits set.
static uint64_t allOnes(unsigned int Count) {
- return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+ assert(Count <= 64);
+ if (Count > 63)
+ return UINT64_MAX;
+ return (uint64_t(1) << Count) - 1;
}
// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
@@ -903,6 +906,8 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getSizeInBits() > 64)
+ return nullptr;
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
while (expandRxSBG(RISBG))
@@ -958,6 +963,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getSizeInBits() > 64)
+ return nullptr;
// Try treating each operand of N as the second operand of the RxSBG
// and see which goes deepest.
RxSBGOperands RxSBG[] = {
@@ -993,8 +1002,6 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
Opcode = SystemZ::RISBGN;
}
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
SDValue Ops[5] = {
convertTo(DL, MVT::i64, Op0),
convertTo(DL, MVT::i64, RxSBG[I].Input),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 75845796de79..372f6fb3ea50 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2005,17 +2005,17 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
// bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
// always true for CC>3.
- C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
// ...and the inverse of that.
- C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
// bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
// always true for CC>3.
- C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
// ...and the inverse of that.
- C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
else
llvm_unreachable("Unexpected integer comparison type");
C.CCMask &= CCValid;
@@ -3292,7 +3292,7 @@ struct Permute {
unsigned Operand;
unsigned char Bytes[SystemZ::VectorBytes];
};
-} // namespace
+}
static const Permute PermuteForms[] = {
// VMRHG
@@ -3574,7 +3574,7 @@ struct GeneralShuffle {
// The type of the shuffle result.
EVT VT;
};
-} // namespace
+}
// Add an extra undefined element to the shuffle.
void GeneralShuffle::addUndef() {
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
new file mode 100644
index 000000000000..df04c2a3460b
--- /dev/null
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS WebAssembly.td)
+
+tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(WebAssemblyCommonTableGen)
+
+add_llvm_target(WebAssemblyCodeGen
+ WebAssemblyFrameLowering.cpp
+ WebAssemblyInstrInfo.cpp
+ WebAssemblyISelDAGToDAG.cpp
+ WebAssemblyISelLowering.cpp
+ WebAssemblyMachineFunctionInfo.cpp
+ WebAssemblyRegisterInfo.cpp
+ WebAssemblySelectionDAGInfo.cpp
+ WebAssemblySubtarget.cpp
+ WebAssemblyTargetMachine.cpp
+ WebAssemblyTargetTransformInfo.cpp
+)
+
+add_dependencies(LLVMWebAssemblyCodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..5394b67d2b87
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMWebAssemblyAsmPrinter
+ WebAssemblyInstPrinter.cpp
+ )
diff --git a/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..54df6d65570a
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt -------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyAsmPrinter
+parent = WebAssembly
+required_libraries = MC Support
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/InstPrinter/Makefile b/lib/Target/WebAssembly/InstPrinter/Makefile
new file mode 100644
index 000000000000..87534379f796
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/WebAssembly/AsmPrinter/Makefile ----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyAsmPrinter
+
+# Hack: we need to include 'main' wasm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
new file mode 100644
index 000000000000..fbb985aaafbb
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -0,0 +1,43 @@
+//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Print MCInst instructions to wasm format.
+///
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssembly.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ llvm_unreachable("TODO: implement printRegName");
+}
+
+void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
+ llvm_unreachable("TODO: implement printInst");
+}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
new file mode 100644
index 000000000000..70fcef214ce2
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -0,0 +1,38 @@
+// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This class prints an WebAssembly MCInst to wasm file syntax.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+class MCSubtargetInfo;
+
+class WebAssemblyInstPrinter : public MCInstPrinter {
+public:
+ WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI);
+
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt
new file mode 100644
index 000000000000..04ef9c4e4bcf
--- /dev/null
+++ b/lib/Target/WebAssembly/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/WebAssembly/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = WebAssembly
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = WebAssemblyCodeGen
+parent = WebAssembly
+required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyDesc WebAssemblyInfo
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ccc0f0d7ccbc
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMWebAssemblyDesc
+ WebAssemblyMCAsmInfo.cpp
+ WebAssemblyMCTargetDesc.cpp
+)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..ce7cb5dd4daf
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt ------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyDesc
+parent = WebAssembly
+required_libraries = MC Support WebAssemblyAsmPrinter WebAssemblyInfo
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/Makefile b/lib/Target/WebAssembly/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..11dcb4ff6075
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/WebAssembly/TargetDesc/Makefile ----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
new file mode 100644
index 000000000000..55346f71c6fc
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -0,0 +1,53 @@
+//===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declarations of the WebAssemblyMCAsmInfo
+/// properties.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-mc-asm-info"
+
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+
+WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+ PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit();
+
+ // TODO: What should MaxInstLength be?
+
+ PrivateGlobalPrefix = "";
+ PrivateLabelPrefix = "";
+
+ UseDataRegionDirectives = true;
+
+ Data8bitsDirective = "\t.int8\t";
+ Data16bitsDirective = "\t.int16\t";
+ Data32bitsDirective = "\t.int32\t";
+ Data64bitsDirective = "\t.int64\t";
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+
+ SupportsDebugInformation = true;
+
+ // For now, WebAssembly does not support exceptions.
+ ExceptionsType = ExceptionHandling::None;
+
+ // TODO: UseIntegratedAssembler?
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
new file mode 100644
index 000000000000..d2b8fb7748fc
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -0,0 +1,32 @@
+//===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the WebAssemblyMCAsmInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class Triple;
+
+class WebAssemblyMCAsmInfo final : public MCAsmInfo {
+public:
+ explicit WebAssemblyMCAsmInfo(const Triple &T);
+ ~WebAssemblyMCAsmInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
new file mode 100644
index 000000000000..d248556c62d7
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides WebAssembly-specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCTargetDesc.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssemblyMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-mc-target-desc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT) {
+ MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT);
+ return MAI;
+}
+
+static MCInstPrinter *
+createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant,
+ const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0 || SyntaxVariant == 1)
+ return new WebAssemblyInstPrinter(MAI, MII, MRI);
+ return nullptr;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeWebAssemblyTargetMC() {
+ for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+ }
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
new file mode 100644
index 000000000000..24893daec7ea
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -0,0 +1,53 @@
+//==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides WebAssembly-specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+
+class formatted_raw_ostream;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCObjectWriter;
+class MCStreamer;
+class MCSubtargetInfo;
+class MCTargetStreamer;
+class StringRef;
+class Target;
+class Triple;
+class raw_ostream;
+
+extern Target TheWebAssemblyTarget32;
+extern Target TheWebAssemblyTarget64;
+
+MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+
+} // end namespace llvm
+
+// Defines symbolic names for WebAssembly registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_SUBTARGETINFO_ENUM
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile
new file mode 100644
index 000000000000..35d835c6506c
--- /dev/null
+++ b/lib/Target/WebAssembly/Makefile
@@ -0,0 +1,19 @@
+##===- lib/Target/WebAssembly/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMWebAssemblyCodeGen
+TARGET = WebAssembly
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
new file mode 100644
index 000000000000..7a71060a638f
--- /dev/null
+++ b/lib/Target/WebAssembly/README.txt
@@ -0,0 +1,15 @@
+//===-- README.txt - Notes for WebAssembly code gen -----------------------===//
+
+This WebAssembly backend is presently in a very early stage of development.
+The code should build and not break anything else, but don't expect a lot more
+at this point.
+
+For more information on WebAssembly itself, see the design documents:
+ * https://github.com/WebAssembly/design/blob/master/README.md
+
+The following documents contain some information on the planned semantics and
+binary encoding of WebAssembly itself:
+ * https://github.com/WebAssembly/design/blob/master/AstSemantics.md
+ * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..ef6e4d2b617a
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMWebAssemblyInfo
+ WebAssemblyTargetInfo.cpp
+ )
+
+add_dependencies(LLVMWebAssemblyInfo WebAssemblyCommonTableGen)
diff --git a/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..f4da9239bbe8
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt --------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyInfo
+parent = WebAssembly
+required_libraries = Support
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/TargetInfo/Makefile b/lib/Target/WebAssembly/TargetInfo/Makefile
new file mode 100644
index 000000000000..b021eb6d9455
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/WebAssembly/TargetInfo/Makefile ----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
new file mode 100644
index 000000000000..ddb1eb1d1892
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file registers the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-target-info"
+
+Target llvm::TheWebAssemblyTarget32;
+Target llvm::TheWebAssemblyTarget64;
+
+extern "C" void LLVMInitializeWebAssemblyTargetInfo() {
+ RegisterTarget<Triple::wasm32> X(TheWebAssemblyTarget32, "wasm32",
+ "WebAssembly 32-bit");
+ RegisterTarget<Triple::wasm64> Y(TheWebAssemblyTarget64, "wasm64",
+ "WebAssembly 64-bit");
+}
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
new file mode 100644
index 000000000000..3ff19d46f437
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -0,0 +1,31 @@
+//===-- WebAssembly.h - Top-level interface for WebAssembly ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the entry points for global functions defined in
+/// the LLVM WebAssembly back-end.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
+
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+
+class WebAssemblyTargetMachine;
+class FunctionPass;
+
+FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td
new file mode 100644
index 000000000000..a123bf6f66b6
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssembly.td
@@ -0,0 +1,62 @@
+//- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the WebAssembly architecture, which is
+// also known as "wasm".
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false",
+ "Enable 128-bit SIMD">;
+
+//===----------------------------------------------------------------------===//
+// Architectures.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrInfo.td"
+
+def WebAssemblyInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Processors supported.
+//===----------------------------------------------------------------------===//
+
+// Minimal Viable Product.
+def : ProcessorModel<"mvp", NoSchedModel, []>;
+
+// Latest and greatest experimental version of WebAssembly. Bugs included!
+def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def WebAssembly : Target {
+ let InstructionSet = WebAssemblyInstrInfo;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
new file mode 100644
index 000000000000..e4ca82e963c2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -0,0 +1,74 @@
+//===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of
+/// TargetFrameLowering class.
+///
+/// On WebAssembly, there aren't a lot of things to do here. There are no
+/// callee-saved registers to save, and no spill slots.
+///
+/// The stack grows downward.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyFrameLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-frame-info"
+
+// TODO: Implement a red zone?
+
+/// Return true if the specified function should have a dedicated frame pointer
+/// register.
+bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
+ llvm_unreachable("TODO: implement hasFP");
+}
+
+/// Under normal circumstances, when a frame pointer is not required, we reserve
+/// argument space for call sites in the function immediately on entry to the
+/// current function. This eliminates the need for add/sub sp brackets around
+/// call sites. Returns true if the call frame is included as part of the stack
+/// frame.
+bool WebAssemblyFrameLowering::hasReservedCallFrame(
+ const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+}
+
+void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ llvm_unreachable("TODO: implement emitPrologue");
+}
+
+void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ llvm_unreachable("TODO: implement emitEpilogue");
+}
+
+void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
+ MachineFunction &MF, RegScavenger *RS) const {
+ llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
new file mode 100644
index 000000000000..0b112d02c0bf
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -0,0 +1,48 @@
+// WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This class implements WebAssembly-specific bits of
+/// TargetFrameLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class WebAssemblyFrameLowering final : public TargetFrameLowering {
+public:
+ WebAssemblyFrameLowering()
+ : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16,
+ /*LocalAreaOffset=*/0,
+ /*TransientStackAlignment=*/16,
+ /*StackRealignable=*/true) {}
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
+ /// These methods insert prolog and epilog code into the function.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
new file mode 100644
index 000000000000..518ef332a6c7
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -0,0 +1,73 @@
+//- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines an instruction selector for the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h" // To access function attributes.
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-isel"
+
+//===--------------------------------------------------------------------===//
+/// WebAssembly-specific code to select WebAssembly machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
+ /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+ /// right decision when generating code for different targets.
+ const WebAssemblySubtarget *Subtarget;
+
+ bool ForCodeSize;
+
+public:
+ WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+ }
+
+ const char *getPassName() const override {
+ return "WebAssembly Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ ForCodeSize =
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
+
+ SDNode *Select(SDNode *Node) override;
+
+private:
+ // add select functions here...
+};
+} // end anonymous namespace
+
+SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
+ llvm_unreachable("TODO: implement Select");
+}
+
+/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
+/// for instruction scheduling.
+FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new WebAssemblyDAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
new file mode 100644
index 000000000000..4eec02efbd94
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -0,0 +1,63 @@
+//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyTargetLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyISelLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower"
+
+WebAssemblyTargetLowering::WebAssemblyTargetLowering(
+ const TargetMachine &TM, const WebAssemblySubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ // WebAssembly does not produce floating-point exceptions on normal floating
+ // point operations.
+ setHasFloatingPointExceptions(false);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Lowering private implementation.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ return getDataSection();
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
new file mode 100644
index 000000000000..efd60a7bacd6
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -0,0 +1,49 @@
+//- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the interfaces that WebAssembly uses to lower LLVM
+/// code into a selection DAG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+namespace WebAssemblyISD {
+
+enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+};
+
+} // end namespace WebAssemblyISD
+
+class WebAssemblySubtarget;
+class WebAssemblyTargetMachine;
+
+class WebAssemblyTargetLowering final : public TargetLowering {
+public:
+ WebAssemblyTargetLowering(const TargetMachine &TM,
+ const WebAssemblySubtarget &STI);
+
+private:
+ /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+ /// right decision when generating code for different targets.
+ const WebAssemblySubtarget *Subtarget;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
new file mode 100644
index 000000000000..35e88eec8573
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -0,0 +1,46 @@
+// WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly Atomic operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Implement atomic instructions.
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic fences here...
+
+//===----------------------------------------------------------------------===//
+// Atomic loads
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic loads here...
+
+//===----------------------------------------------------------------------===//
+// Atomic stores
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic stores here...
+
+//===----------------------------------------------------------------------===//
+// Low-level exclusive operations
+//===----------------------------------------------------------------------===//
+
+// TODO: add exclusive operations here...
+
+// Load-exclusives.
+
+// Store-exclusives.
+
+// Store-release-exclusives.
+
+// And clear exclusive.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
new file mode 100644
index 000000000000..8bbf3e9ec87b
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -0,0 +1,28 @@
+// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+// WebAssembly Instruction Format
+class WebAssemblyInst<string cstr> : Instruction {
+ field bits<0> Inst; // Instruction encoding.
+ let Namespace = "WebAssembly";
+ let Pattern = [];
+ let Constraints = cstr;
+}
+
+// Normal instructions
+class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
+ : WebAssemblyInst<cstr> {
+ dag OutOperandList = oops;
+ dag InOperandList = iops;
+ let Pattern = pattern;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
new file mode 100644
index 000000000000..ea8937c8f9f2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -0,0 +1,28 @@
+//===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetInstrInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyInstrInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-instr-info"
+
+WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
+ : RI(STI.getTargetTriple()) {}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
new file mode 100644
index 000000000000..1c4ae22f16d6
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -0,0 +1,37 @@
+//=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetInstrInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
+
+#include "WebAssemblyRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+class WebAssemblySubtarget;
+
+class WebAssemblyInstrInfo final {
+ const WebAssemblyRegisterInfo RI;
+
+public:
+ explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI);
+
+ const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
new file mode 100644
index 000000000000..142eccfbcaa5
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -0,0 +1,46 @@
+// WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+
+def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
+def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
+def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
+ AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific DAG Node Types.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific Operands.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Additional sets of instructions.
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrAtomics.td"
+include "WebAssemblyInstrSIMD.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
new file mode 100644
index 000000000000..e25483ad3f7a
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -0,0 +1,15 @@
+// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly SIMD operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Implement SIMD instructions.
+// Note: use Requires<[HasSIMD128]>.
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..542d984b9006
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -0,0 +1,19 @@
+//=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WebAssembly-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMachineFunctionInfo.h"
+using namespace llvm;
+
+WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
new file mode 100644
index 000000000000..fc5e910b09ef
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -0,0 +1,37 @@
+// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares WebAssembly-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
+
+#include "WebAssemblyRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private
+/// WebAssembly-specific information for each MachineFunction.
+class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
+ MachineFunction &MF;
+
+public:
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ ~WebAssemblyFunctionInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
new file mode 100644
index 000000000000..ad24c90af6a2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -0,0 +1,33 @@
+//===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetRegisterInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyRegisterInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyFrameLowering.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-info"
+
+WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
new file mode 100644
index 000000000000..55300287a51e
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -0,0 +1,35 @@
+// WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// WebAssemblyRegisterInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+
+namespace llvm {
+
+class MachineFunction;
+class RegScavenger;
+class TargetRegisterClass;
+class Triple;
+
+class WebAssemblyRegisterInfo final {
+ const Triple &TT;
+
+public:
+ explicit WebAssemblyRegisterInfo(const Triple &TT);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
new file mode 100644
index 000000000000..7b3d636a2605
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -0,0 +1,28 @@
+//WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the WebAssembly register classes and some nominal
+// physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+class WebAssemblyReg<string n> : Register<n> {
+ let Namespace = "WebAssembly";
+}
+
+class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList>
+ : RegisterClass<"WebAssembly", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
new file mode 100644
index 000000000000..cfd1bafff236
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblySelectionDAGInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-selectiondag-info"
+
+WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
+
+WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
new file mode 100644
index 000000000000..03e8d393558d
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly subclass for
+/// TargetSelectionDAGInfo.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo {
+public:
+ explicit WebAssemblySelectionDAGInfo(const DataLayout *DL);
+ ~WebAssemblySelectionDAGInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
new file mode 100644
index 000000000000..addea8e3cc36
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -0,0 +1,48 @@
+//===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssembly-specific subclass of
+/// TargetSubtarget.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyInstrInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-subtarget"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+WebAssemblySubtarget &
+WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) {
+ // Determine default and user-specified characteristics
+
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ ParseSubtargetFeatures(CPUString, FS);
+ return *this;
+}
+
+WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
+ const std::string &CPU,
+ const std::string &FS,
+ const TargetMachine &TM)
+ : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
+ CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
+
+bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
new file mode 100644
index 000000000000..6f1761940930
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -0,0 +1,79 @@
+//=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetSubtarget.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
+
+#include "WebAssemblyFrameLowering.h"
+#include "WebAssemblyISelLowering.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblySelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
+ bool HasSIMD128;
+
+ /// String name of used CPU.
+ std::string CPUString;
+
+ /// What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ WebAssemblyFrameLowering FrameLowering;
+ WebAssemblyInstrInfo InstrInfo;
+ WebAssemblySelectionDAGInfo TSInfo;
+ WebAssemblyTargetLowering TLInfo;
+
+ /// Initializes using CPUString and the passed in feature string so that we
+ /// can use initializer lists for subtarget initialization.
+ WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS);
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ WebAssemblySubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM);
+
+ const WebAssemblySelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const WebAssemblyFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const WebAssemblyTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const Triple &getTargetTriple() const { return TargetTriple; }
+ bool enableMachineScheduler() const override;
+ bool useAA() const override { return true; }
+
+ // Predicates used by WebAssemblyInstrInfo.td.
+ bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
+ bool hasSIMD128() const { return HasSIMD128; }
+
+ /// Parses features string setting specified subtarget options. Definition of
+ /// function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
new file mode 100644
index 000000000000..6f93248bd13c
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -0,0 +1,173 @@
+//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific subclass of TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm"
+
+extern "C" void LLVMInitializeWebAssemblyTarget() {
+ // Register the target.
+ RegisterTargetMachine<WebAssemblyTargetMachine> X(TheWebAssemblyTarget32);
+ RegisterTargetMachine<WebAssemblyTargetMachine> Y(TheWebAssemblyTarget64);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Lowering public interface.
+//===----------------------------------------------------------------------===//
+
+/// Create an WebAssembly architecture model.
+///
+WebAssemblyTargetMachine::WebAssemblyTargetMachine(
+ const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT.isArch64Bit()
+ ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
+ : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+ TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+ initAsmInfo();
+
+ // We need a reducible CFG, so disable some optimizations which tend to
+ // introduce irreducibility.
+ setRequiresStructuredCFG(true);
+}
+
+WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {}
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
+
+namespace {
+/// WebAssembly Code Generator Pass Configuration Options.
+class WebAssemblyPassConfig final : public TargetPassConfig {
+public:
+ WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
+ return getTM<WebAssemblyTargetMachine>();
+ }
+
+ FunctionPass *createTargetRegisterAllocator(bool) override;
+ void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+ void addIRPasses() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addILPOpts() override;
+ void addPreRegAlloc() override;
+ void addRegAllocPasses(bool Optimized);
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+} // end anonymous namespace
+
+TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
+ });
+}
+
+TargetPassConfig *
+WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new WebAssemblyPassConfig(this, PM);
+}
+
+FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
+ return nullptr; // No reg alloc
+}
+
+void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ assert(!RegAllocPass && "WebAssembly uses no regalloc!");
+ addRegAllocPasses(false);
+}
+
+void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ assert(!RegAllocPass && "WebAssembly uses no regalloc!");
+ addRegAllocPasses(true);
+}
+
+//===----------------------------------------------------------------------===//
+// The following functions are called from lib/CodeGen/Passes.cpp to modify
+// the CodeGen pass sequence.
+//===----------------------------------------------------------------------===//
+
+void WebAssemblyPassConfig::addIRPasses() {
+ // FIXME: the default for this option is currently POSIX, whereas
+ // WebAssembly's MVP should default to Single.
+ if (TM->Options.ThreadModel == ThreadModel::Single)
+ addPass(createLowerAtomicPass());
+ else
+ // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
+ // control specifically what gets lowered.
+ addPass(createAtomicExpandPass(TM));
+
+ TargetPassConfig::addIRPasses();
+}
+
+bool WebAssemblyPassConfig::addPreISel() { return false; }
+
+bool WebAssemblyPassConfig::addInstSelector() {
+ addPass(
+ createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+ return false;
+}
+
+bool WebAssemblyPassConfig::addILPOpts() { return true; }
+
+void WebAssemblyPassConfig::addPreRegAlloc() {}
+
+void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+
+void WebAssemblyPassConfig::addPostRegAlloc() {}
+
+void WebAssemblyPassConfig::addPreSched2() {}
+
+void WebAssemblyPassConfig::addPreEmitPass() {}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
new file mode 100644
index 000000000000..3226edcdc614
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -0,0 +1,51 @@
+// WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+
+#include "WebAssemblySubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class WebAssemblyTargetMachine final : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ mutable StringMap<std::unique_ptr<WebAssemblySubtarget>> SubtargetMap;
+
+public:
+ WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ ~WebAssemblyTargetMachine() override;
+ const WebAssemblySubtarget *
+ getSubtargetImpl(const Function &F) const override;
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
new file mode 100644
index 000000000000..ee78b945ada2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -0,0 +1,67 @@
+//===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class GlobalVariable;
+
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+public:
+ WebAssemblyTargetObjectFile() {
+ TextSection = nullptr;
+ DataSection = nullptr;
+ BSSSection = nullptr;
+ ReadOnlySection = nullptr;
+
+ StaticCtorSection = nullptr;
+ StaticDtorSection = nullptr;
+ LSDASection = nullptr;
+ EHFrameSection = nullptr;
+ DwarfAbbrevSection = nullptr;
+ DwarfInfoSection = nullptr;
+ DwarfLineSection = nullptr;
+ DwarfFrameSection = nullptr;
+ DwarfPubTypesSection = nullptr;
+ DwarfDebugInlineSection = nullptr;
+ DwarfStrSection = nullptr;
+ DwarfLocSection = nullptr;
+ DwarfARangesSection = nullptr;
+ DwarfRangesSection = nullptr;
+ }
+
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override {
+ return ReadOnlySection;
+ }
+
+ MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override {
+ return DataSection;
+ }
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
new file mode 100644
index 000000000000..fa88ed526df2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -0,0 +1,28 @@
+//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific TargetTransformInfo
+/// implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmtti"
+
+TargetTransformInfo::PopcntSupportKind
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // TODO: Make Math.popcount32 happen in WebAssembly.
+ return TTI::PSK_Software;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
new file mode 100644
index 000000000000..08bd88c06985
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -0,0 +1,87 @@
+//==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file a TargetTransformInfo::Concept conforming object specific
+/// to the WebAssembly target machine.
+///
+/// It uses the target's detailed information to provide more precise answers to
+/// certain TTI queries, while letting the target independent and default TTI
+/// implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include <algorithm>
+
+namespace llvm {
+
+class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
+ typedef BasicTTIImplBase<WebAssemblyTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const WebAssemblyTargetMachine *TM;
+ const WebAssemblySubtarget *ST;
+ const WebAssemblyTargetLowering *TLI;
+
+ const WebAssemblySubtarget *getST() const { return ST; }
+ const WebAssemblyTargetLowering *getTLI() const { return TLI; }
+
+public:
+ WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+ : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
+ TLI(Arg.TLI) {}
+ WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
+ ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
+ WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ TM = RHS.TM;
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ TM = std::move(RHS.TM);
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ // TODO: Implement more Scalar TTI for WebAssembly
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ // TODO: Implement Vector TTI for WebAssembly
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 6ba897b8636d..9eee4a0f3d82 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
return new X86AsmInstrumentation(STI);
}
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 341fc81c0480..19ebcc44f61e 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -61,6 +61,6 @@ protected:
unsigned InitialFrameReg;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index b3066efbab24..7ec02408ffa4 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
+ bool isMemVX32X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+ }
bool isMemVY32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
+ bool isMemVY32X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+ }
bool isMemVX64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
+ bool isMemVX64X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+ }
bool isMemVY64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
+ bool isMemVY64X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+ }
bool isMemVZ32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 5b53fbef3f71..cfc3ee2fb08f 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -69,7 +69,7 @@ namespace X86 {
extern Target TheX86_32Target, TheX86_64Target;
-} // namespace llvm
+}
static bool translateInstruction(MCInst &target,
InternalInstruction &source,
@@ -551,9 +551,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if(immediate & 0x80)
+ if (immediate & 0x80)
immediate |= ~(0xffull);
break;
+ case TYPE_REL16:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ if (immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
case TYPE_REL32:
case TYPE_REL64:
isBranch = true;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index d990bf3484bf..f73fa75f888e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1165,35 +1165,30 @@ static int readSIB(struct InternalInstruction* insn) {
return -1;
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ // FIXME: The fifth bit (bit index 4) is only to be used for instructions
+ // that understand VSIB indexing. ORing the bit in here is mildy dangerous
+ // because performing math on an 'enum SIBIndex' can produce garbage.
+ // Excluding the "none" value, it should cover 6 spaces of register names:
+ // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI
+ // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX
+ // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX
+ // - 32 possibilities for each of XMM, YMM, ZMM registers
+ // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode,
+ // summing in a fully decoded index between 0 and 31 can end up with a value
+ // that looks like something in the low half of the XMM range.
+ // translateRMMemory() tries to reverse the damage, with only partial success,
+ // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
if (insn->vectorExtensionType == TYPE_EVEX)
index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
- switch (index) {
- case 0x4:
+ if (index == 0x4) {
insn->sibIndex = SIB_INDEX_NONE;
- break;
- default:
+ } else {
insn->sibIndex = (SIBIndex)(sibIndexBase + index);
- if (insn->sibIndex == SIB_INDEX_sib ||
- insn->sibIndex == SIB_INDEX_sib64)
- insn->sibIndex = SIB_INDEX_NONE;
- break;
}
- switch (scaleFromSIB(insn->sib)) {
- case 0:
- insn->sibScale = 1;
- break;
- case 1:
- insn->sibScale = 2;
- break;
- case 2:
- insn->sibScale = 4;
- break;
- case 3:
- insn->sibScale = 8;
- break;
- }
+ insn->sibScale = 1 << scaleFromSIB(insn->sib);
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index ac484f317276..62b6b73e7864 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -140,6 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 2bee518fed68..6e371da37290 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -159,6 +159,6 @@ public:
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 2d85f84d6669..3e0dc1424609 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -29,13 +29,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// Option to allow disabling arithmetic relaxation to workaround PR9807, which
-// is useful when running bitwise comparison experiments on Darwin. We should be
-// able to remove this once PR9807 is resolved.
-static cl::opt<bool>
-MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
- cl::desc("Disable relaxation of arithmetic instruction for X86"));
-
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default:
@@ -243,29 +236,18 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
- if (MCDisableArithRelaxation)
- return false;
-
// Check if this instruction is ever relaxable.
if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
- // Check if it has an expression and is not RIP relative.
- bool hasExp = false;
- bool hasRIP = false;
- for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
- const MCOperand &Op = Inst.getOperand(i);
- if (Op.isExpr())
- hasExp = true;
-
- if (Op.isReg() && Op.getReg() == X86::RIP)
- hasRIP = true;
- }
+ // Check if the relaxable operand has an expression. For the current set of
+ // relaxable instructions, the relaxable operand is always the last operand.
+ unsigned RelaxableOp = Inst.getNumOperands() - 1;
+ if (Inst.getOperand(RelaxableOp).isExpr())
+ return true;
- // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
- // how we do relaxations?
- return hasExp && !hasRIP;
+ return false;
}
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -426,7 +408,7 @@ namespace CU {
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};
-} // namespace CU
+} // end CU namespace
class DarwinX86AsmBackend : public X86AsmBackend {
const MCRegisterInfo &MRI;
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 69e9c7b4a83e..f0d00b0c1bc3 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,7 +41,7 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-} // namespace X86
+} // end namespace X86;
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -213,11 +213,7 @@ namespace X86II {
/// the offset from beginning of section.
///
/// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL,
-
- /// MO_NOPREFIX - On a symbol operand this indicates that the symbol should
- /// not be mangled with a prefix.
- MO_NOPREFIX,
+ MO_SECREL
};
enum : uint64_t {
@@ -762,8 +758,8 @@ namespace X86II {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-} // namespace X86II
+}
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 512afebf482e..a33468dc4769 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-} // namespace
+}
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 7c09e5d59580..89f394582631 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -26,14 +26,17 @@ public:
X86_64ELFRelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
const MCExpr *createExprForRelocation(RelocationRef Rel) override {
- uint64_t RelType; Rel.getType(RelType);
- symbol_iterator SymI = Rel.getSymbol();
+ uint64_t RelType = Rel.getType();
+ elf_symbol_iterator SymI = Rel.getSymbol();
+
+ ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+ if (std::error_code EC = SymNameOrErr.getError())
+ report_fatal_error(EC.message());
+ StringRef SymName = *SymNameOrErr;
- StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
uint64_t SymSize = SymI->getSize();
- auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile());
- int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl());
+ int64_t Addend = *ELFRelocationRef(Rel).getAddend();
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index a523a32b2a2d..4899900dcef9 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace X86
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 020803b57f76..6221baba1793 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -62,7 +62,7 @@ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
/// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS);
-} // namespace X86_MC
+}
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index a5aadd6a385e..c9479b62f7b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -25,12 +25,15 @@ public:
X86_64MachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
const MCExpr *createExprForRelocation(RelocationRef Rel) override {
- const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObjectFile());
+ const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObject());
- uint64_t RelType; Rel.getType(RelType);
+ uint64_t RelType = Rel.getType();
symbol_iterator SymI = Rel.getSymbol();
- StringRef SymName; SymI->getName(SymName);
+ ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+ if (std::error_code EC = SymNameOrErr.getError())
+ report_fatal_error(EC.message());
+ StringRef SymName = *SymNameOrErr;
uint64_t SymAddr; SymI->getAddress(SymAddr);
any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
@@ -89,10 +92,11 @@ public:
symbol_iterator RSymI = Rel.getSymbol();
uint64_t RSymAddr;
RSymI->getAddress(RSymAddr);
- StringRef RSymName;
- RSymI->getName(RSymName);
+ ErrorOr<StringRef> RSymName = RSymI->getName();
+ if (std::error_code EC = RSymName.getError())
+ report_fatal_error(EC.message());
- MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
+ MCSymbol *RSym = Ctx.getOrCreateSymbol(*RSymName);
if (!RSym->isVariable())
RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx));
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 773fbf41a7b1..9e801fc8f191 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -69,7 +69,7 @@ public:
FixedValue);
}
};
-} // namespace
+}
static bool isFixupKindRIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 7d262cdbf51d..bd1bc9943b6d 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool IsCrossSection,
const MCAsmBackend &MAB) const override;
};
-} // namespace
+}
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index dc6dd66bcd85..92f42b68ae51 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() {
MCWinCOFFStreamer::FinishImpl();
}
-} // namespace
+}
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1e7d94287c4a..ef3318ba7580 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-} // namespace llvm
+} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 0139297fc72d..14b69434806e 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-} // namespace llvm
+} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 80f457984951..8403ae6101df 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass();
/// must run after prologue/epilogue insertion and before lowering
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 205140144ab5..ba33248d2039 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -581,34 +581,6 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
return AsmPrinter::GetCPISymbol(CPID);
}
-void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
- SmallString<128> Directive;
- raw_svector_ostream OS(Directive);
- StringRef Name = Sym->getName();
- const Triple &TT = TM.getTargetTriple();
-
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << " /EXPORT:";
- else
- OS << " -export:";
-
- if ((TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) &&
- (Name[0] == getDataLayout().getGlobalPrefix()))
- Name = Name.drop_front();
-
- OS << Name;
-
- if (IsData) {
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << ",DATA";
- else
- OS << ",data";
- }
-
- OS.flush();
- OutStreamer->EmitBytes(Directive);
-}
-
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
@@ -692,39 +664,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (TT.isOSBinFormatCOFF()) {
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
- for (const auto &Function : M)
- if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())
- DLLExportedFns.push_back(getSymbol(&Function));
+ std::string Flags;
+ raw_string_ostream FlagsOS(Flags);
+ for (const auto &Function : M)
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function, *Mang);
for (const auto &Global : M.globals())
- if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())
- DLLExportedGlobals.push_back(getSymbol(&Global));
-
- for (const auto &Alias : M.aliases()) {
- if (!Alias.hasDLLExportStorageClass())
- continue;
-
- if (Alias.getType()->getElementType()->isFunctionTy())
- DLLExportedFns.push_back(getSymbol(&Alias));
- else
- DLLExportedGlobals.push_back(getSymbol(&Alias));
- }
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global, *Mang);
+ for (const auto &Alias : M.aliases())
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias, *Mang);
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+ FlagsOS.flush();
+ // Output collected flags.
+ if (!Flags.empty()) {
OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection());
-
- for (auto & Symbol : DLLExportedGlobals)
- GenerateExportDirective(Symbol, /*IsData=*/true);
- for (auto & Symbol : DLLExportedFns)
- GenerateExportDirective(Symbol, /*IsData=*/false);
+ OutStreamer->EmitBytes(Flags);
}
+
+ SM.serializeToStackMapSection();
}
if (TT.isOSBinFormatELF()) {
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index acba21169c9c..7f5d127c68d5 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -30,8 +30,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
StackMaps SM;
FaultMaps FM;
- void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
-
// This utility class tracks the length of a stackmap instruction's 'shadow'.
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
// invariants (i.e. no other stackmaps, patchpoints, or control flow within
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 6d6831b18b0a..031ba4ba9e66 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -78,7 +78,7 @@ private:
typedef DenseMap<MachineInstr *, CallContext> ContextMap;
bool isLegal(MachineFunction &MF);
-
+
bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -90,6 +90,13 @@ private:
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
unsigned Reg);
+ enum InstClassification { Convert, Skip, Exit };
+
+ InstClassification classifyInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const X86RegisterInfo &RegInfo,
+ DenseSet<unsigned int> &UsedRegs);
+
const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
@@ -99,13 +106,13 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86CallFrameOptimization() {
return new X86CallFrameOptimization();
}
-// This checks whether the transformation is legal.
+// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
@@ -170,9 +177,8 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
if (!OptForSize)
return false;
-
unsigned StackAlign = TFL->getStackAlignment();
-
+
int64_t Advantage = 0;
for (auto CC : CallSeqMap) {
// Call sites where no parameters are passed on the stack
@@ -205,7 +211,6 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
return (Advantage >= 0);
}
-
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TFL = MF.getSubtarget().getFrameLowering();
@@ -237,6 +242,64 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+X86CallFrameOptimization::InstClassification
+X86CallFrameOptimization::classifyInstruction(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
+ if (MI == MBB.end())
+ return Exit;
+
+ // The instructions we actually care about are movs onto the stack
+ int Opcode = MI->getOpcode();
+ if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr)
+ return Convert;
+
+ // Not all calling conventions have only stack MOVs between the stack
+ // adjust and the call.
+
+ // We want to tolerate other instructions, to cover more cases.
+ // In particular:
+ // a) PCrel calls, where we expect an additional COPY of the basereg.
+ // b) Passing frame-index addresses.
+ // c) Calling conventions that have inreg parameters. These generate
+ // both copies and movs into registers.
+ // To avoid creating lots of special cases, allow any instruction
+ // that does not write into memory, does not def or use the stack
+ // pointer, and does not def any register that was used by a preceding
+ // push.
+ // (Reading from memory is allowed, even if referenced through a
+ // frame index, since these will get adjusted properly in PEI)
+
+ // The reason for the last condition is that the pushes can't replace
+ // the movs in place, because the order must be reversed.
+ // So if we have a MOV32mr that uses EDX, then an instruction that defs
+ // EDX, and then the call, after the transformation the push will use
+ // the modified version of EDX, and not the original one.
+ // Since we are still in SSA form at this point, we only need to
+ // make sure we don't clobber any *physical* registers that were
+ // used by an earlier mov that will become a push.
+
+ if (MI->isCall() || MI->mayStore())
+ return Exit;
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned int Reg = MO.getReg();
+ if (!RegInfo.isPhysicalRegister(Reg))
+ continue;
+ if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
+ return Exit;
+ if (MO.isDef()) {
+ for (unsigned int U : UsedRegs)
+ if (RegInfo.regsOverlap(Reg, U))
+ return Exit;
+ }
+ }
+
+ return Skip;
+}
+
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
@@ -254,8 +317,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
- unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
-
+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
+
// A zero adjustment means no stack parameters
if (!MaxAdjust) {
Context.NoStackParams = true;
@@ -284,11 +347,17 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
if (MaxAdjust > 4)
Context.MovVector.resize(MaxAdjust, nullptr);
- do {
- int Opcode = I->getOpcode();
- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
- break;
+ InstClassification Classification;
+ DenseSet<unsigned int> UsedRegs;
+ while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
+ Exit) {
+ if (Classification == Skip) {
+ ++I;
+ continue;
+ }
+
+ // We know the instruction is a MOV32mi/MOV32mr.
// We only want movs of the form:
// movl imm/r32, k(%esp)
// If we run into something else, bail.
@@ -323,24 +392,20 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
return;
Context.MovVector[StackDisp] = I;
- ++I;
- } while (I != MBB.end());
-
- // We now expect the end of the sequence - a call and a stack adjust.
- if (I == MBB.end())
- return;
+ for (const MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ unsigned int Reg = MO.getReg();
+ if (RegInfo.isPhysicalRegister(Reg))
+ UsedRegs.insert(Reg);
+ }
- // For PCrel calls, we expect an additional COPY of the basereg.
- // If we find one, skip it.
- if (I->isCopy()) {
- if (I->getOperand(1).getReg() ==
- MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
- ++I;
- else
- return;
+ ++I;
}
- if (!I->isCall())
+ // We now expect the end of the sequence. If we stopped early,
+ // or reached the end of the block without finding a call, bail.
+ if (I == MBB.end() || !I->isCall())
return;
Context.Call = I;
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index a377eb6051ae..0eb2494f1d63 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3dc75d76cee3..02645460b6a2 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -2821,7 +2822,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool &IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ MCSymbol *Symbol = CLI.Symbol;
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CC);
@@ -3117,8 +3118,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
}
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
- if (SymName)
- MIB.addExternalSymbol(SymName, OpFlags);
+ if (Symbol)
+ MIB.addSym(Symbol, OpFlags);
else
MIB.addGlobalAddress(GV, 0, OpFlags);
}
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 8305a0454c80..5eb4faeedff4 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -91,7 +91,7 @@ private:
const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
-} // namespace
+}
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6f1d8e523732..40b9c8a863a3 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -279,7 +279,7 @@ namespace {
void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
@@ -544,7 +544,7 @@ namespace {
return V < TE.from;
}
};
-} // namespace
+}
#ifndef NDEBUG
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
@@ -1530,7 +1530,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
if (Op.isKill())
moveToTop(FPReg, Inst);
else
- duplicateToTop(FPReg, FPReg, Inst);
+ duplicateToTop(FPReg, ScratchFPReg, Inst);
// Emit the call. This will pop the operand.
BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 2858e86cd0e0..c274c8820149 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -153,6 +153,6 @@ private:
bool InEpilogue) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index f6785e161188..6b23e62a2d35 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -67,19 +67,19 @@ namespace {
const Constant *CP;
const BlockAddress *BlockAddr;
const char *ES;
+ MCSymbol *MCSym;
int JT;
unsigned Align; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
X86ISelAddressMode()
- : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
- JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
- }
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
+ MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr ||
- JT != -1 || BlockAddr != nullptr;
+ MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
}
bool hasBaseOrIndexReg() const {
@@ -134,11 +134,16 @@ namespace {
dbgs() << ES;
else
dbgs() << "nul";
+ dbgs() << " MCSym ";
+ if (MCSym)
+ dbgs() << MCSym;
+ else
+ dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
#endif
};
-} // namespace
+}
namespace {
//===--------------------------------------------------------------------===//
@@ -258,6 +263,10 @@ namespace {
else if (AM.ES) {
assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ } else if (AM.MCSym) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
+ assert(AM.SymbolFlags == 0 && "oo");
+ Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
} else if (AM.JT != -1) {
assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
@@ -310,7 +319,7 @@ namespace {
return true;
}
};
-} // namespace
+}
bool
@@ -604,7 +613,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
// Cannot combine ExternalSymbol displacements with integer offsets.
- if (Offset != 0 && AM.ES)
+ if (Offset != 0 && (AM.ES || AM.MCSym))
return true;
int64_t Val = AM.Disp + Offset;
CodeModel::Model M = TM.getCodeModel();
@@ -690,6 +699,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
+ } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+ AM.MCSym = S->getMCSymbol();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
@@ -728,6 +739,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
+ } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+ AM.MCSym = S->getMCSymbol();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
@@ -1001,7 +1014,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// FIXME: JumpTable and ExternalSymbol address currently don't like
// displacements. It isn't very important, but this should be fixed for
// consistency.
- if (!AM.ES && AM.JT != -1) return true;
+ if (!(AM.ES || AM.MCSym) && AM.JT != -1)
+ return true;
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
@@ -1013,13 +1027,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::FRAME_ALLOC_RECOVER: {
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
- if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
- if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
- // Use the symbol and don't prefix it.
- AM.ES = ESNode->getSymbol();
- AM.SymbolFlags = X86II::MO_NOPREFIX;
- return false;
- }
+ if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
+ // Use the symbol and don't prefix it.
+ AM.MCSym = ESNode->getMCSymbol();
+ return false;
+ }
break;
}
case ISD::Constant: {
@@ -1473,6 +1485,7 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
N->getOpcode() != ISD::TargetJumpTable &&
N->getOpcode() != ISD::TargetGlobalAddress &&
N->getOpcode() != ISD::TargetExternalSymbol &&
+ N->getOpcode() != ISD::MCSymbol &&
N->getOpcode() != ISD::TargetBlockAddress)
return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ce1ca20ee81a..b16bd18aefaa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return true;
}
-/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
-///
-/// This checks a shuffle mask to see if it is performing the same
-/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
-/// that it is also not lane-crossing. It may however involve a blend from the
-/// same lane of a second vector.
-///
-/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
-/// non-trivial to compute in the face of undef lanes. The representation is
-/// *not* suitable for use with existing 256-bit shuffles as it will contain
-/// entries from both V1 and V2 inputs to the wider mask.
-static bool
-is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
- SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = 256 / VT.getScalarSizeInBits();
- RepeatedMask.resize(LaneSize, -1);
- int Size = Mask.size();
- for (int i = 0; i < Size; ++i) {
- if (Mask[i] < 0)
- continue;
- if ((Mask[i] % Size) / LaneSize != i / LaneSize)
- // This entry crosses lanes, so there is no way to model this shuffle.
- return false;
-
- // Ok, handle the in-lane shuffles by detecting if and when they repeat.
- if (RepeatedMask[i % LaneSize] == -1)
- // This is the first non-undef entry in this slot of a 256-bit lane.
- RepeatedMask[i % LaneSize] =
- Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
- else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
- // Found a mismatch with the repeated mask.
- return false;
- }
- return true;
-}
-
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
@@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
}
-/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask.
-///
-/// This helper function produces an 8-bit shuffle immediate corresponding to
-/// the ubiquitous shuffle encoding scheme used in x86 instructions for
-/// shuffling 8 lanes.
-static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
- SelectionDAG &DAG) {
- assert(Mask.size() <= 8 &&
- "Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
- unsigned Imm = 0;
- for (unsigned i = 0; i < Mask.size(); ++i)
- if (Mask[i] >= 0)
- Imm |= (Mask[i] % 2) << i;
- return DAG.getConstant(Imm, DL, MVT::i8);
-}
-
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
@@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
DAG.getConstant(PermMask, DL, MVT::i8));
}
-/// \brief Handle lowering 4-lane 128-bit shuffles.
-static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> WidenedMask,
- SelectionDAG &DAG) {
-
- assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
- // form a 128-bit permutation.
- // convert the 64-bit shuffle mask selection values into 128-bit selection
- // bits defined by a vshuf64x2 instruction's immediate control byte.
- unsigned PermMask = 0, Imm = 0;
-
- for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
- if(WidenedMask[i] == SM_SentinelZero)
- return SDValue();
-
- // use first element in place of undef musk
- Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
- PermMask |= (Imm % 4) << (i * 2);
- }
-
- return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
- DAG.getConstant(PermMask, DL, MVT::i8));
-}
-
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
@@ -10144,105 +10068,86 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
-static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
-
- assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
- // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right)
- int AlignVal = -1;
- for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
- if (Mask[i] < 0)
- continue;
- if (Mask[i] < i)
- return SDValue();
- if (AlignVal == -1)
- AlignVal = Mask[i] - i;
- else if (Mask[i] - i != AlignVal)
- return SDValue();
- }
- // Vector source operands should be swapped
- return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
- DAG.getConstant(AlignVal, DL, MVT::i8));
-}
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
-static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
- assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+}
- MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
+static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- SmallVector<SDValue, 32> VPermMask;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
- VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
- DAG.getConstant(Mask[i], DL,MaskEltVT));
- SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
- VPermMask);
- if (isSingleInputShuffleMask(Mask))
- return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ // Second 128-bit lane.
+ 8, 24, 9, 25, 12, 28, 13, 29}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ // Second 128-bit lane.
+ 10, 26, 11, 27, 14, 30, 15, 31}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
}
-
-/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
+static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- assert((V1.getSimpleValueType() == MVT::v8f64 ||
- V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
- assert((V2.getSimpleValueType() == MVT::v8f64 ||
- V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
+ assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- SmallVector<int, 4> WidenedMask;
- if (canWidenShuffleElements(Mask, WidenedMask))
- if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
- return Op;
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
-
- if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
- if (isSingleInputShuffleMask(Mask)) {
- if (!is128BitLaneCrossingShuffleMask(VT, Mask))
- return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
- get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
- SmallVector<int, 4> RepeatedMask;
- if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
- return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
- }
- return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- assert((V1.getSimpleValueType() == MVT::v16i32 ||
- V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
- assert((V2.getSimpleValueType() == MVT::v16i32 ||
- V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
+ assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -10253,39 +10158,16 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
0, 16, 1, 17, 4, 20, 5, 21,
// Second 128-bit lane.
8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask,
{// First 128-bit lane.
2, 18, 3, 19, 6, 22, 7, 23,
// Second 128-bit lane.
10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
- 12, 12, 14, 14}))
- return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
- 13, 13, 15, 15}))
- return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
-
- SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
- if (isSingleInputShuffleMask(Mask)) {
- unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
- return DAG.getNode(Opc, DL, VT, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
- }
-
- for (int i = 0; i < 4; ++i)
- if (RepeatedMask[i] >= 16)
- RepeatedMask[i] -= 12;
- return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
- }
-
- if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
- case MVT::v8i64:
- return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v16f32:
+ return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i64:
+ return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v16i32:
- return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v32i16:
if (Subtarget->hasBWI())
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
@@ -10759,11 +10643,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(VecVT.is128BitVector() && "Unexpected vector length");
- if (Subtarget->hasSSE41()) {
- SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
- if (Res.getNode())
+ if (Subtarget->hasSSE41())
+ if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- }
MVT VT = Op.getSimpleValueType();
// TODO: handle v16i8.
@@ -12253,11 +12135,9 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasFp256()) {
- SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
- if (Res.getNode())
+ if (Subtarget->hasFp256())
+ if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
- }
return SDValue();
}
@@ -12272,11 +12152,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
- if (Subtarget->hasFp256()) {
- SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
- if (Res.getNode())
+ if (Subtarget->hasFp256())
+ if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
- }
assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements());
@@ -15117,6 +14995,54 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
+/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
+/// function or when returning to a parent frame after catching an exception, we
+/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
+/// Here's the math:
+/// RegNodeBase = EntryEBP - RegNodeSize
+/// ParentFP = RegNodeBase - RegNodeFrameOffset
+/// Subtracting RegNodeSize takes us to the offset of the registration node, and
+/// subtracting the offset (negative on x86) takes us back to the parent FP.
+static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
+ SDValue EntryEBP) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc dl;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy();
+
+ // It's possible that the parent function no longer has a personality function
+ // if the exceptional code was optimized away, in which case we just return
+ // the incoming EBP.
+ if (!Fn->hasPersonalityFn())
+ return EntryEBP;
+
+ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
+ // WinEHStatePass for the full struct definition.
+ int RegNodeSize;
+ switch (classifyEHPersonality(Fn->getPersonalityFn())) {
+ default:
+ report_fatal_error("can only recover FP for MSVC EH personality functions");
+ case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break;
+ case EHPersonality::MSVC_CXX: RegNodeSize = 16; break;
+ }
+
+ // Get an MCSymbol that will ultimately resolve to the frame offset of the EH
+ // registration.
+ MCSymbol *OffsetSym =
+ MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
+ SDValue RegNodeFrameOffset =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal);
+
+ // RegNodeBase = EntryEBP - RegNodeSize
+ // ParentFP = RegNodeBase - RegNodeFrameOffset
+ SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
+ DAG.getConstant(RegNodeSize, dl, PtrVT));
+ return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset);
+}
+
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -15206,6 +15132,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_2OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // We specify 2 possible modes for intrinsics, with/without rounding modes.
+ // First, we check if the intrinsic have rounding mode (6 operands),
+ // if not, we set rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 6)
+ Rnd = Op.getOperand(5);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -15230,11 +15173,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASK:
+ case FMA_OP_MASK3:
+ case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ SDValue PassThru = SDValue();
+
+ // set PassThru element
+ if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+ else if (IntrData->Type == FMA_OP_MASK3)
+ PassThru = Src3;
+ else
+ PassThru = Src1;
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15246,12 +15204,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
case CMP_MASK:
case CMP_MASK_CC: {
@@ -15330,18 +15288,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue PassThru = Op.getOperand(2);
if (isAllOnes(Mask)) // return data as is
return Op.getOperand(1);
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
- PassThru);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ DataToCompress),
+ Mask, PassThru, Subtarget, DAG);
}
case BLEND: {
SDValue Mask = Op.getOperand(3);
@@ -15532,15 +15482,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::getRealLinkageName(Fn->getName()));
- StringRef Name = LSDASym->getName();
- assert(Name.data()[Name.size()] == '\0' && "not null terminated");
// Generate a simple absolute symbol reference. This intrinsic is only
// supported on 32-bit Windows, which isn't PIC.
- SDValue Result =
- DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX);
+ SDValue Result = DAG.getMCSymbol(LSDASym, VT);
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
+
+ case Intrinsic::x86_seh_recoverfp: {
+ SDValue FnOp = Op.getOperand(1);
+ SDValue IncomingFPOp = Op.getOperand(2);
+ GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+ auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+ if (!Fn)
+ report_fatal_error(
+ "llvm.x86.seh.recoverfp must take a function as the first argument");
+ return recoverFramePointer(DAG, Fn, IncomingFPOp);
+ }
}
}
@@ -15550,7 +15508,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ if (!C)
+ llvm_unreachable("Invalid scale type");
+ unsigned ScaleVal = C->getZExtValue();
+ if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
+ llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
@@ -15558,8 +15521,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- else
- MaskInReg = DAG.getBitcast(MaskVT, Mask);
+ else {
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+
+ // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+ // are extracted by EXTRACT_SUBVECTOR.
+ MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15576,7 +15547,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ if (!C)
+ llvm_unreachable("Invalid scale type");
+ unsigned ScaleVal = C->getZExtValue();
+ if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
+ llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15586,8 +15562,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- else
- MaskInReg = DAG.getBitcast(MaskVT, Mask);
+ else {
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+
+ // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+ // are extracted by EXTRACT_SUBVECTOR.
+ MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -15725,37 +15709,38 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc dl(Op);
- SDValue FnOp = Op.getOperand(2);
- SDValue FPOp = Op.getOperand(3);
+ SDValue Chain = Op.getOperand(0);
- // Compute the symbol for the parent EH registration. We know it'll get
- // emitted later.
- auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal());
- MCSymbol *ParentFrameSym =
- MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(Fn->getName()));
- StringRef Name = ParentFrameSym->getName();
- assert(Name.data()[Name.size()] == '\0' && "not null terminated");
-
- // Create a TargetExternalSymbol for the label to avoid any target lowering
- // that would make this PC relative.
- MVT PtrVT = Op.getSimpleValueType();
- SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
- SDValue OffsetVal =
- DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym);
-
- // Add the offset to the FP.
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal);
-
- // Load the second field of the struct, which is 4 bytes in. See
- // WinEHStatePass for more info.
- Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT));
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(),
- false, false, false, 0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VT = TLI.getPointerTy();
+
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ unsigned SPReg = RegInfo->getStackRegister();
+
+ // Get incoming EBP.
+ SDValue IncomingEBP =
+ DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+ // Load [EBP-24] into SP.
+ SDValue SPAddr =
+ DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT));
+ SDValue NewSP =
+ DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
+ false, VT.getScalarSizeInBits() / 8);
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
+
+ // FIXME: Restore the base pointer in case of stack realignment!
+
+ // Adjust EBP to point back to the original frame position.
+ SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
+ return Chain;
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -15764,8 +15749,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
- if (IntNo == Intrinsic::x86_seh_exceptioninfo)
- return LowerEXCEPTIONINFO(Op, Subtarget, DAG);
+ if (IntNo == llvm::Intrinsic::x86_seh_restoreframe)
+ return LowerSEHRESTOREFRAME(Op, Subtarget, DAG);
return SDValue();
}
@@ -15884,16 +15869,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
-
- SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
- DataToCompress, DAG.getUNDEF(VT));
+ SDValue Compressed =
+ getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
+ Mask, DAG.getUNDEF(VT), Subtarget, DAG);
return DAG.getStore(Chain, dl, Compressed, Addr,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
@@ -15901,7 +15879,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
- SDValue PathThru = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
EVT VT = Op.getValueType();
@@ -15909,21 +15887,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (isAllOnes(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits()/8);
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
false, false, false,
VT.getScalarSizeInBits()/8);
SDValue Results[] = {
- DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
- Chain};
+ getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
+ Mask, PassThru, Subtarget, DAG), Chain};
return DAG.getMergeValues(Results, dl);
}
}
@@ -18476,6 +18447,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UMIN: return "X86ISD::UMIN";
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
+ case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -18618,9 +18590,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
+ case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
- case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
}
@@ -18777,7 +18750,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+ if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
return false;
VT = VT.getScalarType();
@@ -19962,6 +19935,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
// to remove extra copies in the loop.
+// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -21302,8 +21276,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
- if (LD.getNode())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
return LD;
if (isTargetShuffle(N->getOpcode())) {
@@ -21451,8 +21424,7 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
/// use 64-bit extracts and shifts.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
- if (NewOp.getNode())
+ if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
SDValue InputVector = N->getOperand(0);
@@ -22895,16 +22867,14 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- if (N->getOpcode() == ISD::SHL) {
- SDValue V = PerformSHLCombine(N, DAG);
- if (V.getNode()) return V;
- }
+ if (N->getOpcode() == ISD::SHL)
+ if (SDValue V = PerformSHLCombine(N, DAG))
+ return V;
- if (N->getOpcode() != ISD::SRA) {
- // Try to fold this logical shift into a zero vector.
- SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
- if (V.getNode()) return V;
- }
+ // Try to fold this logical shift into a zero vector.
+ if (N->getOpcode() != ISD::SRA)
+ if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
+ return V;
return SDValue();
}
@@ -23284,8 +23254,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
SDValue N0 = N->getOperand(0);
@@ -23480,11 +23449,9 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- if (Subtarget->hasCMov()) {
- SDValue RV = performIntegerAbsCombine(N, DAG);
- if (RV.getNode())
+ if (Subtarget->hasCMov())
+ if (SDValue RV = performIntegerAbsCombine(N, DAG))
return RV;
- }
return SDValue();
}
@@ -24266,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- if (VT.isVector()) {
- auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+ if (VT.isVector() && Subtarget->hasSSE2()) {
+ auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
- 128 / InVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+ Size / InVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
DAG.getUNDEF(InVT));
Opnds[0] = N;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
};
+ // If target-size is less than 128-bits, extend to a type that would extend
+ // to 128 bits, extend that and extract the original target vector.
+ if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ unsigned Scale = 128 / VT.getSizeInBits();
+ EVT ExVT =
+ EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
+ SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
if (VT.getSizeInBits() == 128 &&
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
- SDValue ExOp = ExtendToVec128(DL, N0);
+ SDValue ExOp = ExtendVecSize(DL, N0, 128);
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
}
@@ -24301,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendToVec128(DL, SrcVec);
+ SrcVec = ExtendVecSize(DL, SrcVec, 128);
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
@@ -24312,11 +24293,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasFp256())
return SDValue();
- if (VT.isVector() && VT.getSizeInBits() == 256) {
- SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (VT.isVector() && VT.getSizeInBits() == 256)
+ if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- }
return SDValue();
}
@@ -24332,7 +24311,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
- (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
+ !Subtarget->hasAVX512()))
return SDValue();
SDValue A = N->getOperand(0);
@@ -24398,11 +24378,10 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(1, dl, VT));
}
}
- if (VT.is256BitVector()) {
- SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
- if (R.getNode())
+
+ if (VT.is256BitVector())
+ if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- }
// (i8,i32 zext (udivrem (i8 x, i8 y)) ->
// (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
@@ -24606,10 +24585,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
- if (Flags.getNode()) {
+ if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
@@ -24628,10 +24604,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
- if (Flags.getNode()) {
+ if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
Flags);
@@ -24695,16 +24668,18 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
- EVT InVT = Op0->getValueType(0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = Op0.getValueType();
+ EVT InSVT = InVT.getScalarType();
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
- InVT == MVT::v8i16 || InVT == MVT::v4i16) {
+ if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
SDLoc dl(N);
- MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
@@ -24714,10 +24689,10 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16
- if (N->getValueType(0) == MVT::f16)
+ if (VT == MVT::f16)
return SDValue();
- if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ if (!Ld->isVolatile() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget->is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
@@ -25683,75 +25658,40 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
- if (Res.second->hasType(VT))
+ // MVT::Other is used to specify clobber names.
+ if (Res.second->hasType(VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
- // All of the single-register GCC register classes map their values onto
- // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
- // really want an 8-bit or 32-bit register, map to the appropriate register
- // class and return the appropriate register.
- if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8 || VT == MVT::i1) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::AL; break;
- case X86::DX: DestReg = X86::DL; break;
- case X86::CX: DestReg = X86::CL; break;
- case X86::BX: DestReg = X86::BL; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR8RegClass;
- }
- } else if (VT == MVT::i32 || VT == MVT::f32) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::EAX; break;
- case X86::DX: DestReg = X86::EDX; break;
- case X86::CX: DestReg = X86::ECX; break;
- case X86::BX: DestReg = X86::EBX; break;
- case X86::SI: DestReg = X86::ESI; break;
- case X86::DI: DestReg = X86::EDI; break;
- case X86::BP: DestReg = X86::EBP; break;
- case X86::SP: DestReg = X86::ESP; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR32RegClass;
- }
- } else if (VT == MVT::i64 || VT == MVT::f64) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::RAX; break;
- case X86::DX: DestReg = X86::RDX; break;
- case X86::CX: DestReg = X86::RCX; break;
- case X86::BX: DestReg = X86::RBX; break;
- case X86::SI: DestReg = X86::RSI; break;
- case X86::DI: DestReg = X86::RDI; break;
- case X86::BP: DestReg = X86::RBP; break;
- case X86::SP: DestReg = X86::RSP; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR64RegClass;
- }
- } else if (VT != MVT::Other) {
- // Type mismatch and not a clobber: Return an error;
+ // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
+ // return "eax". This should even work for things like getting 64bit integer
+ // registers when given an f64 type.
+ const TargetRegisterClass *Class = Res.second;
+ if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
+ Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
+ unsigned Size = VT.getSizeInBits();
+ MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
+ : Size == 16 ? MVT::i16
+ : Size == 32 ? MVT::i32
+ : Size == 64 ? MVT::i64
+ : MVT::Other;
+ unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+ if (DestReg > 0) {
+ Res.first = DestReg;
+ Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
+ : SimpleTy == MVT::i16 ? &X86::GR16RegClass
+ : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+ : &X86::GR64RegClass;
+ assert(Res.second->contains(Res.first) && "Register in register class");
+ } else {
+ // No register found/type mismatch.
Res.first = 0;
Res.second = nullptr;
}
- } else if (Res.second == &X86::FR32RegClass ||
- Res.second == &X86::FR64RegClass ||
- Res.second == &X86::VR128RegClass ||
- Res.second == &X86::VR256RegClass ||
- Res.second == &X86::FR32XRegClass ||
- Res.second == &X86::FR64XRegClass ||
- Res.second == &X86::VR128XRegClass ||
- Res.second == &X86::VR256XRegClass ||
- Res.second == &X86::VR512RegClass) {
+ } else if (Class == &X86::FR32RegClass || Class == &X86::FR64RegClass ||
+ Class == &X86::VR128RegClass || Class == &X86::VR256RegClass ||
+ Class == &X86::FR32XRegClass || Class == &X86::FR64XRegClass ||
+ Class == &X86::VR128XRegClass || Class == &X86::VR256XRegClass ||
+ Class == &X86::VR512RegClass) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
@@ -25767,15 +25707,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
- else if (VT != MVT::Other) {
+ else {
// Type mismatch and not a clobber: Return an error;
Res.first = 0;
Res.second = nullptr;
}
- } else if (VT != MVT::Other) {
- // Type mismatch and not a clobber: Return an error;
- Res.first = 0;
- Res.second = nullptr;
}
return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9c98333776cf..17660891635c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -211,7 +211,8 @@ namespace llvm {
// FP vector get exponent
FGETEXP_RND,
-
+ // FP Scale
+ SCALEF,
// Integer add/sub with unsigned saturation.
ADDUS,
SUBUS,
@@ -238,6 +239,9 @@ namespace llvm {
/// Signed integer max and min.
SMAX, SMIN,
+ // Integer absolute value
+ ABS,
+
/// Floating point max and min.
FMAX, FMIN,
@@ -516,7 +520,7 @@ namespace llvm {
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
// thought as target memory ops!
};
- } // namespace X86ISD
+ }
/// Define some predicates that are used for node matching.
namespace X86 {
@@ -583,7 +587,7 @@ namespace llvm {
TO_ZERO = 3,
CUR_DIRECTION = 4
};
- } // namespace X86
+ }
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
@@ -638,9 +642,8 @@ namespace llvm {
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
- /// Returns true if the target allows
- /// unaligned memory accesses. of the specified type. Returns whether it
- /// is "fast" by reference in the second argument.
+ /// Returns true if the target allows unaligned memory accesses of the
+ /// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
bool *Fast) const override;
@@ -1120,6 +1123,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-} // namespace llvm
+}
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index de6a83506b28..b309b8210851 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -274,6 +274,16 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, _, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -3436,7 +3446,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3446,7 +3456,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3481,16 +3491,16 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
@@ -3513,6 +3523,48 @@ let Predicates = [HasDQI] in {
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
}
+multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
+ EVEX_4V, EVEX_B;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
@@ -3870,6 +3922,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
+
+multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
+
+ let Predicates = [HasVLX, HasBWI] in {
+ defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
+ defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
+ }
+}
+
+defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
@@ -3950,188 +4015,295 @@ let Predicates = [HasAVX512] in {
//
let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDPatternOperator OpNode = null_frag> {
+multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
- let mayLoad = 1 in
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
- }
-} // Constraints = "$src1 = $dst"
+ }
+}
-let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _,
- SDPatternOperator OpNode> {
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
- }
+}
} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
- X86VectorVTInfo VTI, SDPatternOperator OpNode> {
- defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
- string OpcodeStr, X86VectorVTInfo VTI,
- SDPatternOperator OpNode> {
- defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
- defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
- VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
}
-multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
- string OpcodeStr,
- SDPatternOperator OpNode,
- SDPatternOperator OpNodeRnd> {
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v16f32_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr,
- v16f32_info, OpNodeRnd>, EVEX_V512;
- defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f32x_info, OpNode>, EVEX_V256;
- defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f32x_info, OpNode>, EVEX_V128;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src2,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f64_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
- OpNodeRnd>, EVEX_V512, VEX_W;
- defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>,
- EVEX_V256, VEX_W;
- defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>,
- EVEX_V128, VEX_W;
+}
+
+multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
- _.RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
- ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
- [(set _.RC:$dst,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))),
- _.RC:$src3))]>, EVEX_B;
+multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2),
+ OpcodeStr, "$src2, $src3", "$src3, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.MemOp:$src2),
+ OpcodeStr, "$src2, $src3", "$src3, $src2",
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.ScalarMemOp:$src2),
+ OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
+ "$src3, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ _.RC:$src3))>, AVX512FMA3Base, EVEX_B;
+ }
}
-} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VF>;
+multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
-defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
-defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
-defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
-defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
-defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
+multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop,
- PatFrag mem_frag> {
- let isCommutable = 1 in
- def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
+ dag RHS_r, dag RHS_m > {
+ defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;
+
let mayLoad = 1 in
- def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
+ defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
+
+ defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+
+ let isCodeGenOnly = 1 in {
+ def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1,
- (mem_frag addr:$src3))))]>;
+ [RHS_r]>;
+ let mayLoad = 1 in
+ def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [RHS_m]>;
+ }// isCodeGenOnly = 1
+}
+}// Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
+ string SUFF> {
+
+ defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))))),
+ (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+ _.FRC:$src3))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src3))))>;
+
+ defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
+ (_.VT (OpNode _.RC:$src2,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1)),
+ (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
+ _.FRC:$src1))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
+
+ defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src2)),
+ (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
+ _.FRC:$src2))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
+}
+
+multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
+ let Predicates = [HasAVX512] in {
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f32x_info, "SS">,
+ EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f64x_info, "SD">,
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ }
}
-} // Constraints = "$src1 = $dst"
-defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
@@ -5427,10 +5599,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode> {
- let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+ let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
+ ExeDomain = _.ExeDomain in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
(ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set _.RC:$dst, _.KRCWM:$mask_wb,
(GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
@@ -5438,67 +5611,104 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_CD8<_.EltSize, CD8VT1>;
}
-let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
+ vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W;
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
+ vz64mem, mgatherv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+ vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W;
+ defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
+ vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W;
+ defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W;
+ defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem,
+ mgatherv16i32>, EVEX_V512;
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem,
+ mgatherv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+ vy32xmem, mgatherv8i32>, EVEX_V256;
+ defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vy64xmem, mgatherv4i64>, EVEX_V256;
+ defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mgatherv4i32>, EVEX_V128;
+ defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mgatherv2i64>, EVEX_V128;
}
-
-let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
+ avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
+
+defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
+ avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
-let mayStore = 1, Constraints = "$mask = $mask_wb" in
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
(ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
-let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
+ vy32xmem, mscatterv8i32>, EVEX_V512, VEX_W;
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
+ vz64mem, mscatterv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+ vx32xmem, mscatterv4i32>, EVEX_V256, VEX_W;
+ defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
+ vy64xmem, mscatterv4i64>, EVEX_V256, VEX_W;
+ defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mscatterv4i32>, EVEX_V128, VEX_W;
+ defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mscatterv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz32mem,
+ mscatterv16i32>, EVEX_V512;
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz64mem,
+ mscatterv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+ vy32xmem, mscatterv8i32>, EVEX_V256;
+ defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vy64xmem, mscatterv4i64>, EVEX_V256;
+ defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mscatterv4i32>, EVEX_V128;
+ defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mscatterv2i64>, EVEX_V128;
}
-
-let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
+defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
+ avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
+defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
+ avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
@@ -5599,77 +5809,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
-multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
- RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, X86MemOperand x86scalar_mop,
- string BrdcstStr> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst|$dst, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B;
- def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B, EVEX_K;
- def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
- BrdcstStr, "}"),
- []>, EVEX, EVEX_B, EVEX_KZ;
- }
-}
-
-defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
- i512mem, i32mem, "{1to16}">, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
- i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-
-def : Pat<(xor
- (bc_v16i32 (v16i1sextv16i32)),
- (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
- (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPABSQZrr VR512:$src)>;
-
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
@@ -5868,26 +6007,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
+
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
- def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
- _.ImmAllZerosV)))]>, EVEX_KZ;
-
- let Constraints = "$src0 = $dst" in
- def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
- _.RC:$src0)))]>, EVEX_K;
+ defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1 in {
+ def mr : AVX5128I<opc, MRMDestMem, (outs),
+ (ins _.MemOp:$dst, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ []>, EVEX_CD8<_.EltSize, CD8VT1>;
+
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
+ [(store (_.VT (vselect _.KRCWM:$mask,
+ (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
@@ -5915,37 +6052,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
- _.ImmAllZerosV)))]>, EVEX_KZ;
-
- let Constraints = "$src0 = $dst" in
- def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
-
- let mayLoad = 1, Constraints = "$src0 = $dst" in
- def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT (bitconvert
- (_.LdFrag addr:$src))),
- _.RC:$src0)))]>,
- EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
let mayLoad = 1 in
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.MemOp:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT (bitconvert (_.LdFrag addr:$src))),
- _.ImmAllZerosV)))]>,
- EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86expand (_.VT (bitconvert
+ (_.LdFrag addr:$src1)))))>,
+ AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -6175,3 +6291,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr##_.Suffix,
+ "$src1", "$src1",
+ (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+ "$src1", "$src1",
+ (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+ EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+ "${src1}"##_.BroadcastStr,
+ "${src1}"##_.BroadcastStr,
+ (_.VT (OpNode (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))))>,
+ EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, Predicate prd> {
+ defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+ prd>, VEX_W;
+ defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+ SDNode OpNode, Predicate prd> {
+ defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
+ defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+ bits<8> opc_d, bits<8> opc_q,
+ string OpcodeStr, SDNode OpNode> {
+ defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+ HasAVX512>,
+ avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+ HasBWI>;
+}
+
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
+
+def : Pat<(xor
+ (bc_v16i32 (v16i1sextv16i32)),
+ (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
+ (VPABSDZrr VR512:$src)>;
+def : Pat<(xor
+ (bc_v8i64 (v8i1sextv8i64)),
+ (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
+ (VPABSQZrr VR512:$src)>;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index eb4dc48a7a65..2056056d23a5 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
.addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 912a0fb356ed..7f850d6830e1 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -869,6 +869,7 @@ def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;
def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
@@ -879,6 +880,8 @@ def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
(ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),
+ (ADD32ri GR32:$src1, mcsym:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
(ADD32ri GR32:$src1, tblockaddress:$src2)>;
@@ -886,6 +889,8 @@ def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, mcsym:$src)>;
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV32mi addr:$dst, tblockaddress:$src)>;
@@ -900,6 +905,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+ (MOV64ri mcsym:$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
@@ -914,6 +921,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+ (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
@@ -932,12 +941,15 @@ def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, mcsym:$src)>,
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
-def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
+def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
+def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
// Calls
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 0dd05d8befd6..49068e9c37d3 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -633,16 +633,16 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
-def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave\t$dst", [], IIC_FXSAVE>, TB;
-def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave64\t$dst", [], IIC_FXSAVE>, TB,
- Requires<[In64BitMode]>;
+def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [], IIC_FXRSTOR>, TB,
- Requires<[In64BitMode]>;
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 16ae77dd81a3..fe245c3a7e38 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -310,6 +311,7 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
@@ -347,12 +349,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
- SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
-def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 3>,
- SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
@@ -561,6 +561,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
+ Mgt->getBasePtr().getValueType() == MVT::v4i32);
+ return false;
+}]>;
+
def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -569,6 +577,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
+def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v2i64);
+ return false;
+}]>;
+def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v4i64);
+ return false;
+}]>;
def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -584,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
+def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v2i64 ||
+ Sc->getBasePtr().getValueType() == MVT::v2i64);
+ return false;
+}]>;
+
+def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v4i32 ||
+ Sc->getBasePtr().getValueType() == MVT::v4i32);
+ return false;
+}]>;
+
+def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v4i64 ||
+ Sc->getBasePtr().getValueType() == MVT::v4i64);
+ return false;
+}]>;
+
def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4aa0ae6f1959..b92ba99fb100 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1577,38 +1577,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPXORYrr, X86::VPXORYrm, 0 },
// FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4mr, 0 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, 0 },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, 0 },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, 0 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, 0 },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, 0 },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, 0 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, 0 },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, 0 },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, 0 },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, 0 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, 0 },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, 0 },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, 0 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, 0 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, 0 },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, 0 },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, 0 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, 0 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, 0 },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, 0 },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, 0 },
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE },
// XOP foldable instructions
{ X86::VPCMOVrr, X86::VPCMOVmr, 0 },
@@ -1852,38 +1852,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },
// FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE },
// XOP foldable instructions
{ X86::VPCMOVrr, X86::VPCMOVrm, 0 },
@@ -5295,21 +5295,57 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Size, Alignment, /*AllowCommute=*/true);
}
-static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
- const MachineFunction &MF) {
+/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
+/// because the latter uses contents that wouldn't be defined in the folded
+/// version. For instance, this transformation isn't legal:
+/// movss (%rdi), %xmm0
+/// addps %xmm0, %xmm0
+/// ->
+/// addps (%rdi), %xmm0
+///
+/// But this one is:
+/// movss (%rdi), %xmm0
+/// addss %xmm0, %xmm0
+/// ->
+/// addss (%rdi), %xmm0
+///
+static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineInstr &UserMI,
+ const MachineFunction &MF) {
unsigned Opc = LoadMI.getOpcode();
+ unsigned UserOpc = UserMI.getOpcode();
unsigned RegSize =
MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
- if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) {
// These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return true;
+ // destination register is wider than 32 bits (4 bytes), and its user
+ // instruction isn't scalar (SS).
+ switch (UserOpc) {
+ case X86::ADDSSrr_Int: case X86::VADDSSrr_Int:
+ case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
+ case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
+ case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
- if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) {
// These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
- return true;
+ // destination register is wider than 64 bits (8 bytes), and its user
+ // instruction isn't scalar (SD).
+ switch (UserOpc) {
+ case X86::ADDSDrr_Int: case X86::VADDSDrr_Int:
+ case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
+ case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
+ case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
return false;
}
@@ -5321,7 +5357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
}
@@ -5434,7 +5470,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
break;
}
default: {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
@@ -6334,22 +6370,11 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
return isHighLatencyDef(DefMI->getOpcode());
}
-/// If the input instruction is part of a chain of dependent ops that are
-/// suitable for reassociation, return the earlier instruction in the sequence
-/// that defines its first operand, otherwise return a nullptr.
-/// If the instruction's operands must be commuted to be considered a
-/// reassociation candidate, Commuted will be set to true.
-static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
- unsigned AssocOpcode,
- bool checkPrevOneUse,
- bool &Commuted) {
- if (Inst.getOpcode() != AssocOpcode)
- return nullptr;
-
- MachineOperand Op1 = Inst.getOperand(1);
- MachineOperand Op2 = Inst.getOperand(2);
-
- const MachineBasicBlock *MBB = Inst.getParent();
+static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) {
+ assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
// We need virtual register definitions.
@@ -6359,80 +6384,99 @@ static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
-
+
// And they need to be in the trace (otherwise, they won't have a depth).
- if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
- return nullptr;
-
- Commuted = false;
- if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+ if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
+ return true;
+
+ return false;
+}
+
+static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
std::swap(MI1, MI2);
- Commuted = true;
- }
- // Avoid reassociating operands when it won't provide any benefit. If both
- // operands are produced by instructions of this type, we may already
- // have the optimal sequence.
- if (MI2->getOpcode() == AssocOpcode)
- return nullptr;
-
- // The instruction must only be used by the other instruction that we
- // reassociate with.
- if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
- return nullptr;
-
- // We must match a simple chain of dependent ops.
- // TODO: This check is not necessary for the earliest instruction in the
- // sequence. Instead of a sequence of 3 dependent instructions with the same
- // opcode, we only need to find a sequence of 2 dependent instructions with
- // the same opcode plus 1 other instruction that adds to the height of the
- // trace.
- if (MI1->getOpcode() != AssocOpcode)
- return nullptr;
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ if (MI1->getOpcode() == AssocOpcode &&
+ hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return true;
- return MI1;
+ return false;
}
-/// Select a pattern based on how the operands of each associative operation
-/// need to be commuted.
-static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
- bool CommuteRoot) {
- if (CommutePrev) {
- if (CommuteRoot)
- return MachineCombinerPattern::MC_REASSOC_XA_YB;
- return MachineCombinerPattern::MC_REASSOC_XA_BY;
- } else {
- if (CommuteRoot)
- return MachineCombinerPattern::MC_REASSOC_AX_YB;
- return MachineCombinerPattern::MC_REASSOC_AX_BY;
- }
+/// Return true if the input instruction is part of a chain of dependent ops
+/// that are suitable for reassociation, otherwise return false.
+/// If the instruction's operands must be commuted to have a previous
+/// instruction of the same type define the first source operand, Commuted will
+/// be set to true.
+static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
+ bool &Commuted) {
+ // 1. The instruction must have the correct type.
+ // 2. The instruction must have virtual register definitions for its
+ // operands in the same basic block.
+ // 3. The instruction must have a reassociatable sibling.
+ if (Inst.getOpcode() == AssocOpcode &&
+ hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
+ hasReassocSibling(Inst, Commuted))
+ return true;
+
+ return false;
}
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
return false;
+ // TODO: There is nothing x86-specific here except the instruction type.
+ // This logic could be hoisted into the machine combiner pass itself.
+
+ // Look for this reassociation pattern:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+
// TODO: There are many more associative instruction types to match:
// 1. Other forms of scalar FP add (non-AVX)
// 2. Other data types (double, integer, vectors)
// 3. Other math / logic operations (mul, and, or)
unsigned AssocOpcode = X86::VADDSSrr;
- // TODO: There is nothing x86-specific here except the instruction type.
- // This logic could be hoisted into the machine combiner pass itself.
- bool CommuteRoot;
- if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
- CommuteRoot)) {
- bool CommutePrev;
- if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
- // We found a sequence of instructions that may be suitable for a
- // reassociation of operands to increase ILP.
- Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
- return true;
+ bool Commute = false;
+ if (isReassocCandidate(Root, AssocOpcode, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
}
+ return true;
}
-
+
return false;
}
@@ -6525,14 +6569,16 @@ void X86InstrInfo::genAlternativeCodeSequence(
// Select the previous instruction in the sequence based on the input pattern.
MachineInstr *Prev = nullptr;
- if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
- Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
- Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
- Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
- Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
- else
- llvm_unreachable("Unknown pattern for machine combiner");
+ switch (Pattern) {
+ case MachineCombinerPattern::MC_REASSOC_AX_BY:
+ case MachineCombinerPattern::MC_REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::MC_REASSOC_AX_YB:
+ case MachineCombinerPattern::MC_REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ }
+ assert(Prev && "Unknown pattern for machine combiner");
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
return;
@@ -6604,7 +6650,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char CGBR::ID = 0;
FunctionPass*
@@ -6716,7 +6762,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char LDTLSCleanup::ID = 0;
FunctionPass*
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4912951140d9..bf63336c7005 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -90,7 +90,7 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(CondCode CC);
-} // namespace X86
+} // end namespace X86;
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -512,6 +512,6 @@ private:
int &FrameIndex) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e936b4bc466e..6f38cb8eaf33 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in {
def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
+ def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; }
+ def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; }
+ def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; }
+ def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; }
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -332,7 +336,11 @@ def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>;
def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>;
def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>;
def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>;
-def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;
+
+def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>;
+def vx64xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX64XOperand>;
+def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>;
+def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 95629184f2cf..2a896dfe8aa8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7860,10 +7860,11 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
int_x86_avx2_vbroadcast_sd_pd_256,
WriteFShuffle256>, VEX_L;
-let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
- i128mem, v4i64, loadv2i64,
- WriteLoad>, VEX_L;
+let mayLoad = 1, Predicates = [HasAVX2] in
+def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
+ (ins i128mem:$src),
+ "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteLoad]>, VEX, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 2b829301e327..61a33484b8bf 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,8 +21,9 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
+ INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+ VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
};
@@ -55,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
@@ -129,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-
- X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
- X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
- X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
- X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
- X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
+ X86::VSCATTERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
+ X86::VSCATTERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
+ X86::VSCATTERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
+ X86::VSCATTERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
@@ -251,6 +283,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
@@ -382,9 +460,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
@@ -393,7 +471,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
- X86ISD::FMAX_RND),
+ X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
@@ -405,7 +483,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
- X86ISD::FMIN_RND),
+ X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
@@ -428,6 +506,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
@@ -581,6 +671,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -633,6 +729,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -667,12 +775,181 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
+
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
@@ -696,54 +973,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 64135e0f53e5..3415cedc6fea 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -112,7 +112,7 @@ namespace llvm {
OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo());
}
-} // namespace llvm
+} // end llvm namespace
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
@@ -159,10 +159,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
} else if (MO.isSymbol()) {
- if (MO.getTargetFlags() == X86II::MO_NOPREFIX)
- Name += MO.getSymbolName();
- else
- getMang()->getNameWithPrefix(Name, MO.getSymbolName());
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
} else if (MO.isMBB()) {
assert(Suffix.empty());
Sym = MO.getMBB()->getSymbol();
@@ -241,7 +238,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
- case X86II::MO_NOPREFIX:
break;
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -423,6 +419,8 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ case MachineOperand::MO_MCSymbol:
+ return LowerSymbolOperand(MO, MO.getMCSymbol());
case MachineOperand::MO_JumpTableIndex:
return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
case MachineOperand::MO_ConstantPoolIndex:
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 342d26ab1fbb..d598b55aae3e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -179,6 +179,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 33aa78ffdf8a..143e70bda9e7 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -84,7 +84,7 @@ namespace {
};
char PadShortFunc::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86PadShortFunctions() {
return new PadShortFunc();
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 00e213423974..0033b5058187 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -598,10 +598,10 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
+unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
switch (VT) {
- default: llvm_unreachable("Unexpected VT");
+ default: return 0;
case MVT::i8:
if (High) {
switch (Reg) {
@@ -625,7 +625,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
} else {
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -662,7 +662,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i16:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -698,7 +698,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i32:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -734,7 +734,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i64:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -771,6 +771,14 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
+ unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High);
+ if (Res == 0)
+ llvm_unreachable("Unexpected register or VT");
+ return Res;
+}
+
unsigned get512BitSuperRegister(unsigned Reg) {
if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
return X86::ZMM0 + (Reg - X86::XMM0);
@@ -781,4 +789,4 @@ unsigned get512BitSuperRegister(unsigned Reg) {
llvm_unreachable("Unexpected SIMD register");
}
-} // namespace llvm
+}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 459ecf7fff72..8de1d0bf8ec8 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -128,14 +128,19 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-// getX86SubSuperRegister - X86 utility function. It returns the sub or super
-// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+/// Returns the sub or super register of a specific X86 register.
+/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+/// Aborts on error.
unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
+/// Returns the sub or super register of a specific X86 register.
+/// Like getX86SubSuperRegister() but returns 0 on error.
+unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType,
+ bool High = false);
+
//get512BitRegister - X86 utility - returns 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 25606d3f5df3..eb7e0ed9de6c 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -48,6 +48,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6934061c6922..d420abbe1433 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -490,6 +490,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 3d6eb4f7ce02..fb9cb4ba4c86 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -110,12 +110,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
if (Subtarget.isTargetWin64())
this->Options.TrapUnreachable = true;
- // TODO: By default, all reciprocal estimate operations are off because
- // that matches the behavior before TargetRecip was added (except for btver2
- // which used subtarget features to enable this type of codegen).
- // We should change this to match GCC behavior where everything but
- // scalar division estimates are turned on by default with -ffast-math.
- this->Options.Reciprocals.setDefaults("all", false, 1);
+ // By default (and when -ffast-math is on), enable estimate codegen for
+ // everything except scalar division. By default, use 1 refinement step for
+ // all operations. Defaults may be overridden by using command-line options.
+ // Scalar division estimates are disabled because they break too much
+ // real-world code. These defaults match GCC behavior.
+ this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
+ this->Options.Reciprocals.setDefaults("divf", false, 1);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
initAsmInfo();
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index be56888b75f4..262955698e44 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -44,6 +44,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index f9f62904b64b..6f900ea351ef 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -131,52 +131,44 @@ static std::string APIntToHexString(const APInt &AI) {
return HexString;
}
-
static std::string scalarConstantToHexString(const Constant *C) {
Type *Ty = C->getType();
- APInt AI;
if (isa<UndefValue>(C)) {
- AI = APInt(Ty->getPrimitiveSizeInBits(), /*val=*/0);
- } else if (Ty->isFloatTy() || Ty->isDoubleTy()) {
- const auto *CFP = cast<ConstantFP>(C);
- AI = CFP->getValueAPF().bitcastToAPInt();
- } else if (Ty->isIntegerTy()) {
- const auto *CI = cast<ConstantInt>(C);
- AI = CI->getValue();
+ return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
+ return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
+ return APIntToHexString(CI->getValue());
} else {
- llvm_unreachable("unexpected constant pool element type!");
+ unsigned NumElements;
+ if (isa<VectorType>(Ty))
+ NumElements = Ty->getVectorNumElements();
+ else
+ NumElements = Ty->getArrayNumElements();
+ std::string HexString;
+ for (int I = NumElements - 1, E = -1; I != E; --I)
+ HexString += scalarConstantToHexString(C->getAggregateElement(I));
+ return HexString;
}
- return APIntToHexString(AI);
}
MCSection *
X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
- if (Kind.isReadOnly()) {
- if (C) {
- Type *Ty = C->getType();
- SmallString<32> COMDATSymName;
- if (Ty->isFloatTy() || Ty->isDoubleTy()) {
- COMDATSymName = "__real@";
- COMDATSymName += scalarConstantToHexString(C);
- } else if (const auto *VTy = dyn_cast<VectorType>(Ty)) {
- uint64_t NumBits = VTy->getBitWidth();
- if (NumBits == 128 || NumBits == 256) {
- COMDATSymName = NumBits == 128 ? "__xmm@" : "__ymm@";
- for (int I = VTy->getNumElements() - 1, E = -1; I != E; --I)
- COMDATSymName +=
- scalarConstantToHexString(C->getAggregateElement(I));
- }
- }
- if (!COMDATSymName.empty()) {
- unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_LNK_COMDAT;
- return getContext().getCOFFSection(".rdata", Characteristics, Kind,
- COMDATSymName,
- COFF::IMAGE_COMDAT_SELECT_ANY);
- }
- }
+ if (Kind.isMergeableConst() && C) {
+ const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT;
+ std::string COMDATSymName;
+ if (Kind.isMergeableConst4() || Kind.isMergeableConst8())
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ else if (Kind.isMergeableConst16())
+ COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
+
+ if (!COMDATSymName.empty())
+ return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ANY);
}
return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 13384fab5985..0c82a700952b 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1130,3 +1130,18 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
return isLegalMaskedLoad(DataType, Consecutive);
}
+bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ // Work this as a subsetting of subtarget features.
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // FIXME: This is likely too limiting as it will include subtarget features
+ // that we might not care about for inlining, but it is conservatively
+ // correct.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index e570bb55710a..a83158440193 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -103,6 +103,8 @@ public:
Type *Ty);
bool isLegalMaskedLoad(Type *DataType, int Consecutive);
bool isLegalMaskedStore(Type *DataType, int Consecutive);
+ bool hasCompatibleFunctionAttributes(const Function *Caller,
+ const Function *Callee) const;
/// @}
};
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 71ce45b0bc2e..6925b272b4a5 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -86,7 +86,7 @@ namespace {
};
char VZeroUpperInserter::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index c9e80945549b..90357257b9ef 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -105,7 +105,7 @@ private:
/// The linked list node subobject inside of RegNode.
Value *Link = nullptr;
};
-} // namespace
+}
FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
@@ -398,6 +398,7 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
// Set up RegNodeEscapeIndex
int RegNodeEscapeIndex = escapeRegNode(F);
+ FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
// Only insert stores in catch handlers.
Constant *FI8 =
@@ -480,8 +481,8 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
// Remember and return the index that we used. We save it in WinEHFuncInfo so
- // that we can lower llvm.x86.seh.exceptioninfo later in filter functions
- // without too much trouble.
+ // that we can lower llvm.x86.seh.recoverfp later in filter functions without
+ // too much trouble.
int RegNodeEscapeIndex = escapeRegNode(F);
FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
@@ -528,14 +529,12 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
}
}
- // Insert llvm.stackrestore into each __except block.
- Function *StackRestore =
- Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore);
+ // Insert llvm.x86.seh.restoreframe() into each __except block.
+ Function *RestoreFrame =
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe);
for (BasicBlock *ExceptBB : ExceptBlocks) {
IRBuilder<> Builder(ExceptBB->begin());
- Value *SP =
- Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- Builder.CreateCall(StackRestore, {SP});
+ Builder.CreateCall(RestoreFrame, {});
}
}
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index e1baeacc3e57..2e44ac949b2c 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -40,7 +40,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-} // namespace
+}
static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint16_t &Insn) {
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 8699ce84006c..ac954d0a8fa4 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -123,7 +123,7 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) {
void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
OS << "\t.cc_bottom " << Name << ".function\n";
}
-} // namespace
+}
static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index eb8b5ec0b112..ba6ca843671e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -32,6 +32,6 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 116e89a60ee4..607c77248952 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -58,6 +58,6 @@ namespace llvm {
return 4;
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 8d96105a2ebc..77292c4f8f52 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -34,7 +34,7 @@ namespace {
}
};
char XCoreFTAOElim::ID = 0;
-} // namespace
+}
/// createXCoreFrameToArgsOffsetEliminationPass - returns an instance of the
/// Frame to args offset elimination pass
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 9c49a8d0dbaa..97f0494b6fe3 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -85,7 +85,7 @@ namespace llvm {
// Memory barrier.
MEMBARRIER
};
- } // namespace XCoreISD
+ }
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
@@ -215,6 +215,6 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
LLVMContext &Context) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index a6e974e2e622..ee30344dcc25 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -41,7 +41,7 @@ namespace XCore {
COND_INVALID
};
}
-} // namespace llvm
+}
// Pin the vtable to this file.
void XCoreInstrInfo::anchor() {}
@@ -196,15 +196,10 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
+
if (!isUnpredicatedTerminator(I))
return false;
@@ -312,14 +307,10 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
unsigned
XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 70beb4179118..b958c361f5a2 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -88,6 +88,6 @@ public:
unsigned Reg, uint64_t Value) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index f866ab063396..996c6f59346d 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -50,7 +50,7 @@ namespace {
bool runOnModule(Module &M) override;
};
-} // namespace
+}
char XCoreLowerThreadLocal::ID = 0;
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index 74a7f20570e8..569147872f23 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -37,6 +37,6 @@ private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 8cce75fd0a73..078ffde18fb9 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -101,6 +101,6 @@ public:
return SpillLabels;
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 622484374a42..cfd80b3f3172 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -35,6 +35,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 74ee594e9c5a..f01fb6714d86 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -61,6 +61,6 @@ public:
return &InstrInfo.getRegisterInfo();
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index a82702fc99fc..3563dbc5cb7b 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -22,6 +22,6 @@ public:
virtual void emitCCBottomData(StringRef Name) = 0;
virtual void emitCCBottomFunction(StringRef Name) = 0;
};
-} // namespace llvm
+}
#endif