Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARM.td10
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp106
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h6
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp540
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h29
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp35
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h5
-rw-r--r--lib/Target/ARM/ARMCallingConv.td31
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp76
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp76
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp15
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp362
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp26
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp417
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp669
-rw-r--r--lib/Target/ARM/ARMISelLowering.h30
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td548
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td895
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td48
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td292
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td93
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp12
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td32
-rw-r--r--lib/Target/ARM/ARMSchedule.td24
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td50
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td58
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp37
-rw-r--r--lib/Target/ARM/ARMSubtarget.h5
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp43
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp341
-rw-r--r--lib/Target/ARM/CMakeLists.txt2
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp199
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp140
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp177
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h106
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp28
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp114
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp49
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp13
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp4
-rw-r--r--lib/Target/ARM/README.txt21
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp8
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp11
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp54
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h5
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp1
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt2
-rw-r--r--lib/Target/CellSPU/README.txt14
-rw-r--r--lib/Target/CellSPU/SPUAsmPrinter.cpp4
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.cpp6
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.h6
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp73
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h9
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp94
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td4
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp3
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h7
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp6
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp33
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h4
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt4
-rw-r--r--lib/Target/Hexagon/Hexagon.h8
-rw-r--r--lib/Target/Hexagon/Hexagon.td14
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp58
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td8
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp16
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp44
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp54
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp408
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h12
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td2
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td136
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td27
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp1253
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h13
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td2285
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td55
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td3306
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV5.td626
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td1247
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsDerived.td34
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV5.td395
-rw-r--r--lib/Target/Hexagon/HexagonMCInst.h41
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp647
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp23
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td16
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td37
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td41
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp146
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp28
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h8
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp28
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp3646
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp67
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h13
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h31
-rw-r--r--lib/Target/LLVMBuild.txt2
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt2
-rw-r--r--lib/Target/MBlaze/MBlaze.td2
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp12
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp60
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.h8
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td4
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.h6
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule.td5
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.cpp7
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp4
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h2
-rw-r--r--lib/Target/MSP430/CMakeLists.txt2
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp56
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h8
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h1
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td6
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.h6
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp3
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h3
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td6
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp4
-rw-r--r--lib/Target/Mips/AsmParser/CMakeLists.txt3
-rw-r--r--lib/Target/Mips/CMakeLists.txt11
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp193
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp20
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/Makefile1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp39
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp44
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h21
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp29
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h3
-rw-r--r--lib/Target/Mips/Mips.h4
-rw-r--r--lib/Target/Mips/Mips.td4
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp87
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h43
-rw-r--r--lib/Target/Mips/Mips16InstrFormats.td663
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp132
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h76
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td419
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp111
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h37
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td169
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp254
-rw-r--r--lib/Target/Mips/MipsCallingConv.td52
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp6
-rw-r--r--lib/Target/Mips/MipsCondMov.td94
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp75
-rw-r--r--lib/Target/Mips/MipsEmitGPRestore.cpp97
-rw-r--r--lib/Target/Mips/MipsExpandPseudo.cpp123
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp244
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h20
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp141
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp809
-rw-r--r--lib/Target/Mips/MipsISelLowering.h42
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td184
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td37
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp289
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h105
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td530
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp53
-rw-r--r--lib/Target/Mips/MipsJITInfo.h6
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp419
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp226
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h9
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp16
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h24
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp145
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h15
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td236
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp210
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h44
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp320
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h86
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp138
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.h39
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp8
-rw-r--r--lib/Target/Mips/MipsSubtarget.h6
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp49
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h121
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt34
-rw-r--r--lib/Target/NVPTX/InstPrinter/CMakeLists.txt7
-rw-r--r--lib/Target/NVPTX/InstPrinter/LLVMBuild.txt (renamed from lib/Target/PTX/InstPrinter/LLVMBuild.txt)8
-rw-r--r--lib/Target/NVPTX/InstPrinter/Makefile (renamed from lib/Target/PTX/InstPrinter/Makefile)5
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp1
-rw-r--r--lib/Target/NVPTX/LLVMBuild.txt (renamed from lib/Target/PTX/LLVMBuild.txt)12
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt9
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt (renamed from lib/Target/PTX/MCTargetDesc/LLVMBuild.txt)10
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/Makefile (renamed from lib/Target/PTX/MCTargetDesc/Makefile)4
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h88
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp63
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h (renamed from lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h)24
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp91
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h (renamed from lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h)18
-rw-r--r--lib/Target/NVPTX/Makefile (renamed from lib/Target/PTX/Makefile)16
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h49
-rw-r--r--lib/Target/NVPTX/NVPTX.h137
-rw-r--r--lib/Target/NVPTX/NVPTX.td44
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.cpp48
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h49
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp2064
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h315
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp76
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h40
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp683
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h105
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp1291
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h144
-rw-r--r--lib/Target/NVPTX/NVPTXInstrFormats.td43
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp326
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h83
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td2837
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td1675
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp208
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h47
-rw-r--r--lib/Target/NVPTX/NVPTXNumRegisters.h (renamed from lib/Target/PTX/PTXMachineFunctionInfo.cpp)14
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp325
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h92
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td108
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h45
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.cpp77
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.h41
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp57
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h92
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp133
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h125
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h105
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp514
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h94
-rw-r--r--lib/Target/NVPTX/NVPTXVector.td1481
-rw-r--r--lib/Target/NVPTX/NVPTXutil.cpp92
-rw-r--r--lib/Target/NVPTX/NVPTXutil.h25
-rw-r--r--lib/Target/NVPTX/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/NVPTX/TargetInfo/LLVMBuild.txt (renamed from lib/Target/PTX/TargetInfo/LLVMBuild.txt)8
-rw-r--r--lib/Target/NVPTX/TargetInfo/Makefile (renamed from lib/Target/PTX/TargetInfo/Makefile)4
-rw-r--r--lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp23
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp1248
-rw-r--r--lib/Target/NVPTX/cl_common_defines.h125
-rw-r--r--lib/Target/NVPTX/gen-register-defs.py202
-rw-r--r--lib/Target/PTX/CMakeLists.txt32
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt8
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp249
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.h45
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h134
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp37
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp98
-rw-r--r--lib/Target/PTX/PTX.h43
-rw-r--r--lib/Target/PTX/PTX.td141
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp561
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.h57
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp181
-rw-r--r--lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--lib/Target/PTX/PTXFrameLowering.h44
-rw-r--r--lib/Target/PTX/PTXISelDAGToDAG.cpp356
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp522
-rw-r--r--lib/Target/PTX/PTXISelLowering.h82
-rw-r--r--lib/Target/PTX/PTXInstrFormats.td51
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp359
-rw-r--r--lib/Target/PTX/PTXInstrInfo.h133
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td1031
-rw-r--r--lib/Target/PTX/PTXInstrLoadStore.td278
-rw-r--r--lib/Target/PTX/PTXIntrinsicInstrInfo.td110
-rw-r--r--lib/Target/PTX/PTXMCAsmStreamer.cpp556
-rw-r--r--lib/Target/PTX/PTXMCInstLower.cpp32
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp85
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h202
-rw-r--r--lib/Target/PTX/PTXParamManager.cpp73
-rw-r--r--lib/Target/PTX/PTXParamManager.h87
-rw-r--r--lib/Target/PTX/PTXRegAlloc.cpp53
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.cpp38
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.h56
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.td36
-rw-r--r--lib/Target/PTX/PTXSelectionDAGInfo.cpp150
-rw-r--r--lib/Target/PTX/PTXSelectionDAGInfo.h53
-rw-r--r--lib/Target/PTX/PTXSubtarget.cpp68
-rw-r--r--lib/Target/PTX/PTXSubtarget.h131
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp165
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h104
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp25
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp27
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h2
-rw-r--r--lib/Target/PowerPC/PPC.h19
-rw-r--r--lib/Target/PowerPC/PPC.td43
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp36
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp724
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp40
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp61
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp228
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h14
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td190
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td10
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp137
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h3
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td222
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp20
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp35
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h7
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td14
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td39
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td29
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td66
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp62
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h8
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp27
-rw-r--r--lib/Target/PowerPC/README.txt1
-rw-r--r--lib/Target/PowerPC/TargetInfo/Makefile2
-rw-r--r--lib/Target/README.txt6
-rw-r--r--lib/Target/Sparc/CMakeLists.txt2
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp11
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h5
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp35
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h7
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp12
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp3
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp9
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h2
-rw-r--r--lib/Target/TargetData.cpp4
-rw-r--r--lib/Target/TargetInstrInfo.cpp60
-rw-r--r--lib/Target/TargetLibraryInfo.cpp105
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp2
-rw-r--r--lib/Target/TargetMachine.cpp54
-rw-r--r--lib/Target/TargetRegisterInfo.cpp146
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp162
-rw-r--r--lib/Target/X86/CMakeLists.txt2
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp67
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h10
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c16
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h74
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h25
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp34
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h71
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp24
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp68
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h1
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp90
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h18
-rw-r--r--lib/Target/X86/X86.h7
-rw-r--r--lib/Target/X86/X86.td52
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp10
-rw-r--r--lib/Target/X86/X86AsmPrinter.h8
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.cpp1
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.h4
-rw-r--r--lib/Target/X86/X86CallingConv.td13
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp786
-rw-r--r--lib/Target/X86/X86FastISel.cpp169
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp20
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp167
-rw-r--r--lib/Target/X86/X86FrameLowering.h2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp316
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp2939
-rw-r--r--lib/Target/X86/X86ISelLowering.h73
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td6
-rw-r--r--lib/Target/X86/X86InstrBuilder.h16
-rw-r--r--lib/Target/X86/X86InstrCompiler.td24
-rw-r--r--lib/Target/X86/X86InstrControl.td48
-rw-r--r--lib/Target/X86/X86InstrExtension.td8
-rw-r--r--lib/Target/X86/X86InstrFMA.td326
-rw-r--r--lib/Target/X86/X86InstrFPStack.td185
-rw-r--r--lib/Target/X86/X86InstrFormats.td33
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td46
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1099
-rw-r--r--lib/Target/X86/X86InstrInfo.h46
-rw-r--r--lib/Target/X86/X86InstrInfo.td586
-rw-r--r--lib/Target/X86/X86InstrMMX.td421
-rw-r--r--lib/Target/X86/X86InstrSSE.td1390
-rw-r--r--lib/Target/X86/X86InstrSystem.td293
-rw-r--r--lib/Target/X86/X86InstrVMX.td8
-rw-r--r--lib/Target/X86/X86InstrXOP.td109
-rw-r--r--lib/Target/X86/X86JITInfo.h2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp110
-rw-r--r--lib/Target/X86/X86MCInstLower.h6
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h20
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp111
-rw-r--r--lib/Target/X86/X86RegisterInfo.h14
-rw-r--r--lib/Target/X86/X86RegisterInfo.td111
-rw-r--r--lib/Target/X86/X86Relocations.h2
-rw-r--r--lib/Target/X86/X86Schedule.td218
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td229
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp8
-rw-r--r--lib/Target/X86/X86Subtarget.cpp89
-rw-r--r--lib/Target/X86/X86Subtarget.h18
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp23
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp13
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h10
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp6
-rw-r--r--lib/Target/XCore/CMakeLists.txt2
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp14
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp10
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp50
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h11
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td16
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp10
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp2
426 files changed, 45746 insertions, 18578 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 9b0cb0c9e575..69e2346dc0b8 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : Processor<Name, NoItineraries, Features>;
// V4 Processors.
def : ProcNoItin<"generic", []>;
@@ -204,13 +204,13 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
-def : Processor<"cortex-a8", CortexA8Itineraries,
+def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
FeatureHasRAS]>;
@@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP2,
+ FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 410790a7baa0..8536b94d716f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,8 +23,8 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
@@ -283,9 +283,16 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
}
}
-void ARMAsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.ForceCodeRegion();
+void ARMAsmPrinter::EmitFunctionBodyEnd() {
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+}
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
OutStreamer.EmitThumbFunc(CurrentFnSym);
@@ -415,7 +422,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
case 'a': // Print as a memory address.
if (MI->getOperand(OpNum).isReg()) {
O << "["
@@ -434,15 +443,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printOperand(MI, OpNum, O);
return false;
case 'y': // Print a VFP single precision register as indexed double.
- // This uses the ordering of the alias table to get the first 'd' register
- // that overlaps the 's' register. Also, s0 is an odd register, hence the
- // odd modulus check below.
if (MI->getOperand(OpNum).isReg()) {
unsigned Reg = MI->getOperand(OpNum).getReg();
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) <<
- (((Reg % 2) == 1) ? "[0]" : "[1]");
- return false;
+ // Find the 'd' register that has this 's' register as a sub-register,
+ // and determine the lane number.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (!ARM::DPRRegClass.contains(*SR))
+ continue;
+ bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg;
+ O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? "[0]" : "[1]");
+ return false;
+ }
}
return true;
case 'B': // Bitwise inverse of integer or symbol without a preceding #.
@@ -517,10 +529,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
+ // This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'H': // The highest-numbered register of a pair.
return true;
+ case 'H': // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+ RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
+
+ unsigned Reg = RC.getRegister(RegIdx);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
}
}
@@ -934,13 +959,13 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- // Tag the jump table appropriately for precise disassembly.
- OutStreamer.EmitJumpTable32Region();
-
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -969,6 +994,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
OutContext);
OutStreamer.EmitValue(Expr, 4);
}
+ // Mark the end of jump table data-in-code region.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
@@ -978,15 +1005,6 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- // Emit a label for the jump table.
- if (MI->getOpcode() == ARM::t2TBB_JT) {
- OutStreamer.EmitJumpTable8Region();
- } else if (MI->getOpcode() == ARM::t2TBH_JT) {
- OutStreamer.EmitJumpTable16Region();
- } else {
- OutStreamer.EmitJumpTable32Region();
- }
-
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -995,10 +1013,15 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
unsigned OffsetWidth = 4;
- if (MI->getOpcode() == ARM::t2TBB_JT)
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
OffsetWidth = 1;
- else if (MI->getOpcode() == ARM::t2TBH_JT)
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT8);
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
OffsetWidth = 2;
+ // Mark the jump table as data-in-code.
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT16);
+ }
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
@@ -1031,6 +1054,11 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
OutStreamer.EmitValue(Expr, OffsetWidth);
}
+ // Mark the end of jump table data-in-code region. 32-bit offsets use
+ // actual branch instructions here, so we don't mark those as a data-region
+ // at all.
+ if (OffsetWidth != 4)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
@@ -1121,8 +1149,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(SrcReg == ARM::SP &&
"Only stack pointer as a source reg is supported");
for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
- i != NumOps; ++i)
- RegList.push_back(MI->getOperand(i).getReg());
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporary to workaround PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
@@ -1208,8 +1242,11 @@ extern cl::opt<bool> EnableARMEHABI;
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY)
- OutStreamer.EmitCodeRegion();
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
// Emit unwinding stuff for frame-related instructions
if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
@@ -1565,9 +1602,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- // Mark the constant pool entry as data if we're not already in a data
- // region.
- OutStreamer.EmitDataRegion();
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
OutStreamer.EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
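Taken together, the ARMAsmPrinter.cpp changes replace the old per-instruction EmitCodeRegion/EmitDataRegion calls with explicit data-in-code brackets: a region is opened on the first CONSTPOOL_ENTRY (or jump-table) emitted and closed by the next real instruction or by EmitFunctionBodyEnd. A minimal standalone sketch of that begin-once/end-once bookkeeping, using a hypothetical Streamer stand-in rather than the real MCStreamer:

#include <cstdio>

// Hypothetical stand-ins for MCStreamer and the MCDR_* region kinds.
enum DataRegionKind { DR_Data, DR_End };

struct Streamer {
  void EmitDataRegion(DataRegionKind K) {
    std::printf(K == DR_Data ? "  .data_region\n" : "  .end_data_region\n");
  }
};

// Mirrors the InConstantPool flag added to ARMAsmPrinter: open the region on
// the first pool entry of a run, close it on the first non-entry instruction
// or at the end of the function body.
struct ConstantPoolTracker {
  bool InConstantPool = false;

  void emitInstruction(Streamer &S, bool IsConstPoolEntry) {
    if (InConstantPool && !IsConstPoolEntry) {
      S.EmitDataRegion(DR_End);   // just left a run of pool entries
      InConstantPool = false;
    }
    if (IsConstPoolEntry && !InConstantPool) {
      S.EmitDataRegion(DR_Data);  // first entry of a new run
      InConstantPool = true;
    }
    std::printf(IsConstPoolEntry ? "  <pool entry>\n" : "  <instruction>\n");
  }

  void emitFunctionBodyEnd(Streamer &S) {
    if (!InConstantPool)
      return;                     // no pool left open
    InConstantPool = false;
    S.EmitDataRegion(DR_End);     // pool ran to the end of the function
  }
};

int main() {
  Streamer S;
  ConstantPoolTracker T;
  const bool Seq[] = {false, true, true, false, true, true};
  for (bool IsEntry : Seq)
    T.emitInstruction(S, IsEntry);
  T.emitFunctionBodyEnd(S);       // closes the trailing pool
}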
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index af3f75a0e892..3555e8f50a0d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -44,9 +44,12 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// MachineFunction.
const MachineConstantPool *MCP;
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
public:
explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
@@ -70,6 +73,7 @@ public:
bool runOnMachineFunction(MachineFunction &F);
virtual void EmitConstantPool() {} // we emit constant pools customly!
+ virtual void EmitFunctionBodyEnd();
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c6280f819a4f..057fd718fdb5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -51,9 +51,9 @@ WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
- unsigned MLxOpc; // MLA / MLS opcode
- unsigned MulOpc; // Expanded multiplication opcode
- unsigned AddSubOpc; // Expanded add / sub opcode
+ uint16_t MLxOpc; // MLA / MLS opcode
+ uint16_t MulOpc; // Expanded multiplication opcode
+ uint16_t AddSubOpc; // Expanded add / sub opcode
bool NegAcc; // True if the acc is negated before the add / sub.
bool HasLane; // True if instruction has an extra "lane" operand.
};
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1531,11 +1575,11 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
- unsigned PseudoOpc;
- unsigned MachineOpc;
+ uint16_t PseudoOpc;
+ uint16_t MachineOpc;
};
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADDSri, ARM::ADDri},
{ARM::ADDSrr, ARM::ADDrr},
{ARM::ADDSrsi, ARM::ADDrsi},
@@ -1563,14 +1607,9 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
};
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
- static const int NPairs =
- sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
- for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
- *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
- if (OldOpc == OpcPair->PseudoOpc) {
- return OpcPair->MachineOpc;
- }
- }
+ for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+ if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
+ return AddSubFlagsOpcodeMap[i].MachineOpc;
return 0;
}
@@ -1742,20 +1781,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
- int &CmpValue) const {
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ case ARM::CMPrr:
+ case ARM::t2CMPrr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
@@ -1793,20 +1845,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
-bool ARMBaseInstrInfo::
-OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
- if (CmpValue != 0)
- return false;
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
- MachineInstr *MI = &*DI;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
@@ -1825,32 +1924,49 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
}
- // Conservatively refuse to convert an instruction which isn't in the same BB
- // as the comparison.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
- // Check that CPSR isn't set between the comparison instruction and the one we
- // want to change.
- MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
+ // There are two possible candidates which can be changed to set CPSR:
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
+ MachineInstr *Sub = NULL;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = NULL;
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ // Conservatively refuse to convert an instruction which isn't in the same
+ // BB as the comparison.
+ // For CMPri, we need to check Sub, thus we can't return here.
+ if (CmpInstr->getOpcode() == ARM::CMPri ||
+ CmpInstr->getOpcode() == ARM::t2CMPri)
+ MI = NULL;
+ else
+ return false;
+ }
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
- return false;
- if (!MO.isReg()) continue;
-
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
- if (MO.getReg() == ARM::CPSR)
- return false;
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ Sub = &*I;
+ break;
}
if (I == B)
@@ -1858,7 +1974,13 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
return false;
}
- // Set the "zero" bit in CPSR.
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -1894,13 +2016,17 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
case ARM::EORri:
case ARM::t2EORrr:
case ARM::t2EORri: {
- // Scan forward for the use of CPSR, if it's a conditional code requires
- // checking of V bit, then this is not safe to do. If we can't find the
- // CPSR use (i.e. used in another block), then it's not safe to perform
- // the optimization.
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code requires
+ // checking of V bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
- E = MI->getParent()->end();
+ E = CmpInstr->getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
@@ -1918,28 +2044,56 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
- switch (CC) {
- default:
- isSafe = true;
- break;
- case ARMCC::VS:
- case ARMCC::VC:
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
- return false;
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
}
+ else
+ switch (CC) {
+ default:
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
}
}
- if (!isSafe)
- return false;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
CmpInstr->eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
return true;
}
}
@@ -2071,9 +2225,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0)
+ return ItinUOps;
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2088,7 +2242,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
//
// On Cortex-A8, each pair of register loads / stores can be scheduled on the
// same cycle. The scheduling for the first load / store must be done
- // separately by assuming the the address is not 64-bit aligned.
+ // separately by assuming the address is not 64-bit aligned.
//
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
@@ -2147,19 +2301,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- UOps = (NumRegs / 2);
+ int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++UOps;
- return UOps;
+ ++A8UOps;
+ return A8UOps;
} else if (Subtarget.isCortexA9()) {
- UOps = (NumRegs / 2);
+ int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++UOps;
- return UOps;
+ ++A9UOps;
+ return A9UOps;
} else {
// Assume the worst.
return NumRegs;
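The Cortex-A8/A9 uop formulas above are simple enough to check by hand. A hedged sketch of the two cases as written (the real code also treats a missing or under-aligned memory operand as "unaligned"):

#include <cassert>

// Cortex-A8: pairs of registers issue together, so ceil(NumRegs / 2) uops.
static unsigned a8LoadStoreMultipleUOps(unsigned NumRegs) {
  unsigned UOps = NumRegs / 2;
  if (NumRegs % 2)
    ++UOps;
  return UOps;
}

// Cortex-A9: (#reg / 2) + (#reg % 2); an odd register count or an address
// that is not 64-bit aligned costs one extra AGU cycle.
static unsigned a9LoadStoreMultipleUOps(unsigned NumRegs, bool Aligned64) {
  unsigned UOps = NumRegs / 2;
  if ((NumRegs % 2) || !Aligned64)
    ++UOps;
  return UOps;
}

int main() {
  assert(a8LoadStoreMultipleUOps(4) == 2);                      // issued 2, 2
  assert(a8LoadStoreMultipleUOps(5) == 3);                      // issued 2, 2, 1
  assert(a9LoadStoreMultipleUOps(4, /*Aligned64=*/true) == 2);
  assert(a9LoadStoreMultipleUOps(4, /*Aligned64=*/false) == 3); // extra AGU cycle
  assert(a9LoadStoreMultipleUOps(5, /*Aligned64=*/true) == 3);
  return 0;
}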
@@ -2478,82 +2632,14 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
return II;
}
-int
-ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
-
- if (!ItinData || ItinData->isEmpty())
- return DefMI->mayLoad() ? 3 : 1;
-
- const MCInstrDesc *DefMCID = &DefMI->getDesc();
- const MCInstrDesc *UseMCID = &UseMI->getDesc();
- const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
- if (Reg == ARM::CPSR) {
- if (DefMI->getOpcode() == ARM::FMSTAT) {
- // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
- return Subtarget.isCortexA9() ? 1 : 20;
- }
-
- // CPSR set and branch can be paired in the same cycle.
- if (UseMI->isBranch())
- return 0;
-
- // Otherwise it takes the instruction latency (generally one).
- int Latency = getInstrLatency(ItinData, DefMI);
-
- // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
- // its uses. Instructions which are otherwise scheduled between them may
- // incur a code size penalty (not able to use the CPSR setting 16-bit
- // instructions).
- if (Latency > 0 && Subtarget.isThumb2()) {
- const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- --Latency;
- }
- return Latency;
- }
-
- unsigned DefAlign = DefMI->hasOneMemOperand()
- ? (*DefMI->memoperands_begin())->getAlignment() : 0;
- unsigned UseAlign = UseMI->hasOneMemOperand()
- ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-
- unsigned DefAdj = 0;
- if (DefMI->isBundle()) {
- DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
- return 1;
- DefMCID = &DefMI->getDesc();
- }
- unsigned UseAdj = 0;
- if (UseMI->isBundle()) {
- unsigned NewUseIdx;
- const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
- Reg, NewUseIdx, UseAdj);
- if (NewUseMI) {
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
- }
- }
-
- int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
- *UseMCID, UseIdx, UseAlign);
- int Adj = DefAdj + UseAdj;
- if (Adj) {
- Latency -= (int)(DefAdj + UseAdj);
- if (Latency < 1)
- return 1;
- }
-
- if (Latency > 1 &&
- (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+/// Return the number of cycles to add to (or subtract from) the static
+/// itinerary based on the def opcode and alignment. The caller will ensure that
+/// adjusted latency is at least one cycle.
+static int adjustDefLatency(const ARMSubtarget &Subtarget,
+ const MachineInstr *DefMI,
+ const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ int Adjust = 0;
+ if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
@@ -2564,7 +2650,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
- --Latency;
+ --Adjust;
break;
}
case ARM::t2LDRs:
@@ -2574,13 +2660,13 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// Thumb2 mode: lsl only.
unsigned ShAmt = DefMI->getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 2)
- --Latency;
+ --Adjust;
break;
}
}
}
- if (DefAlign < 8 && Subtarget.isCortexA9())
+ if (DefAlign < 8 && Subtarget.isCortexA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -2689,10 +2775,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4LNq32_UPD:
// If the address is not 64-bit aligned, the latencies of these
// instructions increases by one.
- ++Latency;
+ ++Adjust;
break;
}
+ }
+ return Adjust;
+}
+
+
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ // No operand latency. The caller may fall back to getInstrLatency.
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ DefMCID = &DefMI->getDesc();
+ }
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ return 1;
+ }
+
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+
+ if (Reg == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMI->isBranch())
+ return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ unsigned Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
+ }
+
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ // Get the itinerary's latency if possible, and handle variable_ops.
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ // Unable to find operand latency. The caller may resort to getInstrLatency.
+ if (Latency < 0)
+ return Latency;
+
+ // Adjust for IT block position.
+ int Adj = DefAdj + UseAdj;
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ // Return the itinerary latency, which may be zero but not less than zero.
return Latency;
}
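Both latency hooks end with the same guard: a negative per-opcode adjustment (for example, a cheap shifter-operand variant) is applied only when it cannot drive the result to zero or below; otherwise the raw itinerary latency is returned. A sketch of just that clamp, with the assumed inputs spelled out:

#include <cassert>

// Latency: the itinerary's static latency (>= 0).
// Adj:     per-opcode adjustment; negative for cheap addressing variants,
//          positive for e.g. unaligned VLDs on Cortex-A9.
static int adjustedLatency(int Latency, int Adj) {
  if (Adj >= 0 || Latency > -Adj)
    return Latency + Adj;
  return Latency; // adjustment would make the latency non-positive; skip it
}

int main() {
  assert(adjustedLatency(3, 1) == 4);  // unaligned-load penalty
  assert(adjustedLatency(3, -1) == 2); // cheap shifter-op variant
  assert(adjustedLatency(1, -1) == 1); // clamp: keep the itinerary latency
  return 0;
}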
@@ -2892,22 +3069,20 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
- return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
- DepMI->getNumOperands());
+ return getInstrLatency(ItinData, DefMI);
}
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 1;
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
+ // An instruction scheduler typically runs on unbundled instructions; however,
+ // other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
- int Latency = 0;
+ unsigned Latency = 0;
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
@@ -2918,15 +3093,33 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI->getDesc();
- unsigned Class = MCID.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
- if (UOps)
- return ItinData->getStageLatency(Class);
- return getNumMicroOps(ItinData, MI);
+ }
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
+ return MI->mayLoad() ? 3 : 1;
+
+ unsigned Class = MCID.getSchedClass();
+
+ // For instructions with variable uops, use uops as latency.
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
+ return getNumMicroOps(ItinData, MI);
+
+ // For the common case, fall back on the itinerary's latency.
+ unsigned Latency = ItinData->getStageLatency(Class);
+
+ // Adjust for dynamic def-side opcode variants not captured by the itinerary.
+ unsigned DefAlign = MI->hasOneMemOperand()
+ ? (*MI->memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ if (Adj >= 0 || (int)Latency > -Adj) {
+ return Latency + Adj;
+ }
+ return Latency;
}
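
The bundle branch earlier in this function sums per-instruction latencies, but the hunk boundary cuts the loop body short. A hedged sketch of how such a bundle walk typically accumulates latency (an illustration, not the verbatim elided code):

unsigned BundleLatency = 0;
MachineBasicBlock::const_instr_iterator I = MI, E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
  // An IT instruction adds no latency of its own; every other instruction in
  // the bundle contributes its individual latency.
  if (I->getOpcode() != ARM::t2IT)
    BundleLatency += getInstrLatency(ItinData, I, PredCost);
}
return BundleLatency;
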
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
@@ -2960,7 +3153,10 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
+ /*FindMin=*/false);
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2fe85072a330..1a10a4ab1c52 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -186,16 +186,20 @@ public:
return NumCycles == 1;
}
- /// AnalyzeCompare - For a comparison instruction, return the source register
- /// in SrcReg and the value it compares against in CmpValue. Return true if
- /// the comparison instruction can be analyzed.
- virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- int &CmpMask, int &CmpValue) const;
-
- /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
- /// that we can remove a "comparison with zero".
- virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- int CmpMask, int CmpValue,
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 (if it has two register operands), and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
@@ -249,8 +253,9 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
- int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI, unsigned *PredCost = 0) const;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3907f7535260..9deb96ea9e01 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,12 +62,26 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ bool ghcCall = false;
+
+ if (MF) {
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ if (ghcCall) {
+ return CSR_GHC_SaveList;
+ }
+ else {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ }
}
const uint32_t*
ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
- return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
BitVector ARMBaseRegisterInfo::
@@ -257,8 +271,9 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
}
const TargetRegisterClass *
-ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
- return ARM::GPRRegisterClass;
+ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::GPRRegClass;
}
const TargetRegisterClass *
@@ -369,7 +384,7 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
};
// We only support even/odd hints for GPR and rGPR.
- if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
return RC->getRawAllocationOrder(MF);
if (HintType == ARMRI::RegPairEven) {
@@ -712,6 +727,11 @@ requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
+trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
@@ -932,7 +952,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+ const MachineFunction &MF = *MBB->getParent();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset));
@@ -1110,7 +1131,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
- ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index af7935147e48..da29f7e711d6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -109,7 +109,8 @@ public:
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const;
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
const TargetRegisterClass*
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
@@ -173,6 +174,8 @@ public:
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b9a25126ba67..bda1517685b1 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// The GHC set of callee-saved regs is empty, as all those regs are
+// used for passing STG regs around.
+// 'add' is a workaround for not being able to compile an empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
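
For context, the convention defined above is reached from IR: a function (and its call sites) tagged with the GHC calling convention is what selects CC_ARM_APCS_GHC and the empty CSR_GHC save list. A minimal, illustrative snippet using the 3.1-era headers; markAsGHC is a made-up helper:

#include "llvm/CallingConv.h"
#include "llvm/Function.h"
using namespace llvm;

static void markAsGHC(Function &F) {
  // Callers must be emitted with the same convention as the callee.
  F.setCallingConv(CallingConv::GHC);
}
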
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 32ef345c058f..e81b4cc282c9 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -254,7 +254,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
uint32_t Binary;
Binary = Imm12 & 0xfff;
@@ -296,7 +296,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
// Special value for #-0
@@ -352,6 +352,12 @@ namespace {
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
intptr_t JTBase = 0) const;
+ unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
};
}
@@ -440,7 +446,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getARMRegisterNumbering(MO.getReg());
+ return II->getRegisterInfo().getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
@@ -776,7 +782,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
Binary |= 1 << ARMII::I_BitShift;
@@ -963,7 +969,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
if (Rs) {
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
// Encode shift_imm bit[11:7].
@@ -1014,7 +1020,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
else if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
if (MCID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
@@ -1064,7 +1070,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else {
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
++OpIdx;
@@ -1081,7 +1087,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (MO.isReg()) {
// Encode register Rm.
- emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
return;
}
@@ -1124,14 +1130,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1158,7 +1164,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
Binary |= 1 << ARMII::I_BitShift;
assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
// Set bit[3:0] to the corresponding Rm register
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
// If this instr is in scaled register offset/index instruction, set
// shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1202,7 +1208,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1221,7 +1227,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// If this instr is in register offset/index encoding, set bit[3:0]
// to the corresponding Rm register.
if (MO2.getReg()) {
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
emitWordLE(Binary);
return;
}
@@ -1287,7 +1293,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
- unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
RegNum < 16);
Binary |= 0x1 << RegNum;
@@ -1530,7 +1536,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
// The return register is LR.
- Binary |= getARMRegisterNumbering(ARM::LR);
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1538,11 +1544,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
- RegD = getARMRegisterNumbering(RegD);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
if (!isSPVFP)
Binary |= RegD << ARMII::RegRdShift;
else {
@@ -1552,11 +1559,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
- RegN = getARMRegisterNumbering(RegN);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
if (!isSPVFP)
Binary |= RegN << ARMII::RegRnShift;
else {
@@ -1566,11 +1574,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
- RegM = getARMRegisterNumbering(RegM);
+ bool isSPVFP = ARM::SPRRegClass.contains(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
if (!isSPVFP)
Binary |= RegM;
else {
@@ -1757,28 +1766,31 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
Binary |= (RegD & 0xf) << ARMII::RegRdShift;
Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
return Binary;
}
-static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
Binary |= (RegN & 0xf) << ARMII::RegRnShift;
Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
return Binary;
}
-static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
Binary |= (RegM & 0xf);
Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
return Binary;
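
The three NEON encoders above all perform the same split: the 5-bit register encoding is divided into a 4-bit instruction field plus one extra bit (the D/N/M bit). A condensed sketch for the Rm case, under a hypothetical name:

static unsigned packNEONRm(unsigned RegMEnc) {
  unsigned Binary = RegMEnc & 0xf;                     // bits [3:0] -> Rm field
  Binary |= ((RegMEnc >> 4) & 1) << ARMII::M_BitShift; // bit 4 -> the M bit
  return Binary;
}
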
@@ -1812,7 +1824,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, RegNOpIdx);
@@ -1841,7 +1853,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(1).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, 0);
emitWordLE(Binary);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index fc35c7cb0200..a9539850247f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -69,27 +69,6 @@ static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
return 0;
}
-/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
-/// add padding such that:
-///
-/// 1. The result is aligned to 1 << LogAlign.
-///
-/// 2. No other value of the unknown bits would require more padding.
-///
-/// This may add more padding than is required to satisfy just one of the
-/// constraints. It is necessary to compute alignment this way to guarantee
-/// that we don't underestimate the padding before an aligned block. If the
-/// real padding before a block is larger than we think, constant pool entries
-/// may go out of range.
-static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
- unsigned KnownBits) {
- // Add the worst possible padding that the unknown bits could cause.
- Offset += UnknownPadding(LogAlign, KnownBits);
-
- // Then align the result.
- return RoundUpToAlignment(Offset, 1u << LogAlign);
-}
-
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -109,7 +88,12 @@ namespace {
/// Offset - Distance from the beginning of the function to the beginning
/// of this basic block.
///
- /// The offset is always aligned as required by the basic block.
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
unsigned Offset;
/// Size - Size of the basic block in bytes. If the block contains
@@ -140,7 +124,12 @@ namespace {
/// This number should be used to predict worst case padding when
/// splitting the block.
unsigned internalKnownBits() const {
- return Unalign ? Unalign : KnownBits;
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
}
/// Compute the offset immediately following this block. If LogAlign is
@@ -152,7 +141,7 @@ namespace {
if (!LA)
return PO;
// Add alignment padding from the terminator.
- return WorstCaseAlign(PO, LA, internalKnownBits());
+ return PO + UnknownPadding(LA, internalKnownBits());
}
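
postOffset now adds UnknownPadding instead of worst-case aligning. UnknownPadding's body lies outside this hunk; consistent with its uses here, it presumably returns the largest padding the unknown low offset bits could force. A hedged sketch under that assumption, with a renamed helper:

static inline unsigned unknownPaddingSketch(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits >= LogAlign)
    return 0;                                   // enough low bits known: no padding risk
  return (1u << LogAlign) - (1u << KnownBits);  // worst case the unknown bits can add
}
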
/// Compute the number of known low bits of postOffset. If this block
@@ -342,9 +331,7 @@ void ARMConstantIslands::verify() {
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- unsigned Align = MBB->getAlignment();
unsigned MBBId = MBB->getNumber();
- assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
@@ -428,7 +415,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF->EnsureAlignment(2); // 2 = log2(4)
+ MF->ensureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
@@ -529,7 +516,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- MF->EnsureAlignment(BB->getAlignment());
+ MF->ensureAlignment(BB->getAlignment());
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
@@ -828,7 +815,7 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
// tBR_JTr contains a .align 2 directive.
if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
BBI.PostAlign = 2;
- MBB->getParent()->EnsureAlignment(2);
+ MBB->getParent()->ensureAlignment(2);
}
}
@@ -1045,7 +1032,6 @@ bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = getOffsetOf(CPEMI);
- assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
DEBUG({
@@ -1256,11 +1242,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
- // End of UserBlock after adding a branch.
- unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
- UserBBI.postKnownBits());
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
@@ -1299,20 +1282,16 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// up the insertion point.
// Try to split the block so it's fully aligned. Compute the latest split
- // point where we can add a 4-byte branch instruction, and then
- // WorstCaseAlign to LogAlign.
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
unsigned LogAlign = MF->getAlignment();
assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
unsigned UPad = UnknownPadding(LogAlign, KnownBits);
- unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
- // Account for alignment and unknown padding.
- BaseInsertOffset &= ~((1u << LogAlign) - 1);
- BaseInsertOffset -= UPad;
-
// The 4 in the following is for the unconditional branch we'll be inserting
// (allows for long branch on Thumb1). Alignment of the island is handled
// inside isOffsetInRange.
@@ -1327,11 +1306,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// pool entries following this block; only the last one is in the water list.
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
- if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
- BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset =
- WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
CPEMI->getOperand(2).getImm();
MachineBasicBlock::iterator MI = UserMI;
++MI;
@@ -1342,6 +1321,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
@@ -1353,9 +1333,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// reused within the block, but it doesn't matter much. Also assume CPEs
// are added in order with alignment padding. We may eventually be able
// to pack the aligned CPEs better.
- EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
- 1u << getCPELogAlign(U.CPEMI)) +
- U.CPEMI->getOperand(2).getImm();
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
CPUIndex++;
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5fc0360528cc..15bb32eb149f 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -459,22 +459,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
- MIB.addReg(D1);
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
- MIB.addReg(D2);
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
- MIB.addReg(D3);
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill) // Add an implicit kill for the super-reg.
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
@@ -925,7 +926,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
AddDefaultPred(MIB3);
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -1008,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
&ARM::DPR_VFP2RegClass);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 2e1eaca85b5b..57f811603945 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -47,11 +47,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool>
-DisableARMFastISel("disable-arm-fast-isel",
- cl::desc("Turn off experimental ARM fast-isel support"),
- cl::init(false), cl::Hidden);
-
extern cl::opt<bool> EnableARMLongCalls;
namespace {
@@ -92,8 +87,9 @@ class ARMFastISel : public FastISel {
LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
- : FastISel(funcInfo),
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
TM(funcInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()) {
@@ -172,6 +168,7 @@ class ARMFastISel : public FastISel {
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
private:
@@ -182,7 +179,6 @@ class ARMFastISel : public FastISel {
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
-
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -195,21 +191,25 @@ class ARMFastISel : public FastISel {
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
- unsigned ARMSelectCallOp(const GlobalValue *GV);
+ unsigned ARMSelectCallOp(bool UseReg);
// Call handling routines.
private:
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
- unsigned &NumBytes);
+ unsigned &NumBytes,
+ bool isVarArg);
+ unsigned getLibcallReg(const Twine &Name);
bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes);
+ unsigned &NumBytes, bool isVarArg);
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
// OptionalDef handling routines.
@@ -719,7 +719,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
MVT VT;
- if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
@@ -910,8 +910,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass
- : ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -1005,7 +1006,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
useAM3 = true;
}
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::i16:
if (isThumb2) {
@@ -1017,7 +1018,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
useAM3 = true;
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::i32:
if (isThumb2) {
@@ -1028,7 +1029,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
} else {
Opc = ARM::LDRi12;
}
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
@@ -1037,7 +1038,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
} else {
Opc = ARM::VLDRS;
RC = TLI.getRegClassFor(VT);
@@ -1106,8 +1107,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass);
+ unsigned Res = createResultReg(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
@@ -1358,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
- return true;
+ return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -1423,12 +1425,12 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
if (!UseImm)
CmpOpc = ARM::t2CMPrr;
else
- CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
} else {
if (!UseImm)
CmpOpc = ARM::CMPrr;
else
- CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
}
break;
}
@@ -1491,8 +1493,9 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
// Now set a register based on the comparison. Explicitly set the predicates
// here.
unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
@@ -1516,7 +1519,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
unsigned Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ unsigned Result = createResultReg(&ARM::DPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VCVTDS), Result)
.addReg(Op));
@@ -1535,7 +1538,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
unsigned Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ unsigned Result = createResultReg(&ARM::SPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VCVTSD), Result)
.addReg(Op));
@@ -1736,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
-
+
unsigned Opc;
switch (ISDOpcode) {
default: return false;
@@ -1809,10 +1812,11 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-// This is largely taken directly from CCAssignFnForNode - we don't support
-// varargs in FastISel so that part has been removed.
+// This is largely taken directly from CCAssignFnForNode
// TODO: We may not support all of this.
-CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
@@ -1825,18 +1829,26 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- TM.Options.FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
} else
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ // Fall through to soft float variant, variadic functions don't
+ // use hard floating point ABI.
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
}
}
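
With the fall-through added above, variadic calls never get the VFP register assignment, even when the callee is declared ARM_AAPCS_VFP. For illustration:

CCAssignFn *Fn = CCAssignFnForCall(CallingConv::ARM_AAPCS_VFP,
                                   /*Return=*/false, /*isVarArg=*/true);
// Fn is CC_ARM_AAPCS here (the soft-float assignment), not CC_ARM_AAPCS_VFP.
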
@@ -1846,10 +1858,12 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
- unsigned &NumBytes) {
+ unsigned &NumBytes,
+ bool isVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
+ CCAssignFnForCall(CC, false, isVarArg));
// Check that we can handle all of the arguments. If we can't, then bail out
// now before we add code to the MBB.
@@ -1981,7 +1995,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+ unsigned &NumBytes, bool isVarArg) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -1991,8 +2005,8 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
// Copy all of the result registers out of their specified physreg.
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
@@ -2041,9 +2055,6 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- if (F.isVarArg())
- return false;
-
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
@@ -2053,7 +2064,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
+ F.isVarArg()));
const Value *RV = Ret->getOperand(0);
unsigned Reg = getRegForValue(RV);
@@ -2110,12 +2122,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
- if (isThumb2) {
- return ARM::tBL;
- } else {
- return ARM::BL;
- }
+unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
+ if (UseReg)
+ return isThumb2 ? ARM::tBLXr : ARM::BLX;
+ else
+ return isThumb2 ? ARM::tBL : ARM::BL;
+}
+
+unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0, Name);
+ return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2136,8 +2153,14 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
else if (!isTypeLegal(RetTy, RetVT))
return false;
- // TODO: For now if we have long calls specified we don't handle the call.
- if (EnableARMLongCalls) return false;
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
@@ -2170,23 +2193,36 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Handle the arguments now that we've gotten them.
SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, false))
return false;
+ unsigned CalleeReg = 0;
+ if (EnableARMLongCalls) {
+ CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
+ if (CalleeReg == 0) return false;
+ }
+
// Issue the call.
- MachineInstrBuilder MIB;
- unsigned CallOpc = ARMSelectCallOp(NULL);
- if (isThumb2)
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)))
- .addExternalSymbol(TLI.getLibcallName(Call));
- else
+ unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
+ if (isThumb2) {
// Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addExternalSymbol(TLI.getLibcallName(Call)));
+ AddDefaultPred(MIB);
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ } else {
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ // Explicitly adding the predicate here.
+ AddDefaultPred(MIB);
+ }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2197,7 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -2213,22 +2249,15 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
- return false;
-
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
// TODO: Avoid some calling conventions?
- // Let SDISel handle vararg functions.
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
- return false;
+ bool isVarArg = FTy->isVarArg();
// Handle *simple* calls for now.
Type *RetTy = I->getType();
@@ -2239,8 +2268,15 @@ bool ARMFastISel::SelectCall(const Instruction *I,
RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
- // TODO: For now if we have long calls specified we don't handle the call.
- if (EnableARMLongCalls) return false;
+ // Can't handle non-double multi-reg retvals.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
+ RetVT != MVT::i16 && RetVT != MVT::i32) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
+ if (RVLocs.size() >= 2 && RetVT != MVT::f64)
+ return false;
+ }
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
@@ -2295,33 +2331,49 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Handle the arguments now that we've gotten them.
SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, isVarArg))
return false;
+ bool UseReg = false;
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || EnableARMLongCalls) UseReg = true;
+
+ unsigned CalleeReg = 0;
+ if (UseReg) {
+ if (IntrMemName)
+ CalleeReg = getLibcallReg(IntrMemName);
+ else
+ CalleeReg = getRegForValue(Callee);
+
+ if (CalleeReg == 0) return false;
+ }
+
// Issue the call.
- MachineInstrBuilder MIB;
- unsigned CallOpc = ARMSelectCallOp(GV);
- // Explicitly adding the predicate here.
+ unsigned CallOpc = ARMSelectCallOp(UseReg);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(CallOpc));
if(isThumb2) {
// Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)));
- if (!IntrMemName)
+ AddDefaultPred(MIB);
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
- else
+ else
MIB.addExternalSymbol(IntrMemName, 0);
} else {
- if (!IntrMemName)
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0));
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
else
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addExternalSymbol(IntrMemName, 0));
+ MIB.addExternalSymbol(IntrMemName, 0);
+
+ // Explicitly adding the predicate here.
+ AddDefaultPred(MIB);
}
-
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2332,7 +2384,8 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
+ return false;
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -2383,6 +2436,42 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ unsigned LdrOpc;
+ const TargetRegisterClass *RC;
+ if (isThumb2) {
+ LdrOpc = ARM::t2LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
+ } else {
+ LdrOpc = ARM::LDRi12;
+ RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
+ }
+
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = FramePtr;
+
+ // Recursively load frame address
+ // ldr r0 [fp]
+ // ldr r0 [r0]
+ // ldr r0 [r0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(LdrOpc), DestReg)
+ .addReg(SrcReg).addImm(0));
+ SrcReg = DestReg;
+ }
+ UpdateValueMap(&I, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const MemTransferInst &MTI = cast<MemTransferInst>(I);
@@ -2406,10 +2495,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return true;
}
}
-
+
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
return false;
@@ -2421,20 +2510,24 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// Don't handle volatile.
if (MSI.isVolatile())
return false;
-
+
if (!MSI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MSI.getDestAddressSpace() > 255)
return false;
-
+
return SelectCall(&I, "memset");
}
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
+ return true;
+ }
}
}
bool ARMFastISel::SelectTrunc(const Instruction *I) {
- // The high bits for a type smaller than the register size are assumed to be
+ // The high bits for a type smaller than the register size are assumed to be
// undefined.
Value *Op = I->getOperand(0);
@@ -2522,6 +2615,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
return true;
}
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+ // Thumb2 mode is handled by the target-independent selector
+ // or by SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+ // Fall back to SelectionDAG isel if the shift amount
+ // is zero or at least the width of the value type.
+ if (ShiftImm == 0 || ShiftImm >= 32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ if (ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
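
SelectShift above covers plain i32 shifts in ARM (non-Thumb2) fast-isel. The kind of IR it now handles, built with standard IRBuilder calls; the value names are hypothetical:

Value *ByImm = Builder.CreateShl(X, Builder.getInt32(3)); // constant amount -> MOVsi, lsl #3
Value *ByReg = Builder.CreateLShr(X, Amount);             // register amount -> MOVsr, lsr
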
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -2582,6 +2730,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
default: break;
}
return false;
@@ -2625,7 +2779,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
-
+
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
@@ -2634,15 +2788,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
namespace llvm {
- FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
// Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
- !DisableARMFastISel)
- return new ARMFastISel(funcInfo);
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
+ return new ARMFastISel(funcInfo, libInfo);
return 0;
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 402ecb0c5ffd..aee72d21e2cc 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,6 +15,8 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
@@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -790,7 +800,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// The writeback is only needed when emitting two vst1.64 instructions.
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
ARM::R4)
@@ -808,7 +818,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// 16-byte aligned vst1.64 with 4 d-regs, no writeback.
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
.addReg(ARM::R4).addImm(16).addReg(NextReg)
@@ -820,7 +830,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// 16-byte aligned vst1.64 with 2 d-regs.
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QPRRegisterClass);
+ &ARM::QPRRegClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
.addReg(ARM::R4).addImm(16).addReg(SupReg));
@@ -908,7 +918,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 4 d-regs and writeback.
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
.addReg(ARM::R4, RegState::Define)
.addReg(ARM::R4, RegState::Kill).addImm(16)
@@ -924,7 +934,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 4 d-regs, no writeback.
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QQPRRegisterClass);
+ &ARM::QQPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
.addReg(ARM::R4).addImm(16)
.addReg(SupReg, RegState::ImplicitDefine));
@@ -935,7 +945,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// 16-byte aligned vld1.64 with 2 d-regs.
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
- ARM::QPRRegisterClass);
+ &ARM::QPRRegClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
.addReg(ARM::R4).addImm(16));
NextReg += 2;
@@ -1244,7 +1254,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
CanEliminateFrame = false;
}
- if (!ARM::GPRRegisterClass->contains(Reg))
+ if (!ARM::GPRRegClass.contains(Reg))
continue;
if (Spilled) {
@@ -1404,7 +1414,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
- const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
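The ARM::*RegisterClass to &ARM::*RegClass rewrites in this file (and in the files below) all follow one pattern: call sites stop going through an exported pointer and instead take the address of, or call methods directly on, the statically allocated register class object. A self-contained sketch of the two idioms, using a stand-in type rather than the real TargetRegisterClass, is:

    // Hedged illustration only; the struct is a stand-in, not LLVM's class.
    #include <cstdio>

    struct RegClassSketch {
      unsigned Size;
      bool contains(unsigned Reg) const { return Reg < 16; }
    };

    // New style: one const object per class, taken by address where a
    // pointer is still needed...
    static const RegClassSketch GPRRegClass = { 4 };
    // ...where the old style exported a pointer alias to the same object.
    static const RegClassSketch *GPRRegisterClass = &GPRRegClass;

    int main() {
      const RegClassSketch *RC = &GPRRegClass;   // new call-site form
      std::printf("%d %d\n", RC->contains(3), GPRRegisterClass->contains(3));
      return 0;
    }

Predicates accordingly switch from ->contains(Reg) on the exported pointer to .contains(Reg) on the object, as in the GPRRegClass check in processFunctionBeforeCalleeSavedScan above.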
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1eafbbc8f64d..ee349a753fef 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
-static cl::opt<bool>
-DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
- cl::desc("Enable / disable ARM integer abs transform"),
- cl::init(false));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -210,29 +205,29 @@ private:
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes0, unsigned *QOpcodes1);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes0, unsigned *QOpcodes1);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes);
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *Opcodes);
+ const uint16_t *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -583,8 +578,6 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
-
-
//-----
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
@@ -1597,8 +1590,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
}
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1729,8 +1723,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
}
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1875,8 +1870,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
bool isUpdating, unsigned NumVecs,
- unsigned *DOpcodes,
- unsigned *QOpcodes) {
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1994,7 +1989,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
}
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
- unsigned NumVecs, unsigned *Opcodes) {
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -2491,14 +2487,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (DisableARMIntABS)
- return NULL;
-
if (Subtarget->isThumb1Only())
return NULL;
- if (XORSrc0.getOpcode() != ISD::ADD ||
- XORSrc1.getOpcode() != ISD::SRA)
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
return NULL;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
@@ -2509,16 +2501,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 &&
- ADDSrc0 == SRASrc0 &&
- XType.isInteger() &&
- SRAConstant != NULL &&
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != NULL &&
Size == SRAConstant->getZExtValue()) {
-
- unsigned Opcode = ARM::ABS;
- if (Subtarget->isThumb2())
- Opcode = ARM::t2ABS;
-
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
@@ -2893,176 +2879,199 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
- ARM::VLD2DUPd32 };
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
case ARMISD::VLD3DUP: {
- unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
- ARM::VLD3DUPd32Pseudo };
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
return SelectVLDDup(N, false, 3, Opcodes);
}
case ARMISD::VLD4DUP: {
- unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
- ARM::VLD4DUPd32Pseudo };
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
return SelectVLDDup(N, false, 4, Opcodes);
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
- ARM::VLD2DUPd32wb_fixed };
+ static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
+ ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
case ARMISD::VLD3DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
- ARM::VLD3DUPd32Pseudo_UPD };
+ static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
+ ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 3, Opcodes);
}
case ARMISD::VLD4DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
- ARM::VLD4DUPd32Pseudo_UPD };
+ static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
+ ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
return SelectVLDDup(N, true, 4, Opcodes);
}
case ARMISD::VLD1_UPD: {
- unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
- ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
- unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
- ARM::VLD1q16wb_fixed,
- ARM::VLD1q32wb_fixed,
- ARM::VLD1q64wb_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
+ ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed,
+ ARM::VLD1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
- ARM::VLD2d16wb_fixed,
- ARM::VLD2d32wb_fixed,
- ARM::VLD1q64wb_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
- ARM::VLD2q16PseudoWB_fixed,
- ARM::VLD2q32PseudoWB_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
- unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
- ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
- unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
- ARM::VLD3q16Pseudo_UPD,
- ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
- ARM::VLD3q16oddPseudo_UPD,
- ARM::VLD3q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
+ ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD4_UPD: {
- unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
- unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
+ ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VLD2LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
- ARM::VLD2LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
- ARM::VLD2LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
+ ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VLD3LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
- ARM::VLD3LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
- ARM::VLD3LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
+ ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VLD4LN_UPD: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
- ARM::VLD4LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
- ARM::VLD4LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
+ ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
}
case ARMISD::VST1_UPD: {
- unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
- ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
- unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
- ARM::VST1q16wb_fixed,
- ARM::VST1q32wb_fixed,
- ARM::VST1q64wb_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
+ ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed,
+ ARM::VST1d64wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
- ARM::VST2d16wb_fixed,
- ARM::VST2d32wb_fixed,
- ARM::VST1q64wb_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
- ARM::VST2q16PseudoWB_fixed,
- ARM::VST2q32PseudoWB_fixed };
+ static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
- unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
- unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
- ARM::VST3q16Pseudo_UPD,
- ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
- ARM::VST3q16oddPseudo_UPD,
- ARM::VST3q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
+ ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD,
+ ARM::VST1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST4_UPD: {
- unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
- unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
+ ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case ARMISD::VST2LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
- ARM::VST2LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
- ARM::VST2LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
+ ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
}
case ARMISD::VST3LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
- ARM::VST3LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
- ARM::VST3LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
+ ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
}
case ARMISD::VST4LN_UPD: {
- unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
- ARM::VST4LNd32Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
- ARM::VST4LNq32Pseudo_UPD };
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
+ ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
}
@@ -3179,124 +3188,144 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_neon_vld1: {
- unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
- ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64};
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
- ARM::VLD2q32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
- ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
- unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
- ARM::VLD3q16Pseudo_UPD,
- ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
- ARM::VLD3q16oddPseudo,
- ARM::VLD3q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
+ ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
- ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
- unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
- ARM::VLD4q16oddPseudo,
- ARM::VLD4q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
+ ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
- ARM::VLD2LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
+ ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
+ ARM::VLD2LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
- ARM::VLD3LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
+ ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
+ ARM::VLD3LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
- ARM::VLD4LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
+ ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
+ ARM::VLD4LNq32Pseudo };
return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
- unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
- ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
- ARM::VST2q32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
- ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
- unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
- ARM::VST3q16Pseudo_UPD,
- ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
- ARM::VST3q16oddPseudo,
- ARM::VST3q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
+ ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo,
+ ARM::VST1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
- ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
- unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
- ARM::VST4q16oddPseudo,
- ARM::VST4q32oddPseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
+ ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo,
+ ARM::VST1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
- ARM::VST2LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
+ ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
+ ARM::VST2LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
- ARM::VST3LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
+ ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
+ ARM::VST3LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
- ARM::VST4LNd32Pseudo };
- unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
+ ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
+ ARM::VST4LNq32Pseudo };
return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
}
}
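The recurring change in this file is mechanical: the local unsigned Opcodes[] tables become static const uint16_t tables, and the SelectVLD/SelectVST/SelectVLDSTLane/SelectVLDDup parameters become const uint16_t *. A small sketch of the effect, with hypothetical opcode values standing in for the ARM::VLD1d8-style enumerators and under the assumption that target opcodes fit in 16 bits:

    // Hedged sketch: static const tables live in read-only data and are not
    // rebuilt on the stack each time Select runs; uint16_t halves each entry.
    #include <cstdint>
    #include <cstdio>

    static const uint16_t DOpcodesSketch[] = { 100, 101, 102, 103 };

    static uint16_t pickOpcode(const uint16_t *Table, unsigned EltSizeIndex) {
      return Table[EltSizeIndex];
    }

    int main() {
      std::printf("opcode %u, %zu vs %zu bytes per entry\n",
                  (unsigned)pickOpcode(DOpcodesSketch, 2),
                  sizeof(uint16_t), sizeof(unsigned));
      return 0;
    }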
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a103c94cede4..c66618a8ef5f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -52,6 +52,7 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
// This option should go away when tail calls fully work.
static cl::opt<bool>
@@ -89,76 +90,71 @@ static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
-void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
- EVT PromotedBitwiseVT) {
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
- EVT ElemTy = VT.getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
- }
- setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
- setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
- setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::AND, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::OR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
}
-void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::DPRRegisterClass);
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
-void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
- addRegisterClass(VT, ARM::QPRRegisterClass);
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
@@ -431,14 +427,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
if (Subtarget->isThumb1Only())
- addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
- addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
- addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f32, &ARM::SPRRegClass);
if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -824,6 +820,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
+ // Prefer likely predicted branches to selects on out-of-order cores.
+ predictableSelectIsExpensive = Subtarget->isCortexA9();
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -849,7 +848,7 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
// the cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
@@ -859,15 +858,15 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
- RRC = ARM::DPRRegisterClass;
+ RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
@@ -891,6 +890,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
@@ -1027,17 +1027,18 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
- return ARM::QQPRRegisterClass;
- else if (VT == MVT::v8i64)
- return ARM::QQQQPRRegisterClass;
+ return &ARM::QQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::QQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
// Create a fast isel object.
FastISel *
-ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return ARM::createFastISel(funcInfo);
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
@@ -1166,6 +1167,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -1286,14 +1289,20 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
-ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool doesNotRet = CLI.DoesNotReturn;
+ bool isVarArg = CLI.IsVarArg;
+
MachineFunction &MF = DAG.getMachineFunction();
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool IsSibCall = false;
@@ -1415,21 +1424,22 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CCInfo.clearFirstByValReg();
}
- unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
- SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
- StkPtrOff);
- SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
- SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
- MVT::i32);
- MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
- Flags.getByValAlign(),
- /*isVolatile=*/false,
- /*AlwaysInline=*/false,
- MachinePointerInfo(0),
- MachinePointerInfo(0)));
-
+ if (Flags.getByValSize() - 4*offset > 0) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
+ MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
+ Ops, array_lengthof(Ops)));
+ }
} else if (!IsSibCall) {
assert(VA.isMemLoc());
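In the byval hunk above, offset counts the argument's leading 4-byte pieces that were already assigned to GPRs, so the new guard only emits the ARMISD::COPY_STRUCT_BYVAL node when stack bytes remain. A tiny arithmetic check of that guard, under the assumption that offset is the number of registers consumed:

    // Hedged sketch of the size computation used by the guard above.
    #include <cstdio>

    static int remainingByValBytes(unsigned ByValSize, unsigned RegsUsed) {
      return (int)ByValSize - 4 * (int)RegsUsed;
    }

    int main() {
      std::printf("%d\n", remainingByValBytes(12, 3)); // 0 -> nothing to copy
      std::printf("%d\n", remainingByValBytes(20, 3)); // 8 -> copy 8 bytes
      return 0;
    }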
@@ -2095,12 +2105,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
@@ -2108,7 +2119,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG,
+ TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
DebugLoc dl = GA->getDebugLoc();
SDValue Offset;
@@ -2117,7 +2129,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
- if (GV->isDeclaration()) {
+ if (model == TLSModel::InitialExec) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
@@ -2142,6 +2154,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
false, false, false, 0);
} else {
// local exec model
+ assert(model == TLSModel::LocalExec);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
@@ -2162,12 +2175,18 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- // If the relocation model is PIC, use the "General Dynamic" TLS Model,
- // otherwise use the "Local Exec" TLS Model
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
- return LowerToTLSGeneralDynamicModel(GA, DAG);
- else
- return LowerToTLSExecModels(GA, DAG);
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic:
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModels(GA, DAG, model);
+ }
+ llvm_unreachable("bogus TLS model");
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
@@ -2457,9 +2476,9 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
@@ -2543,9 +2562,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
+ RC = &ARM::tGPRRegClass;
else
- RC = ARM::GPRRegisterClass;
+ RC = &ARM::GPRRegClass;
unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
@@ -2627,14 +2646,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
- RC = ARM::SPRRegisterClass;
+ RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
- RC = ARM::DPRRegisterClass;
+ RC = &ARM::DPRRegClass;
else if (RegVT == MVT::v2f64)
- RC = ARM::QPRRegisterClass;
+ RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = (AFI->isThumb1OnlyFunction() ?
- ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ RC = AFI->isThumb1OnlyFunction() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -4249,6 +4269,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Record this extraction against the appropriate vector if possible...
SDValue SourceVec = V.getOperand(0);
+ // If the element number isn't a constant, we can't effectively
+ // analyze what's going on.
+ if (!isa<ConstantSDNode>(V.getOperand(1)))
+ return SDValue();
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
bool FoundSource = false;
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
@@ -4791,7 +4815,9 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
- Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
@@ -5252,14 +5278,14 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch =
- MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass);
+ unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
@@ -5362,8 +5388,8 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
@@ -5394,8 +5420,9 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5469,8 +5496,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
@@ -5504,8 +5531,9 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5531,7 +5559,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -5586,9 +5614,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
- MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
- MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
@@ -5614,8 +5642,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- const TargetRegisterClass *TRC =
- isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
// thisMBB:
@@ -5722,8 +5751,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC =
- isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = isThumb ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
@@ -5827,8 +5857,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC =
- Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -6176,14 +6207,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
if (Subtarget->isThumb2() &&
- !ARM::tGPRRegisterClass->contains(Reg) &&
- !ARM::hGPRRegisterClass->contains(Reg))
+ !ARM::tGPRRegClass.contains(Reg) &&
+ !ARM::hGPRRegClass.contains(Reg))
continue;
- else if (Subtarget->isThumb1Only() &&
- !ARM::tGPRRegisterClass->contains(Reg))
+ if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
continue;
- else if (!Subtarget->isThumb() &&
- !ARM::GPRRegisterClass->contains(Reg))
+ if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
continue;
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
@@ -6214,6 +6243,304 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
+MachineBasicBlock *ARMTargetLowering::
+EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // This pseudo instruction has four operands: dst, src, size, alignment.
+ // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+ // Otherwise, we will generate unrolled scalar copies.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned src = MI->getOperand(1).getReg();
+ unsigned SizeVal = MI->getOperand(2).getImm();
+ unsigned Align = MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned ldrOpc, strOpc, UnitSize = 0;
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC_Vec = 0;
+
+ if (Align & 1) {
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ UnitSize = 1;
+ } else if (Align & 2) {
+ ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
+ strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
+ UnitSize = 2;
+ } else {
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16) {
+ ldrOpc = ARM::VLD1q32wb_fixed;
+ strOpc = ARM::VST1q32wb_fixed;
+ UnitSize = 16;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
+ }
+ else if ((Align % 8 == 0) && SizeVal >= 8) {
+ ldrOpc = ARM::VLD1d32wb_fixed;
+ strOpc = ARM::VST1d32wb_fixed;
+ UnitSize = 8;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
+ }
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0) {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
+ }
+
+ unsigned BytesLeft = SizeVal % UnitSize;
+ unsigned LoopSize = SizeVal - BytesLeft;
+
+ if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+ // Use LDR and STR to copy.
+ // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+ // [destOut] = STR_POST(scratch, destIn, UnitSize)
+ unsigned srcIn = src;
+ unsigned destIn = dest;
+ for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(destIn).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(UnitSize));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ // Handle the leftover bytes with LDRB and STRB.
+ // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+ // [destOut] = STRB_POST(scratch, destIn, 1)
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+ }
+
+ // Expand the pseudo op to a loop.
+ // thisMBB:
+ // ...
+ // movw varEnd, # --> with thumb2
+ // movt varEnd, #
+ // ldrcp varEnd, idx --> without thumb2
+ // fallthrough --> loopMBB
+ // loopMBB:
+ // PHI varPhi, varEnd, varLoop
+ // PHI srcPhi, src, srcLoop
+ // PHI destPhi, dst, destLoop
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ // subs varLoop, varPhi, #UnitSize
+ // bne loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // epilogue to handle left-over bytes
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Load an immediate to varEnd.
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ unsigned VReg1 = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ VReg1 = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+ .addReg(VReg1)
+ .addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ }
+ BB->addSuccessor(loopMBB);
+
+ // Generate the loop body:
+ // varPhi = PHI(varLoop, varEnd)
+ // srcPhi = PHI(srcLoop, src)
+ // destPhi = PHI(destLoop, dst)
+ MachineBasicBlock *entryBB = BB;
+ BB = loopMBB;
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+ .addReg(varLoop).addMBB(loopMBB)
+ .addReg(varEnd).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+ .addReg(srcLoop).addMBB(loopMBB)
+ .addReg(src).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+ .addReg(destLoop).addMBB(loopMBB)
+ .addReg(dest).addMBB(entryBB);
+
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(destPhi).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addReg(0).addImm(UnitSize));
+ }
+
+ // Decrement loop variable by UnitSize.
+ MachineInstrBuilder MIB = BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ // loopMBB can loop back to loopMBB or fall through to exitMBB.
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // Add epilogue to handle BytesLeft.
+ BB = exitMBB;
+ MachineInstr *StartOfExit = exitMBB->begin();
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ unsigned srcIn = srcLoop;
+ unsigned destIn = destLoop;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+}
+
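A minimal sketch of the size split the expansion above relies on (illustrative only; the variable names mirror those used in EmitStructByval, but this helper is not part of the patch): the total byte count is divided into a unit-sized portion handled by the LDR/STR_POST loop and a remainder handled by the LDRB/STRB epilogue.

// Hypothetical helper showing the assumed split of a byval copy.
static void splitByvalCopy(unsigned SizeVal, unsigned UnitSize,
                           unsigned &LoopSize, unsigned &BytesLeft) {
  BytesLeft = SizeVal % UnitSize;   // e.g. 18 % 4 == 2 -> LDRB/STRB epilogue
  LoopSize  = SizeVal - BytesLeft;  // e.g. 16 bytes    -> LDR/STR_POST loop
}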
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -6517,10 +6844,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewMovDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
- unsigned NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
@@ -6534,12 +6860,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
- // insert a movs at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
- NewMovDstReg)
- .addReg(ABSSrcReg, RegState::Kill)
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::CPSR, RegState::Define);
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -6551,7 +6875,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(NewMovDstReg, RegState::Kill)
+ .addReg(ABSSrcReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
// insert PHI in SinkBB,
@@ -6559,7 +6883,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(NewMovDstReg).addMBB(BB);
+ .addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI->eraseFromParent();
@@ -6567,6 +6891,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// return last added BB
return SinkBB;
}
+ case ARM::COPY_STRUCT_BYVAL_I32:
+ ++NumLoopByVals;
+ return EmitStructByval(MI, BB);
}
}
@@ -7095,8 +7422,12 @@ static SDValue PerformORCombine(SDNode *N,
return COR;
}
+
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
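One concrete case for the comment above (my own example, not taken from the patch): an OR of two complementary AND masks is a bitfield-insert candidate, and the fold only pays off when the inner AND feeds nothing but the OR, which is what the hasOneUse() check guards.

#include <cstdint>

// Likely BFI candidate on ARM when the inner AND has a single use.
static uint32_t insertLowByte(uint32_t X, uint32_t Y) {
  return (X & 0xFFFFFF00u) | (Y & 0xFFu);
}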
@@ -7353,7 +7684,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (St->isVolatile())
return SDValue();
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
// pack all of the elements in one place. Next, store to memory in fewer
// chunks.
SDValue StVal = St->getValue();
@@ -8721,12 +9052,19 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return Imm >= 0 && Imm <= 255;
}
-/// isLegalAddImmediate - Return true if the specified immediate is legal
-/// add immediate, that is the target has add instructions which can add
-/// a register with the immediate without having to materialize the
+/// isLegalAddImmediate - Return true if the specified immediate is a legal add
+/// *or sub* immediate, that is, the target has add or sub instructions which can
+/// add a register with the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
- return ARM_AM::getSOImmVal(Imm) != -1;
+ // Same encoding for add/sub, just flip the sign.
+ int64_t AbsImm = llvm::abs64(Imm);
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(AbsImm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(AbsImm) != -1;
+ // Thumb1 only has 8-bit unsigned immediate.
+ return AbsImm >= 0 && AbsImm <= 255;
}
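For reference, a self-contained sketch of the ARM-mode test the code above defers to; this re-implements the modified-immediate rule (an 8-bit value rotated right by an even amount) and is only an approximation of ARM_AM::getSOImmVal, which additionally returns the encoding.

#include <cstdint>

// Returns true if V can be encoded as an ARM-mode modified immediate.
static bool isARMModifiedImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating V left by Rot undoes a right rotation by Rot in the encoding.
    uint32_t Rotated = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}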
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
@@ -9030,39 +9368,38 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::tGPRRegisterClass);
- else
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::tGPRRegClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::hGPRRegisterClass);
+ return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
- return RCPair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPRRegisterClass);
+ return RCPair(0U, &ARM::DPRRegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPRRegisterClass);
+ return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPR_8RegisterClass);
+ return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPR_8RegisterClass);
+ return RCPair(0U, &ARM::DPR_8RegClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPR_8RegisterClass);
+ return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, &ARM::SPRRegClass);
break;
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 352d98001ddc..51d120507482 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -41,6 +41,9 @@ namespace llvm {
// PIC mode.
WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+ // Add pseudo op to model memcpy for struct byval.
+ COPY_STRUCT_BYVAL,
+
CALL, // Function call.
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
@@ -53,6 +56,7 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
CMP, // ARM compare instructions.
+ CMN, // ARM CMN instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
@@ -357,7 +361,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
Sched::Preference getSchedulingPreference(SDNode *N) const;
@@ -389,9 +394,9 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
- void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
- void addDRTypeForNEON(EVT VT);
- void addQRTypeForNEON(EVT VT);
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
@@ -422,7 +427,8 @@ namespace llvm {
SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG,
+ TLSModel::Model model) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -462,13 +468,7 @@ namespace llvm {
unsigned &VARegSize, unsigned &VARegSaveSize) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
@@ -532,6 +532,9 @@ namespace llvm {
MachineBasicBlock *MBB) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
@@ -542,7 +545,8 @@ namespace llvm {
namespace ARM {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index f04926aaceba..c8966fb97a4c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -827,6 +827,8 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{7-4} = 0b0111;
let Inst{9-8} = 0b00;
let Inst{27-20} = opcod;
+
+ let Unpredictable{9-8} = 0b11;
}
// Misc Arithmetic instructions.
@@ -1862,7 +1864,6 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
-
// Instruction operands.
bits<5> Vd;
bits<5> Vn;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index b8f607eb4c55..31b0c41f08f5 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
if (hasNOP()) {
- NopInst.setOpcode(ARM::NOP);
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
} else {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1eb561d69016..6340a58f1a0f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -18,6 +18,9 @@
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
@@ -90,6 +93,10 @@ def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
+ SDT_ARMStructByVal,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -121,6 +128,9 @@ def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutGlue]>;
+def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
[SDNPOutGlue, SDNPCommutative]>;
@@ -161,53 +171,59 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
// ARM Instruction Predicate Definitions.
//
def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
- AssemblerPredicate<"HasV4TOps">;
+ AssemblerPredicate<"HasV4TOps", "armv4t">;
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
- AssemblerPredicate<"HasV5TEOps">;
+ AssemblerPredicate<"HasV5TEOps", "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
- AssemblerPredicate<"HasV6Ops">;
+ AssemblerPredicate<"HasV6Ops", "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
- AssemblerPredicate<"HasV6T2Ops">;
+ AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
- AssemblerPredicate<"HasV7Ops">;
+ AssemblerPredicate<"HasV7Ops", "armv7">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
- AssemblerPredicate<"FeatureVFP2">;
+ AssemblerPredicate<"FeatureVFP2", "VFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
- AssemblerPredicate<"FeatureVFP3">;
+ AssemblerPredicate<"FeatureVFP3", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
- AssemblerPredicate<"FeatureVFP4">;
+ AssemblerPredicate<"FeatureVFP4", "VFP4">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<"FeatureNEON">;
+ AssemblerPredicate<"FeatureNEON", "NEON">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16">;
+ AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv">;
+ AssemblerPredicate<"FeatureHWDiv", "divide">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate<"FeatureT2XtPk">;
+ AssemblerPredicate<"FeatureT2XtPk",
+ "pack/extract">;
def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
- AssemblerPredicate<"FeatureDSPThumb2">;
+ AssemblerPredicate<"FeatureDSPThumb2",
+ "thumb2-dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
- AssemblerPredicate<"FeatureDB">;
+ AssemblerPredicate<"FeatureDB",
+ "data-barriers">;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate<"FeatureMP">;
+ AssemblerPredicate<"FeatureMP",
+ "mp-extensions">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
- AssemblerPredicate<"ModeThumb">;
+ AssemblerPredicate<"ModeThumb", "thumb">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
- AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+ AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ "thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass">;
+ AssemblerPredicate<"FeatureMClass", "armv7m">;
def IsARClass : Predicate<"!Subtarget->isMClass()">,
- AssemblerPredicate<"!FeatureMClass">;
+ AssemblerPredicate<"!FeatureMClass",
+ "armv7a/r">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
- AssemblerPredicate<"!ModeThumb">;
+ AssemblerPredicate<"!ModeThumb", "arm-mode">;
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -220,7 +236,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
@@ -236,9 +253,9 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
-// so_imm_neg def below.
-def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+// imm_neg_XFORM - Return an imm value packed into the format described for
+// imm_neg defs below.
+def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -257,7 +274,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
int64_t Value = -(int)N->getZExtValue();
return Value && ARM_AM::getSOImmVal(Value) != -1;
- }], so_imm_neg_XFORM> {
+ }], imm_neg_XFORM> {
let ParserMatchClass = so_imm_neg_asmoperand;
}
@@ -399,8 +416,11 @@ def pclabel : Operand<i32> {
}
// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
def adrlabel : Operand<i32> {
let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand";
}
def neon_vcvt_imm32 : Operand<i32> {
@@ -570,7 +590,10 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand {
+ let Name = "Imm0_15";
+ let DiagnosticType = "ImmRange0_15";
+}
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -615,6 +638,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_65535AsmOperand;
}
+// imm0_65535_neg - An immediate whose negative value is in the range [0,65535].
+def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
+ return -Imm >= 0 && -Imm < 65536;
+}]>;
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
@@ -940,9 +968,10 @@ include "ARMInstrFormats.td"
/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
/// binop that produces a value.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1003,38 +1032,15 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{4} = 1;
let Inst{3-0} = shift{3-0};
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
-
}
/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
/// it is equivalent to the AsI1_bin_irs counterpart.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1094,30 +1100,6 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let Inst{4} = 1;
let Inst{3-0} = shift{3-0};
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
-
}
/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
@@ -1304,8 +1286,9 @@ class AI_exta_rrot_np<bits<8> opcod, string opc>
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
+ bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1351,7 +1334,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
+ [(set GPRnopc:$Rd, CPSR,
+ (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -1366,34 +1350,11 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{3-0} = shift{3-0};
}
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
}
/// AI1_rsc_irs - Define instructions and patterns for rsc
-multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc> {
+let TwoOperandAliasConstraint = "$Rn = $Rd" in
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1450,29 +1411,6 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{3-0} = shift{3-0};
}
}
-
- // Assembly aliases for optional destination operand when it's the same
- // as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
- so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
- GPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
- so_reg_imm:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
- so_reg_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsARM]>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1511,9 +1449,10 @@ multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
// Note: We use the complex addrmode_imm12 rather than just an input
// GPR and a constrained immediate so that we can use this to match
// frame index references and avoid matching constant pool references.
- def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr),
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt),
+ (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
- [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
bits<4> Rt;
bits<17> addr;
let Inst{23} = addr{12}; // U (add = ('U' == 1))
@@ -1521,9 +1460,10 @@ multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm12
}
- def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift),
- AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
- [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt),
+ (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
bits<4> Rt;
bits<17> shift;
let shift{4} = 0; // Inst{4} = 0
@@ -1581,9 +1521,10 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm12
}
- def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift),
- AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
- [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ def rs : AI2ldst<0b011, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
bits<4> Rt;
bits<17> shift;
let shift{4} = 0; // Inst{4} = 0
@@ -1655,33 +1596,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000000;
+def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+ "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
-def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000001;
-}
-
-def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000010;
-}
-
-def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000011;
-}
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1694,16 +1620,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
let Inst{27-20} = 0b01101000;
let Inst{7-4} = 0b1011;
let Inst{11-8} = 0b1111;
+ let Unpredictable{11-8} = 0b1111;
}
-def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
- []>, Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000100;
-}
-
-// The i32imm operand $val can be used by a debugger to store more information
+// The 16-bit operand $val can be used by a debugger to store more information
// about the breakpoint.
def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
"bkpt", "\t$val", []>, Requires<[IsARM]> {
@@ -1922,7 +1842,7 @@ let isCall = 1,
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
Defs = [LR], Uses = [SP] in {
- def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsARM]> {
@@ -1932,7 +1852,7 @@ let isCall = 1,
let DecoderMethod = "DecodeBranchImmInstruction";
}
- def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM]> {
@@ -1942,7 +1862,7 @@ let isCall = 1,
}
// ARMv5T and above
- def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
Requires<[IsARM, HasV5T]> {
@@ -1951,7 +1871,7 @@ let isCall = 1,
let Inst{3-0} = func;
}
- def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
Requires<[IsARM, HasV5T]> {
@@ -1962,19 +1882,18 @@ let isCall = 1,
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T]>;
// ARMv4
- def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T]>;
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
- def BMOVPCB_CALL : ARMPseudoInst<(outs),
- (ins bl_target:$func, variable_ops),
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsARM]>;
}
@@ -2044,18 +1963,16 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>;
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>;
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>;
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
4, IIC_Br, [],
(Bcc br_target:$dst, (ops 14, zero_reg))>,
Requires<[IsARM]>;
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(BX GPR:$dst)>,
Requires<[IsARM]>;
@@ -2509,6 +2426,7 @@ multiclass AI2_stridx<bit isByte, string opc,
let Inst{23} = offset{12};
let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
@@ -2768,7 +2686,7 @@ defm STRHT : AI3strT<0b1011, "strht">;
multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
- // mnemonic here. Without it is the cannonical spelling.
+ // mnemonic here. Without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
@@ -2900,9 +2818,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
- (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
-
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -3113,10 +3028,10 @@ def UBFX : I<(outs GPR:$Rd),
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
//
@@ -3134,15 +3049,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
- "ADC", 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "SBC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
-defm RSB : AsI1_rbin_irs <0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
@@ -3150,8 +3063,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "RSC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -3163,6 +3075,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
(SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
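A worked illustration of the new imm0_65535_neg patterns (example values are mine): 257 is not an ARM modified immediate because its set bits do not fit in any rotated 8-bit window, and neither is -257, so neither ADDri nor the so_imm_neg SUBri pattern applies; with the patterns above, the add of -257 is expected to become a movw of 257 followed by a register-register sub.

#include <cstdint>

// Expected (roughly): movw rT, #257 ; sub rD, rX, rT
static uint32_t addMinus257(uint32_t X) {
  return X - 257u;   // DAG form: (add X, -257)
}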
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
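The identity behind that remark, as a quick check (standard two's-complement arithmetic, not specific to this patch): SBC computes Rn + ~Op2 + C, so the "+1" of negating Op2 is supplied by an incoming carry of 1, and a borrow simply arrives as C = 0.

#include <cstdint>

// SBC semantics: result == Rn - Op2 - (1 - Carry) (mod 2^32).
static uint32_t sbcValue(uint32_t Rn, uint32_t Op2, unsigned Carry) {
  return Rn + ~Op2 + Carry;
}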
@@ -3190,7 +3107,7 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
-
+
let Unpredictable{11-8} = 0b1111;
}
@@ -3355,16 +3272,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
defm AND : AsI1_bin_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
// like in the actual instruction encoding. The complexity of mapping the mask
@@ -3482,27 +3399,28 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// FIXME: The v5 pseudos are only necessary for the additional Constraint
// property. Remove them when it's possible to add those properties
-// on an individual MachineInstr, not just an instuction description.
-let isCommutable = 1 in {
-def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
- [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
let Unpredictable{15-12} = 0b1111;
}
let Constraints = "@earlyclobber $Rd" in
def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
- pred:$p, cc_out:$s),
- 4, IIC_iMUL32,
- [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
- (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
Requires<[IsARM, HasV6]> {
bits<4> Ra;
@@ -3511,8 +3429,8 @@ def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
let Constraints = "@earlyclobber $Rd" in
def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
- 4, IIC_iMAC32,
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
(MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
@@ -3630,8 +3548,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
@@ -3912,49 +3829,85 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
(CMPrsr GPR:$src, so_reg_reg:$rhs)>;
-// FIXME: We have to be careful when using the CMN instruction and comparison
-// with 0. One would expect these two pieces of code should give identical
-// results:
-//
-// rsbs r1, r1, 0
-// cmp r0, r1
-// mov r0, #0
-// it ls
-// mov r0, #1
-//
-// and:
-//
-// cmn r0, r1
-// mov r0, #0
-// it ls
-// mov r0, #1
-//
-// However, the CMN gives the *opposite* result when r1 is 0. This is because
-// the carry flag is set in the CMP case but not in the CMN case. In short, the
-// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
-// value of r0 and the carry bit (because the "carry bit" parameter to
-// AddWithCarry is defined as 1 in this case, the carry flag will always be set
-// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
-// never a "carry" when this AddWithCarry is performed (because the "carry bit"
-// parameter to AddWithCarry is defined as 0).
-//
-// When x is 0 and unsigned:
-//
-// x = 0
-// ~x = 0xFFFF FFFF
-// ~x + 1 = 0x1 0000 0000
-// (-x = 0) != (0x1 0000 0000 = ~x + 1)
-//
-// Therefore, we should disable CMN when comparing against zero, until we can
-// limit when the CMN instruction is used (when we know that the RHS is not 0 or
-// when it's a comparison which doesn't look at the 'carry' flag).
-//
-// (See the ARM docs for the "AddWithCarry" pseudo-code.)
-//
-// This is related to <rdar://problem/7569620>.
-//
-//defm CMN : AI1_cmp_irs<0b1011, "cmn",
-// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+// CMN register-integer
+let isCompare = 1, Defs = [CPSR] in {
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
+ "cmn", "\t$Rn, $imm",
+ [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+// CMN register-register/shift
+def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = 1;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsi : AI1<0b1011, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+def CMNzrsr : AI1<0b1011, (outs),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
+ "cmn", "\t$Rn, $shift",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+
+ let Unpredictable{15-12} = 0b1111;
+}
+
+}
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
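As a usage sketch of the reinstated CMN selection (assumed behaviour; the example is mine): a signed comparison against a negative modified immediate can now fold the negation into cmn instead of first materializing the constant in a register.

#include <cstdint>

// Expected (roughly): cmn r0, #10 followed by a GE-conditional result.
static bool atLeastMinusTen(int32_t X) {
  return X >= -10;
}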
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
@@ -3964,16 +3917,6 @@ defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
-defm CMNz : AI1_cmp_irs<0b1011, "cmn",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
-
-//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
-// (CMNri GPR:$src, so_imm_neg:$imm)>;
-
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
- (CMNzri GPR:$src, so_imm_neg:$imm)>;
-
// Pseudo i64 compares for some floating point compares.
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
@@ -4121,11 +4064,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
-def ABS : ARMPseudoInst<
- (outs GPR:$dst), (ins GPR:$src),
- 8, NoItinerary, []>;
-}
+let usesCustomInserter = 1, Defs = [CPSR] in
+def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
@@ -4242,6 +4182,13 @@ let usesCustomInserter = 1 in {
}
}
+let usesCustomInserter = 1 in {
+ def COPY_STRUCT_BYVAL_I32 : PseudoInst<
+ (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
+ NoItinerary,
+ [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
+}
+
let mayLoad = 1 in {
def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary,
@@ -4280,10 +4227,10 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
-def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr),
- "swp", []>;
-def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr),
- "swpb", []>;
+def SWP : AIswp<0, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+def SWPB: AIswp<1, (outs GPRnopc:$Rt),
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
}
//===----------------------------------------------------------------------===//
@@ -4609,8 +4556,8 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
}
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
- [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2,
- imm:$CRm)]>;
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
@@ -4637,8 +4584,8 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
}
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
- [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2,
- imm:$CRm)]>;
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
//===----------------------------------------------------------------------===//
@@ -4658,7 +4605,8 @@ def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
let Unpredictable{11-0} = 0b110100001111;
}
-def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>;
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>,
+ Requires<[IsARM]>;
// The MRSsys instruction is the MRS instruction from the ARM ARM,
// section B9.3.9, with the R bit set to 1.
@@ -5114,7 +5062,7 @@ def : ARMInstAlias<"add${s}${p} $Rd, $imm",
(SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via so_imm_neg
def : ARMInstAlias<"cmp${p} $Rd, $imm",
- (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
def : ARMInstAlias<"cmn${p} $Rd, $imm",
(CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
@@ -5123,6 +5071,7 @@ def : ARMInstAlias<"cmn${p} $Rd, $imm",
// FIXME: We need C++ parser hooks to map the alias to the MOV
// encoding. It seems we should be able to do that sort of thing
// in tblgen, but it could get ugly.
+let TwoOperandAliasConstraint = "$Rm = $Rd" in {
def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
(ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
cc_out:$s)>;
@@ -5135,8 +5084,10 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
(ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
cc_out:$s)>;
+}
def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+let TwoOperandAliasConstraint = "$Rn = $Rd" in {
def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
cc_out:$s)>;
@@ -5149,32 +5100,7 @@ def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
cc_out:$s)>;
-// shifter instructions also support a two-operand form.
-def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
- (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
- (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
- (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
- (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
- (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
- (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
- (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
- (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
- cc_out:$s)>;
-
-
-// 'mul' instruction can be specified with only two operands.
-def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+}
// "neg" is and alias for "rsb rd, rn, #0"
def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index fd8ac0b328eb..31340881920d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1962,7 +1962,7 @@ def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
let Inst{4} = Rn{5};
}
-def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
@@ -2300,14 +2300,14 @@ class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -2325,7 +2325,7 @@ class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
@@ -2343,7 +2343,7 @@ class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
(ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
@@ -2368,6 +2368,8 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
// Same as N3VD but no data type.
@@ -2379,6 +2381,8 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
@@ -2391,6 +2395,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
@@ -2401,6 +2407,8 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
@@ -2411,6 +2419,8 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2420,6 +2430,8 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
@@ -2432,6 +2444,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
@@ -2443,21 +2457,25 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
}
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2468,7 +2486,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2479,26 +2497,29 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2510,7 +2531,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2522,11 +2543,12 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let TwoOperandAliasConstraint = "$Vm = $Vd";
let isCommutable = 0;
}
@@ -2606,7 +2628,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2614,7 +2636,7 @@ class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2625,7 +2647,7 @@ class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2633,7 +2655,7 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2678,7 +2700,7 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
@@ -2691,7 +2713,7 @@ class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
@@ -2699,7 +2721,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
@@ -2712,7 +2734,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2727,7 +2749,7 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
- Intrinsic IntOp, bit Commutable>
+ SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
@@ -2780,7 +2802,7 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Long 3-register intrinsics with explicit extend (VABDL).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
@@ -2793,7 +2815,7 @@ class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
@@ -2802,7 +2824,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2812,7 +2834,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
@@ -2830,6 +2852,8 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
(TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ // All of these have a two-operand InstAlias.
+ let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
@@ -2837,14 +2861,14 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -2855,7 +2879,7 @@ class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
@@ -2863,7 +2887,7 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
@@ -2871,6 +2895,7 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Shift by immediate,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, Operand ImmTy,
string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
@@ -2885,6 +2910,7 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
(outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+}
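// As above, the constraint gives the assembler the two-operand immediate
// forms, e.g. (illustrative):
//   vshl.i32 q0, #8      @ parsed as: vshl.i32 q0, q0, #8
//   vshr.u16 d0, #3      @ parsed as: vshr.u16 d0, d0, #3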
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
@@ -2908,6 +2934,7 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// Shift right by immediate and accumulate,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
@@ -2924,9 +2951,11 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+}
// Shift by immediate and insert,
// both double- and quad-register.
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Operand ImmTy, Format f, string OpcodeStr, string Dt,
ValueType Ty,SDNode ShOp>
@@ -2941,19 +2970,20 @@ class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
(ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+}
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
@@ -3023,7 +3053,7 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
@@ -3064,7 +3094,7 @@ multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
itin, OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp>;
@@ -3152,7 +3182,7 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
// 64-bit vector types.
def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
@@ -3173,7 +3203,7 @@ multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
// 64-bit vector types.
def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
@@ -3194,7 +3224,7 @@ multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
@@ -3210,7 +3240,7 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
@@ -3224,7 +3254,7 @@ multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
@@ -3241,7 +3271,7 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
@@ -3255,7 +3285,7 @@ multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- Intrinsic IntOp>
+ SDPatternOperator IntOp>
: N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp> {
def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
@@ -3270,7 +3300,7 @@ multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp, Commutable>;
@@ -3330,7 +3360,7 @@ multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, bit Commutable = 0> {
def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
@@ -3341,7 +3371,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp> {
+ SDPatternOperator IntOp> {
def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
@@ -3352,7 +3382,7 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0>
+ SDPatternOperator IntOp, bit Commutable = 0>
: N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
IntOp, Commutable> {
def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
@@ -3363,7 +3393,7 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, ExtOp, Commutable>;
@@ -3436,7 +3466,7 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp,
SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
@@ -3459,7 +3489,7 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
@@ -3506,7 +3536,7 @@ multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
@@ -3514,7 +3544,7 @@ multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
@@ -3524,7 +3554,7 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
- string OpcodeStr, string Dt, Intrinsic IntOp>
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
: N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
@@ -3533,7 +3563,7 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
IntOp, ExtOp, OpNode>;
@@ -3550,7 +3580,7 @@ multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
@@ -3573,7 +3603,7 @@ multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
@@ -3668,33 +3698,6 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
-
- // Aliases for two-operand forms (source and dest regs the same).
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
- DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
- DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
- DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
- DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
- QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
- QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
- QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
- def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
- QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
}
// Neon Shift-Accumulate vector operations,
@@ -4133,16 +4136,16 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
Requires<[HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
(VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
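// llvm.fma.*(a, b, c) computes a*b + c, while VFMA/VFMS accumulate into the
// tied first operand, so the accumulator has to come from the last fma
// operand. An illustrative selection (register names arbitrary):
//   %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b,
//                                         <2 x float> %acc)
//   --> VFMAfd %acc, %a, %b    ; i.e. %acc + %a * %b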
@@ -4305,6 +4308,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
// VBIC : Vector Bitwise Bit Clear (AND NOT)
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
"vbic", "$Vd, $Vn, $Vm", "",
@@ -4315,6 +4319,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
"vbic", "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (v4i32 (and QPR:$Vn,
(vnotq QPR:$Vm))))]>;
+}
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
(outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
@@ -4820,14 +4825,14 @@ defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
- int_arm_neon_vclz>;
+ ctlz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiD, "vcnt", "8",
- v8i8, v8i8, int_arm_neon_vcnt>;
+ v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
- v16i8, v16i8, int_arm_neon_vcnt>;
+ v16i8, v16i8, ctpop>;
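// With SDPatternOperator these definitions can use the generic ctlz/ctpop
// nodes directly, e.g. a (v8i8 (ctpop DPR:$Vm)) node is now matched by VCNTd
// without going through the int_arm_neon_vcnt intrinsic.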
// Vector Swap
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
@@ -5308,6 +5313,9 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
+
+// All of these have a two-operand InstAlias.
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
@@ -5327,6 +5335,7 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
bits<4> index;
let Inst{11-8} = index{3-0};
}
+}
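// For example (illustrative): vext.8 d0, d1, #3 is accepted and parsed as
// vext.8 d0, d0, d1, #3.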
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
@@ -5588,82 +5597,87 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
-// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
-// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+// Lengthen_Single<"8", "i16", "8"> =
+// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
+// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ let AddedComplexity = 10 in {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ }
}
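// At the assembly level an instance such as Lengthen_Single<"8", "i16", "8">
// therefore turns the lengthening load into something like (illustrative):
//   vld1.8   {d16}, [r0]
//   vmovl.u8 q8, d16       @ vmovl.s8 for the sextload pattern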
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
-// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
}
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
-// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
-// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)),
+// (i32 0))),
// dsub_0)),
-// qsub_0)>;
+// dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
}
// extload, zextload and sextload for a lengthening load followed by another
@@ -5671,45 +5685,43 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
-// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
-// dsub_0)),
-// dsub_0)>;
+// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)),
+// dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
}
-defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
-defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
-defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
// Double lengthening - v4i8 -> v4i16 -> v4i32
@@ -5720,18 +5732,18 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
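// Note that the VLD1LNd16/VLD1LNd32 lane loads used above read only 16 or 32
// bits, matching the footprint of the narrower extending loads, whereas
// VLDRS/VLDRD always read a full 32 or 64 bits.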
//===----------------------------------------------------------------------===//
// Assembler aliases
@@ -5742,69 +5754,6 @@ def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
-
-// VADD two-operand aliases.
-def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
- (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
- (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
- (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
- (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
- (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
- (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
- (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
- (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
- (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
- (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSUB two-operand aliases.
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
- (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
- (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
- (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
- (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
- (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
- (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
- (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
- (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
- (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
- (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VADDW two-operand aliases.
-def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
- (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
- (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
- (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
- (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
- (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
- (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
-
// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
@@ -5823,23 +5772,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
-def : NEONInstAlias<"vand${p} $Vdn, $Vm",
- (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vand${p} $Vdn, $Vm",
- (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
- (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
- (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"veor${p} $Vdn, $Vm",
- (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"veor${p} $Vdn, $Vm",
- (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
- (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
- (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
@@ -5853,212 +5785,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-// VMUL two-operand aliases.
-def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
- (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
- (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
- (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
- (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
- (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
- (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
- (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
- (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
- (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
- (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
- (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
- VectorIndex16:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
- (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
- VectorIndex16:$lane, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
- (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
- (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-
-def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
- (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
- (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-
-// VQADD (register) two-operand aliases.
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
- (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
- (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
- (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
- (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
- (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
- (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
- (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
- (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
- (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
- (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
- (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
- (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
- (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
- (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
- (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
- (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSHL (immediate) two-operand aliases.
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
- (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
- (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
- (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
- (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
- (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
- (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
- (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
- (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
-
-// VSHL (register) two-operand aliases.
-def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
- (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
- (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
- (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
- (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
- (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
- (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
- (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
- (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
- (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
- (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
- (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
- (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
- (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
- (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
- (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
- (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// VSHR (immediate) two-operand aliases.
-def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
- (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
- (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
- (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
- (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
- (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
- (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
- (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
- (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
- (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
- (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
- (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
- (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
- (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
- (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
- (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
- (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
-
-// VRSHL two-operand aliases.
-def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm",
- (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm",
- (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm",
- (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm",
- (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm",
- (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm",
- (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm",
- (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm",
- (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm",
- (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm",
- (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm",
- (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm",
- (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm",
- (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm",
- (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm",
- (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm",
- (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
@@ -6223,17 +5949,17 @@ def VST2LNqWB_register_Asm_32 :
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
-def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_8 :
@@ -6499,17 +6225,17 @@ def VST3qWB_register_Asm_32 :
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
-def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
-def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
(ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_8 :
@@ -6845,277 +6571,6 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
-// Two-operand variants for VEXT
-def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
- (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
- (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
- (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
- (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
- (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
- (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
-def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
- (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
-
-// Two-operand variants for VQDMULH
-def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
- (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
- (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
- (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
- (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VMAX.
-def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
- (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
- (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
- (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
- (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
- (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
- (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
- (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
- (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
- (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
- (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
- (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
- (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
- (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
- (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VMIN.
-def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
- (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
- (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
- (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
- (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
- (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
- (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
- (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
- (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
- (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
- (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
- (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
- (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
- (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
- (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VPADD.
-def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
- (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
- (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
- (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
- (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VSRA.
- // Signed.
-def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
- (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
- (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
- (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
- (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
- (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
- (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
- (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
- (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
- (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
- (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
- (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
- (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
- (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
- (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
- (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
- (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VSRI.
-def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
- (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
- (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
- (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
- (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
- (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
- (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
- (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
- (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VSLI.
-def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
- (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
- (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
- (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
- (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
- (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
- (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
- (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
-def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
- (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
-
-// Two-operand variants for VHSUB.
- // Signed.
-def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
- (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
- (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
- (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
- (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
- (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
- (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
- (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
- (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
- (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
- (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
- (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
- (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-
-// Two-operand variants for VHADD.
- // Signed.
-def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
- (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
- (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
- (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
- (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
- (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
- (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
- (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
- (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
- (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
-
-def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
- (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
- (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
- (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
-
-// Two-operand variants for VRHADD.
- // Signed.
-def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm",
- (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm",
- (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm",
- (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-
-def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm",
- (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm",
- (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm",
- (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-
- // Unsigned.
-def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm",
- (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm",
- (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm",
- (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>;
-
-def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm",
- (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm",
- (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm",
- (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>;
-
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
(VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 6335229d3c2a..554f6d9f94e1 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -32,9 +32,6 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
-}]>;
def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
@@ -258,16 +255,20 @@ def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
Requires<[IsThumb2]>;
def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
- T1SystemEncoding<0x10>; // A8.6.410
+ T1SystemEncoding<0x10>, // A8.6.410
+ Requires<[IsThumb2]>;
def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
- T1SystemEncoding<0x20>; // A8.6.408
+ T1SystemEncoding<0x20>, // A8.6.408
+ Requires<[IsThumb2]>;
def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
- T1SystemEncoding<0x30>; // A8.6.409
+ T1SystemEncoding<0x30>, // A8.6.409
+ Requires<[IsThumb2]>;
def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
- T1SystemEncoding<0x40>; // A8.6.157
+ T1SystemEncoding<0x40>, // A8.6.157
+ Requires<[IsThumb2]>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -363,8 +364,8 @@ def : tInstAlias<"sub${p} sp, sp, $imm",
(tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
// ADD <Rm>, sp
-def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr,
- "add", "\t$Rdn, $sp, $Rn", []>,
+def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T1
bits<4> Rdn;
@@ -419,34 +420,35 @@ let isCall = 1,
Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
+ (outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb]> {
- bits<22> func;
- let Inst{26} = func{21};
+ bits<24> func;
+ let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
let Inst{10-0} = func{10-0};
}
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
+ (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T]> {
- bits<21> func;
+ bits<24> func;
+ let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
+ let Inst{13} = func{22};
+ let Inst{11} = func{21};
let Inst{10-1} = func{10-1};
let Inst{0} = 0; // func{0} is assumed zero
}
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T]>,
@@ -457,7 +459,7 @@ let isCall = 1,
}
// ARMv4T
- def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb, IsThumb1Only]>;
@@ -504,7 +506,7 @@ let isBranch = 1, isTerminator = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
let Uses = [SP] in {
- def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
Requires<[IsThumb]>;
@@ -514,7 +516,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Non-IOS version:
let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
- (ins t_brtarget:$dst, pred:$p, variable_ops),
+ (ins t_brtarget:$dst, pred:$p),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
Requires<[IsThumb, IsNotIOS]>;
@@ -1398,7 +1400,7 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// For round-trip assembly/disassembly, we have to handle a CPS instruction
// without any iflags. That's not, strictly speaking, valid syntax, but it's
-// a useful extention and assembles to defined behaviour (the insn does
+// a useful extension and assembles to defined behaviour (the insn does
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e6fb9d5f01eb..307006f413a8 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
}]>;
+// so_imm_notSext_XFORM - Return a so_imm value packed into the format
+// described for so_imm_notSext def below, with sign extension from 16
+// bits.
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
+ APInt apIntN = N->getAPIntValue();
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32);
+}]>;
+
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
@@ -86,6 +95,17 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
+// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm
+// if the upper 16 bits are zero.
+def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
+ APInt apIntN = N->getAPIntValue();
+ if (!apIntN.isIntN(16)) return false;
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1;
+ }], t2_so_imm_notSext16_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
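For reference, the new XFORM boils down to sign-extending the low half of the immediate and complementing the result; whether the complemented value is actually encodable is still gated by the getT2SOImmVal check in the PatLeaf. A plain C++ sketch, with an illustrative name:

    #include <cstdint>

    // Mirror of t2_so_imm_notSext16_XFORM: sign-extend the low 16 bits of imm,
    // then take the bitwise complement.
    // e.g. 0xFF00 -> ~0xFFFFFF00 = 0x000000FF,  0xFFFE -> ~0xFFFFFFFE = 0x00000001
    uint32_t notSext16(uint32_t imm) {
      int32_t sext = (int32_t)(int16_t)(imm & 0xFFFF); // sign extension from bit 15
      return ~(uint32_t)sext;
    }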
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
@@ -152,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> {
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand";
}
@@ -509,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0,
+ PatFrag opnode, bit Commutable = 0,
string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
@@ -545,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
cc_out:$s)>;
}
@@ -562,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
// Assembler aliases w/ the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
@@ -668,16 +683,16 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass T2I_rbin_s_is<PatFrag opnode> {
// shifted imm
def ri : t2PseudoInst<(outs rGPR:$Rd),
- (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
4, IIC_iALUi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
- GPRnopc:$Rn))]>;
+ rGPR:$Rn))]>;
// shifted register
def rs : t2PseudoInst<(outs rGPR:$Rd),
- (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, IIC_iALUsi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
- GPRnopc:$Rn))]>;
+ rGPR:$Rn))]>;
}
}
@@ -788,8 +803,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
- string baseOpc> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -814,33 +828,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
// Optional destination register
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
@@ -848,7 +856,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
/// a explicit result, only implicitly set CPSR.
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc> {
+ PatFrag opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
@@ -893,12 +901,9 @@ let isCompare = 1, Defs = [CPSR] in {
// No alias here for 'rr' version as not all instantiations of this
// multiclass want one (CMP in particular, does not).
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -1911,11 +1916,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
+ (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
let AddedComplexity = 1 in
def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
(t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
+ (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
@@ -1924,6 +1934,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
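The new imm0_65535_neg patterns above cover an addend in [-65535, -1] that fits neither a Thumb-2 modified immediate nor the 12-bit encoding: the negated value always fits MOVW, so the add (and its flag-setting ARMaddc form) becomes materialize-then-subtract. A small sketch of the identity being relied on; the function is illustrative only:

    #include <cassert>
    #include <cstdint>

    // add rD, rS, #imm with imm in [-65535,-1] has no single-instruction form,
    // but it is equivalent to:
    //   movw rT, #(-imm)   ; -imm fits in 16 bits
    //   sub  rD, rS, rT    ; (subs for the flag-setting ARMaddc pattern)
    uint32_t addNeg16(uint32_t src, int32_t imm) {
      assert(imm >= -65535 && imm <= -1);
      uint32_t t = (uint32_t)(-imm);   // value MOVW materializes
      return src - t;                  // same 32-bit result as src + imm
    }

    int main() {
      assert(addNeg16(100, -7) == 93);
      assert(addNeg16(5, -65535) == (uint32_t)(5 + -65535));
      return 0;
    }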
@@ -2125,17 +2137,17 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
//
defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
- BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
- BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
- BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
- BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
-def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
- (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
@@ -2187,18 +2199,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
defm t2AND : T2I_bin_w_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>,
- "t2BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
class T2BitFI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -2278,8 +2289,7 @@ let Constraints = "$src = $Rd" in {
defm t2ORN : T2I_bin_irs<0b0011, "orn",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, (not node:$RHS))>,
- "t2ORN", 0, "">;
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
@@ -2332,6 +2342,17 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 rGPR:$src), [{
+ return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// so_imm_notSext is needed instead of so_imm_not, as the value of imm
+// will match the extended, not the original bitWidth for $src.
+def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
+
+
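The reason top16Zero is paired with t2_so_imm_notSext: once the upper half of $src is known to be zero, AND with a 16-bit mask equals BIC with the complement of the sign-extended mask, and that complement is often a legal modified immediate even when the mask itself is not. A quick check of the equivalence, with a made-up helper name:

    #include <cassert>
    #include <cstdint>

    // If the upper 16 bits of x are known to be zero, AND with a 16-bit mask is
    // the same as BIC with the complement of the sign-extended mask.
    bool bicRewriteHolds(uint32_t x, uint32_t mask16) {
      assert((x >> 16) == 0 && (mask16 >> 16) == 0);       // the top16Zero precondition
      uint32_t sext   = (uint32_t)(int32_t)(int16_t)mask16; // mask sign-extended to 32 bits
      uint32_t bicImm = ~sext;               // what t2_so_imm_notSext16_XFORM produces
      return (x & mask16) == (x & ~bicImm);  // BIC x, x, #bicImm clears the bits in bicImm
    }

    int main() {
      assert(bicRewriteHolds(0x1234, 0xFF00));
      assert(bicRewriteHolds(0xBEEF, 0x8001));
      return 0;
    }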
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -2840,7 +2861,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
(t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
@@ -2849,29 +2870,68 @@ def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
(t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
-//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
-// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
-defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>,
- "t2CMNz">;
+let isCompare = 1, Defs = [CPSR] in {
+ // shifted imm
+ def t2CMNri : T2OneRegCmpImm<
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
+ "cmn", ".w\t$Rn, $imm",
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def t2CMNzrr : T2TwoRegCmp<
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
+ "cmn", ".w\t$Rn, $Rm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, rGPR:$Rm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def t2CMNzrs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
+ "cmn", ".w\t$Rn, $ShiftedRm",
+ [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b1000;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
-//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
-// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+// Assembler aliases w/o the ".w" suffix.
+// No alias here for 'rr' version as not all instantiations of this multiclass
+// want one (CMP in particular, does not).
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
-def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
- (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
- "t2TST">;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
- "t2TEQ">;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
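As a reminder of why a negated-immediate compare can be selected to the new t2CMNri: CMN Rn, #imm sets NZCV from the addition Rn + imm, and the ARMcmp/ARMcmpZ patterns with t2_so_imm_neg lean on that being the same comparison as subtracting the original (negative) immediate. A sketch of the flag computation itself, using the usual add flag formulas rather than any LLVM code:

    #include <cstdint>

    struct Flags { bool N, Z, C, V; };

    // NZCV produced by CMN Rn, #imm: the flags of the 32-bit addition Rn + imm.
    // (CMP Rn, #imm is the same computation with ~imm and a carry-in of 1.)
    Flags cmnFlags(uint32_t rn, uint32_t imm) {
      uint64_t wide = (uint64_t)rn + imm;
      uint32_t res  = (uint32_t)wide;
      Flags f;
      f.N = (res >> 31) != 0;
      f.Z = (res == 0);
      f.C = (wide >> 32) != 0;                           // carry out of bit 31
      f.V = (((res ^ rn) & (res ^ imm)) >> 31) != 0;     // signed overflow
      return f;
    }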
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -3017,7 +3077,7 @@ def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
"isb", "\t$opt",
- []>, Requires<[IsThumb2, HasDB]> {
+ []>, Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = opt;
@@ -3271,7 +3331,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
- (ins uncondbrtarget:$dst, pred:$p, variable_ops),
+ (ins uncondbrtarget:$dst, pred:$p),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
Requires<[IsThumb2, IsIOS]>;
@@ -3281,7 +3341,7 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
def t2BMOVPCB_CALL : tPseudoInst<(outs),
- (ins t_bltarget:$func, variable_ops),
+ (ins t_bltarget:$func),
6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsThumb]>;
}
@@ -3382,21 +3442,18 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-class T2I_hint<bits<8> op7_0, string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
- let Inst{31-20} = 0xf3a;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
- let Inst{10-8} = 0b000;
- let Inst{7-0} = op7_0;
+def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
+ bits<8> imm;
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
}
-def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
-def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
-def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
-def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
-def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
@@ -3622,8 +3679,8 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
// A/R class MRS.
//
// A/R class can only move from CPSR or SPSR.
-def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>,
- Requires<[IsThumb2,IsARClass]> {
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
@@ -3632,8 +3689,8 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
-def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>,
- Requires<[IsThumb2,IsARClass]> {
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ []>, Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
@@ -3646,7 +3703,7 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", [
// the A/R class (a full msr_mask).
def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
"mrs", "\t$Rd, $mask", []>,
- Requires<[IsThumb2,IsMClass]> {
+ Requires<[IsThumb,IsMClass]> {
bits<4> Rd;
bits<8> mask;
let Inst{31-12} = 0b11110011111011111000;
@@ -3682,14 +3739,14 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
// Move from ARM core register to Special Register
def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
NoItinerary, "msr", "\t$SYSm, $Rn", []>,
- Requires<[IsThumb2,IsMClass]> {
- bits<8> SYSm;
+ Requires<[IsThumb,IsMClass]> {
+ bits<12> SYSm;
bits<4> Rn;
let Inst{31-21} = 0b11110011100;
let Inst{20} = 0b0;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b1000;
- let Inst{7-0} = SYSm;
+ let Inst{11-0} = SYSm;
}
@@ -3969,6 +4026,17 @@ def : t2InstAlias<"add${s}${p} $Rdn, $imm",
def : t2InstAlias<"add${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
@@ -4002,9 +4070,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4213,7 +4281,7 @@ def : t2InstAlias<"add${s}${p} $Rd, $imm",
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
def : t2InstAlias<"cmp${p} $Rd, $imm",
- (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+ (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
def : t2InstAlias<"cmn${p} $Rd, $imm",
(t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 3600b889d6f1..23c132e4f6a8 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -221,11 +221,13 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
// FP Binary Operations.
//
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
@@ -235,11 +237,13 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
@@ -249,21 +253,25 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
[(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
@@ -559,8 +567,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
bits<4> Rt2;
// Encode instruction operands.
- let Inst{3-0} = src1{3-0};
- let Inst{5} = src1{4};
+ let Inst{3-0} = src1{4-1};
+ let Inst{5} = src1{0};
let Inst{15-12} = Rt;
let Inst{19-16} = Rt2;
@@ -609,8 +617,8 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
bits<4> src2;
// Encode instruction operands.
- let Inst{3-0} = dst1{3-0};
- let Inst{5} = dst1{4};
+ let Inst{3-0} = dst1{4-1};
+ let Inst{5} = dst1{0};
let Inst{15-12} = src1;
let Inst{19-16} = src2;
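The VMOVRRS/VMOVSRR fix is about how a single-precision register number is split across the Vm and M fields: for S registers the upper four bits of the register number go into Inst{3-0} and the lowest bit into Inst{5}, the opposite packing from D registers. An illustrative sketch of the two packings (not the actual MC emitter):

    #include <cstdint>

    // Pack an S-register number (s0..s31) into the Vm/M operand fields:
    // Vm = Sreg[4:1], M = Sreg[0].  This is what the corrected lines implement.
    uint32_t packSReg(unsigned sreg, uint32_t inst) {
      inst |= (sreg >> 1) & 0xF;        // Inst{3-0} = src{4-1}
      inst |= (sreg & 1) << 5;          // Inst{5}   = src{0}
      return inst;
    }

    // D registers are packed the other way around: Vm = Dreg[3:0], M = Dreg[4].
    uint32_t packDReg(unsigned dreg, uint32_t inst) {
      inst |= dreg & 0xF;               // Inst{3-0} = src{3-0}
      inst |= ((dreg >> 4) & 1) << 5;   // Inst{5}   = src{4}
      return inst;
    }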
@@ -819,9 +827,9 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
// Single Precision register
-class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
@@ -830,9 +838,9 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bi
}
// Double Precision register
-class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
+ bit op5, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
@@ -1081,10 +1089,11 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
+// (fma x, y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1115,18 +1124,18 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fma (fneg x), y, z) -> (vfms x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
-def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1157,18 +1166,18 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fneg (fma x, y, z)) -> (vfnma x, y, z)
-def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1198,18 +1207,26 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z)
-def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+
+// (fma x, y, (fneg z)) -> (vfnms z, x, y))
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-// (fma x, (fneg y), z) -> (vnfms x, y, z)
-def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
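All of the fma pattern updates above follow from two operand orders: @llvm.fma.*(x, y, z) computes x*y + z with the addend last, while the VFMA-family instructions take the accumulator as their first operand and update it in place. Spelling out the four instruction semantics the rewritten patterns map onto, as plain C++ that ignores the single rounding of the real fused operation:

    #include <cassert>

    // d is the accumulator operand that the VFP instruction both reads and writes.
    double vfma (double d, double n, double m) { return  d + n * m; } // (fma n, m, d)
    double vfms (double d, double n, double m) { return  d - n * m; } // (fma -n, m, d)
    double vfnma(double d, double n, double m) { return -d - n * m; } // -(fma n, m, d)
    double vfnms(double d, double n, double m) { return -d + n * m; } // (fma n, m, -d)

    int main() {
      double n = 2.0, m = 3.0, d = 10.0;
      assert(vfma(d, n, m)  == d + n * m);
      assert(vfnms(d, n, m) == n * m - d);
      return 0;
    }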
@@ -1426,22 +1443,6 @@ def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
(VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-// VMUL has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
- (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
- (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-// VADD has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
- (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
- (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-// VSUB has a two-operand form (implied destination operand)
-def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
- (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
-def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
- (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-
// VMOV can accept optional 32-bit or less data type suffix suffix.
def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
(VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 98930ccdb194..3f99cce14669 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -289,9 +289,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
ResultPtr = ResultPtr >> 2;
*((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC.
- *((intptr_t*)RelocPos) |=
- getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ // Set register Rn to PC (which is register 15 on all architectures).
+ // FIXME: This avoids the need for register info in the JIT class.
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_pic_jt:
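The JIT change simply hard-codes PC as register 15 rather than querying register info; in the ARM word layout Rn occupies bits 19:16, so the relocation reduces to OR-ing 15 into that field. A one-liner sketch, assuming ARMII::RegRnShift is the usual value of 16:

    #include <cstdint>

    // Force the base register (Rn, bits 19:16 of an ARM load/store word) to PC.
    uint32_t setRnToPC(uint32_t insn) {
      const unsigned RegRnShift = 16;   // assumption: matches ARMII::RegRnShift
      return insn | (15u << RegRnShift);
    }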
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 9ef2ace29cff..897ceb624bf5 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
DebugLoc dl = Loc->getDebugLoc();
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(PReg);
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
@@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
int NewOffset = MemOps[i].Offset;
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(Reg);
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
// Register numbers must be in ascending order. For VFP / NEON load and
// store multiples, the registers must also be consecutive and within the
// limit on the number of registers per instruction.
@@ -1177,8 +1175,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
- if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
- NewOpc = ARM::t2LDRi12;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1326,7 +1322,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
// Process the load / store instructions.
RS->forward(prior(MBBI));
@@ -1739,7 +1735,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
MRI->constrainRegClass(EvenReg, TRC);
MRI->constrainRegClass(OddReg, TRC);
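Switching to TRI->getEncodingValue keeps the rule the merger has always enforced: registers folded into a single LDM/STM must have strictly ascending hardware encodings, over a contiguous run of word offsets. A compact sketch of that admission check; the struct and function names are made up for illustration:

    #include <cstddef>
    #include <vector>

    struct MemOpCand { unsigned RegEnc; int Offset; };

    // Decide whether a run of loads/stores, sorted by offset, can merge into one
    // LDM/STM: offsets must step by 4 and register encodings must strictly ascend.
    bool canMergeIntoLDM(const std::vector<MemOpCand> &Ops) {
      for (size_t i = 1; i < Ops.size(); ++i) {
        if (Ops[i].Offset != Ops[i - 1].Offset + 4) return false;  // not contiguous
        if (Ops[i].RegEnc <= Ops[i - 1].RegEnc)     return false;  // must ascend
      }
      return true;
    }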
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 1466e983f3be..6f974fd17d8c 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
- field bits<4> Num;
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
let SubRegs = subregs;
// All bits of ARM registers with sub-registers are covered by sub-registers.
let CoveredBySubRegs = 1;
}
-class ARMFReg<bits<6> num, string n> : Register<n> {
- field bits<6> Num;
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
}
@@ -267,21 +267,16 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- (trunc DPR, 16)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1)];
-}
+ (trunc DPR, 16)>;
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- (trunc DPR, 8)> {
- let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
-}
+ (trunc DPR, 8)>;
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
(sequence "Q%u", 0, 15)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1)];
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
let AltOrderSelect = [{ return 1; }];
@@ -289,17 +284,11 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
// Subset of QPR that have 32-bit SPR subregs.
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (trunc QPR, 8)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_VFP2 dsub_0, dsub_1)];
-}
+ 128, (trunc QPR, 8)>;
// Subset of QPR that have DPR_8 and SPR_8 subregs.
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (trunc QPR, 4)> {
- let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_8 dsub_0, dsub_1)];
-}
+ 128, (trunc QPR, 4)>;
// Pseudo-registers representing odd-even pairs of D registers. The even-odd
// pairs are already represented by the Q registers.
@@ -338,8 +327,6 @@ def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
- (QPR qsub_0, qsub_1)];
// Allocate non-VFP2 aliases first.
let AltOrders = [(rotl QQPR, 8)];
let AltOrderSelect = [{ return 1; }];
@@ -363,9 +350,6 @@ def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
- let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
- dsub_4, dsub_5, dsub_6, dsub_7),
- (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
// Allocate non-VFP2 aliases first.
let AltOrders = [(rotl QQQQPR, 8)];
let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 45486fd0b6dd..81d2fa37c2d1 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass;
def IIC_iLoad_d_i : InstrItinClass;
def IIC_iLoad_d_r : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
-def IIC_iPop : InstrItinClass<0>; // micro-coded
-def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_m : InstrItinClass;
+def IIC_iLoad_mu : InstrItinClass;
+def IIC_iLoad_mBr : InstrItinClass;
+def IIC_iPop : InstrItinClass;
+def IIC_iPop_Br : InstrItinClass;
def IIC_iLoadiALU : InstrItinClass;
def IIC_iStore_i : InstrItinClass;
def IIC_iStore_r : InstrItinClass;
@@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i : InstrItinClass;
def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
-def IIC_iStore_m : InstrItinClass<0>; // micro-coded
-def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_iStore_m : InstrItinClass;
+def IIC_iStore_mu : InstrItinClass;
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
@@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
-def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_m : InstrItinClass;
+def IIC_fpLoad_mu : InstrItinClass;
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
-def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
-def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_m : InstrItinClass;
+def IIC_fpStore_mu : InstrItinClass;
def IIC_VLD1 : InstrItinClass;
def IIC_VLD1x2 : InstrItinClass;
def IIC_VLD1x3 : InstrItinClass;
@@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[], [], []>;
-
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8b1fb9386ad5..2c6382542ab9 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -151,28 +151,30 @@ def CortexA8Itineraries : ProcessorItineraries<
// Load multiple, def is the 5th operand. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 2, 1, 1, 3]>,
+ [1, 2, 1, 1, 3], [], -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 3], [], -1>, // dynamic uops
//
// Push, def is the 3rd operand.
InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 1, 3]>,
-
+ [1, 1, 3], [], -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -227,12 +229,13 @@ def CortexA8Itineraries : ProcessorItineraries<
// Store multiple. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [2]>,
-
+ InstrStage<2, [A8_LSPipe]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
@@ -393,14 +396,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 2], [], -1>, // dynamic uops
//
// FP Load Multiple + update
InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 2], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -419,15 +424,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
-
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// Issue through integer pipeline, and execute in NEON unit.
//
@@ -1051,3 +1057,19 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
+def CortexA8Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA8Itineraries;
+}
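
A rough standalone sketch (not part of the patch, and not the real LLVM API) of the idea behind the new SchedMachineModel: per-CPU cost data lives in one static table that the subtarget merely points at, instead of being recomputed or hardcoded per query. SimpleSchedModel and lookupModel below are hypothetical names.

#include <cstring>
#include <iostream>

// Hypothetical stand-in for the per-CPU machine model carried by the subtarget.
struct SimpleSchedModel {
  const char *CPU;
  int IssueWidth;         // micro-ops dispatched per cycle
  int LoadLatency;        // optimistic load-use latency
  int MispredictPenalty;  // branch misprediction cost in cycles
};

// Static cost data, analogous to what a getSchedModelForCPU() lookup returns.
static const SimpleSchedModel Models[] = {
  {"cortex-a8", 2, 2, 13},
  {"cortex-a9", 2, 2, 8},
  {"generic",   1, 2, 10},
};

static const SimpleSchedModel *lookupModel(const char *CPU) {
  for (const SimpleSchedModel &M : Models)
    if (std::strcmp(M.CPU, CPU) == 0)
      return &M;
  return &Models[2]; // fall back to the generic model
}

int main() {
  // The subtarget just forwards queries to the selected model.
  const SimpleSchedModel *SM = lookupModel("cortex-a8");
  std::cout << "mispredict penalty: " << SM->MispredictPenalty << "\n";
}
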
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 0d710cc1acee..7bc590f94756 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -11,6 +11,10 @@
//
//===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
//
// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Reference Manual".
@@ -280,7 +284,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -288,7 +293,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -297,7 +303,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 2, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -305,7 +312,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop + branch, def is the 3rd operand.
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -314,8 +322,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
-
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -409,14 +417,15 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>], [2]>,
-
+ InstrStage<2, [A9_LSUnit]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
@@ -713,7 +722,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Load Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -722,7 +732,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -749,7 +760,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -758,7 +770,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// VLD1
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -1861,3 +1874,22 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_NPipe]>],
[4, 1, 2, 2, 3, 3, 1]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
+def CortexA9Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 2; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
+
+ let Itineraries = CortexA9Itineraries;
+}
+
+// TODO: Add Cortex-A9 processor and scheduler resources.
+
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index e2530d07e237..31d5d38d84f3 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -179,8 +179,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Args.push_back(Entry);
// Emit __eabi_memset call
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain,
+ TargetLowering::CallLoweringInfo CLI(Chain,
Type::getVoidTy(*DAG.getContext()), // return type
false, // return sign ext
false, // return zero ext
@@ -193,7 +192,9 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
false, // is return val used
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()), // callee
- Args, DAG, dl); // arg list, DAG and debug
+ Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(CLI);
return CallResult.second;
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e247b76ad43b..4762854c12dd 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -67,6 +67,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasDataBarrier(false)
, Pref32BitThumb(false)
, AvoidCPSRPartialUpdate(false)
+ , HasRAS(false)
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
@@ -82,7 +83,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS;
@@ -96,13 +97,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
if (!HasV6T2Ops && hasThumb2())
HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- // After parsing Itineraries, set ItinData.IssueWidth.
- computeIssueWidth();
-
- if (TT.find("eabi") != std::string::npos)
+ if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
// FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
// Darwin-EABI conforms to AAPCS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
@@ -181,31 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- // If we have a reasonable estimate of the pipeline depth, then we can
- // estimate the penalty of a misprediction based on that.
- if (isCortexA8())
- return 13;
- else if (isCortexA9())
- return 8;
-
- // Otherwise, just return a sensible default.
- return 10;
-}
-
-void ARMSubtarget::computeIssueWidth() {
- unsigned allStage1Units = 0;
- for (const InstrItinerary *itin = InstrItins.Itineraries;
- itin->FirstStage != ~0U; ++itin) {
- const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
- allStage1Units |= IS->getUnits();
- }
- InstrItins.IssueWidth = 0;
- while (allStage1Units) {
- ++InstrItins.IssueWidth;
- // clear the lowest bit
- allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
- }
- assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+ return SchedModel->MispredictPenalty;
}
bool ARMSubtarget::enablePostRAScheduler(
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06fa3fcc..b3940613000c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
/// v6m, v7m for example.
bool IsMClass;
@@ -155,6 +155,9 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// SchedModel - Processor specific instruction costs.
+ const MCSchedModel *SchedModel;
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 9aa83089202d..171c9adfa40f 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -136,22 +136,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
- PM->add(createGlobalMergePass(TM->getTargetLowering()));
+ addPass(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
bool ARMPassConfig::addInstSelector() {
- PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+ addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
- PM->add(createARMLoadStoreOptimizationPass(true));
+ addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
- PM->add(createMLxExpansionPass());
+ addPass(createMLxExpansionPass());
return true;
}
@@ -159,23 +159,23 @@ bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only()) {
- PM->add(createARMLoadStoreOptimizationPass());
+ addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
if (getARMSubtarget().hasNEON())
- PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
// proper scheduling.
- PM->add(createARMExpandPseudoPass());
+ addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only())
- addPass(IfConverterID);
+ addPass(&IfConverterID);
}
if (getARMSubtarget().isThumb2())
- PM->add(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
return true;
}
@@ -183,13 +183,13 @@ bool ARMPassConfig::addPreSched2() {
bool ARMPassConfig::addPreEmitPass() {
if (getARMSubtarget().isThumb2()) {
if (!getARMSubtarget().prefers32BitThumb())
- PM->add(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
// Constant island pass work on unbundled instructions.
- addPass(UnpackMachineBundlesID);
+ addPass(&UnpackMachineBundlesID);
}
- PM->add(createARMConstantIslandPass());
+ addPass(createARMConstantIslandPass());
return true;
}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index a5ea1c202e2c..3d85ca7d6995 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -24,20 +24,11 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
+ bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+ InitializeELF(isAAPCS_ABI);
if (isAAPCS_ABI) {
- StaticCtorSection =
- getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
LSDASection = NULL;
}
@@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
-
- if (Priority == 65535)
- return StaticCtorSection;
-
- // Emit ctors in priority order.
- std::string Name = std::string(".init_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
-
- if (Priority == 65535)
- return StaticDtorSection;
-
- // Emit dtors in priority order.
- std::string Name = std::string(".fini_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index ff210604148d..c6a7261439d7 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,7 +20,6 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
- bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -32,9 +31,6 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
-
- const MCSection * getStaticCtorSection(unsigned Priority) const;
- const MCSection * getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2c53e3f8f8cd..3a5957b24107 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -236,7 +236,10 @@ public:
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
Match_RequiresNotITBlock,
Match_RequiresV6,
- Match_RequiresThumb2
+ Match_RequiresThumb2,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
@@ -793,6 +796,13 @@ public:
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, but it can't fit
+ // into shift immediate encoding, we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+ else return (isARMSOImm() || isARMSOImmNeg());
+ }
bool isARMSOImm() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -914,7 +924,9 @@ public:
// Immediate offset in range [-255, 255].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 256;
+ // The #-0 offset is encoded as INT32_MIN, and we have to check
+ // for this too.
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
}
bool isAM3Offset() const {
if (Kind != k_Immediate && Kind != k_PostIndexRegister)
@@ -1028,7 +1040,8 @@ public:
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1446,8 +1459,10 @@ public:
assert(isRegShiftedImm() &&
"addRegShiftedImmOperands() on non RegShiftedImm!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
+ // Shift of #32 is encoded as 0 where permitted
+ unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm);
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm)));
}
void addShifterImmOperands(MCInst &Inst, unsigned N) const {
@@ -1637,6 +1652,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2301,7 +2332,7 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ccout " << getReg() << ">";
break;
case k_ITCondMask: {
- static const char *MaskStr[] = {
+ static const char *const MaskStr[] = {
"()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
"(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
};
@@ -2672,7 +2703,7 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned CC = StringSwitch<unsigned>(Tok.getString())
+ unsigned CC = StringSwitch<unsigned>(Tok.getString().lower())
.Case("eq", ARMCC::EQ)
.Case("ne", ARMCC::NE)
.Case("hs", ARMCC::HS)
@@ -2877,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(EndReg))
return Error(EndLoc, "invalid register in register list");
// Ranges must go from low to high.
- if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
return Error(EndLoc, "bad range in register list");
// Add all the registers in the range to the register list.
@@ -2904,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
Warning(RegLoc, "register list not in ascending order");
else
return Error(RegLoc, "register list not in ascending order");
}
- if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
continue;
@@ -3249,28 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- StringRef OptStr = Tok.getString();
-
- unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
- .Case("sy", ARM_MB::SY)
- .Case("st", ARM_MB::ST)
- .Case("sh", ARM_MB::ISH)
- .Case("ish", ARM_MB::ISH)
- .Case("shst", ARM_MB::ISHST)
- .Case("ishst", ARM_MB::ISHST)
- .Case("nsh", ARM_MB::NSH)
- .Case("un", ARM_MB::NSH)
- .Case("nshst", ARM_MB::NSHST)
- .Case("unst", ARM_MB::NSHST)
- .Case("osh", ARM_MB::OSH)
- .Case("oshst", ARM_MB::OSHST)
- .Default(~0U);
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
- if (Opt == ~0U)
- return MatchOperand_NoMatch;
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().ParseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
- Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
return MatchOperand_Success;
}
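
As a rough illustration (not part of the patch), the parser change means a barrier operand can now be written either as a mnemonic or as a raw 4-bit immediate; the sketch below maps between the two forms using the architectural option values that also appear in DecodeMemBarrierOption further down (SY=0xF, ISH=0xB, and so on).

#include <iostream>
#include <string>

// Map a 4-bit barrier option to its mnemonic, or "#<n>" for reserved values.
// Values follow the ARM architecture (SY=0xF, ST=0xE, ISH=0xB, ...).
static std::string barrierOptionName(unsigned Val) {
  switch (Val & 0xf) {
  case 0xF: return "sy";
  case 0xE: return "st";
  case 0xB: return "ish";
  case 0xA: return "ishst";
  case 0x7: return "nsh";
  case 0x6: return "nshst";
  case 0x3: return "osh";
  case 0x2: return "oshst";
  default:  return "#" + std::to_string(Val & 0xf);
  }
}

int main() {
  std::cout << barrierOptionName(0xB) << "\n";  // "ish"
  std::cout << barrierOptionName(0x1) << "\n";  // reserved, printed as "#1"
}
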
@@ -3280,7 +3342,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
// An iflags string of "none" is interpreted to mean that none of the AIF
@@ -3320,26 +3383,51 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// See ARMv6-M 10.1.1
std::string Name = Mask.lower();
unsigned FlagsVal = StringSwitch<unsigned>(Name)
- .Case("apsr", 0)
- .Case("iapsr", 1)
- .Case("eapsr", 2)
- .Case("xpsr", 3)
- .Case("ipsr", 5)
- .Case("epsr", 6)
- .Case("iepsr", 7)
- .Case("msp", 8)
- .Case("psp", 9)
- .Case("primask", 16)
- .Case("basepri", 17)
- .Case("basepri_max", 18)
- .Case("faultmask", 19)
- .Case("control", 20)
+ // Note: in the documentation:
+ // ARM deprecates using MSR APSR without a _<bits> qualifier as an alias
+ // for MSR APSR_nzcvq.
+ // but we do make it an alias here. This is done to get the "mask encoding"
+ // bits correct on MSR APSR writes.
+ //
+ // FIXME: Note the 0xc00 "mask encoding" bits version of the registers
+ // should really only be allowed when writing a special register. Note
+ // they get dropped in the MRS instruction reading a special register as
+ // the SYSm field is only 8 bits.
+ //
+ // FIXME: the _g and _nzcvqg versions are only allowed if the processor
+ // includes the DSP extension but that is not checked.
+ .Case("apsr", 0x800)
+ .Case("apsr_nzcvq", 0x800)
+ .Case("apsr_g", 0x400)
+ .Case("apsr_nzcvqg", 0xc00)
+ .Case("iapsr", 0x801)
+ .Case("iapsr_nzcvq", 0x801)
+ .Case("iapsr_g", 0x401)
+ .Case("iapsr_nzcvqg", 0xc01)
+ .Case("eapsr", 0x802)
+ .Case("eapsr_nzcvq", 0x802)
+ .Case("eapsr_g", 0x402)
+ .Case("eapsr_nzcvqg", 0xc02)
+ .Case("xpsr", 0x803)
+ .Case("xpsr_nzcvq", 0x803)
+ .Case("xpsr_g", 0x403)
+ .Case("xpsr_nzcvqg", 0xc03)
+ .Case("ipsr", 0x805)
+ .Case("epsr", 0x806)
+ .Case("iepsr", 0x807)
+ .Case("msp", 0x808)
+ .Case("psp", 0x809)
+ .Case("primask", 0x810)
+ .Case("basepri", 0x811)
+ .Case("basepri_max", 0x812)
+ .Case("faultmask", 0x813)
+ .Case("control", 0x814)
.Default(~0U);
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
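
A small standalone sketch (outside the patch) of how the new table values decompose: bits 11-10 of each entry carry the MSR "mask encoding" (nzcvq and/or g) while the low byte is the SYSm register number, which is all that survives in an MRS.

#include <iostream>

// Split a packed M-class MSR/MRS operand value into its two components.
// e.g. 0xc01 -> mask bits 0x3 (nzcvq and g), SYSm 0x01 (iapsr).
static void splitMSRMask(unsigned FlagsVal) {
  unsigned MaskBits = (FlagsVal >> 10) & 0x3; // 0x2 = nzcvq, 0x1 = g
  unsigned SYSm = FlagsVal & 0xFF;            // only 8 bits survive in MRS
  std::cout << "mask=" << MaskBits << " SYSm=" << SYSm << "\n";
}

int main() {
  splitMSRMask(0x801); // iapsr_nzcvq:  mask=2 SYSm=1
  splitMSRMask(0xc03); // xpsr_nzcvqg:  mask=3 SYSm=3
}
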
@@ -5216,8 +5304,8 @@ validateInstruction(MCInst &Inst,
case ARM::LDRD_POST:
case ARM::LDREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
@@ -5225,8 +5313,8 @@ validateInstruction(MCInst &Inst,
}
case ARM::STRD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5236,8 +5324,8 @@ validateInstruction(MCInst &Inst,
case ARM::STRD_POST:
case ARM::STREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5315,6 +5403,16 @@ validateInstruction(MCInst &Inst,
"registers must be in range r0-r7");
break;
}
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need thumb2 (for the wide encoding), or we have an error.
+ if (!isThumbTwo() &&
+ Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ return Error(Operands[4]->getStartLoc(),
+ "source register must be the same as destination");
+ }
+ break;
+ }
}
return false;
@@ -6750,8 +6848,8 @@ processInstruction(MCInst &Inst,
case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
}
- unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
- if (Ammount == 32) Ammount = 0;
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
if (isNarrow)
@@ -6759,7 +6857,7 @@ processInstruction(MCInst &Inst,
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
if (newOpc != ARM::t2RRX)
- TmpInst.addOperand(MCOperand::CreateImm(Ammount));
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
if (!isNarrow)
@@ -6809,6 +6907,9 @@ processInstruction(MCInst &Inst,
// A shift by zero is a plain MOVr, not a MOVsi.
unsigned Amt = Inst.getOperand(2).getImm();
unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ // A shift by 32 should be encoded as 0 when permitted
+ if (Amt == 32 && (ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr))
+ Amt = 0;
unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
MCInst TmpInst;
TmpInst.setOpcode(Opc);
@@ -6985,6 +7086,16 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ case ARM::tADDrSP: {
+ // If the non-SP source operand and the destination operand are not the
+ // same, we need to use the 32-bit encoding if it's available.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
+ Inst.setOpcode(ARM::t2ADDrr);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ return true;
+ }
+ break;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -7154,7 +7265,9 @@ processInstruction(MCInst &Inst,
}
case ARM::MOVsi: {
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
- if (SOpc == ARM_AM::rrx) return false;
+ // rrx shifts and asr/lsr of #32 are encoded as 0
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ return false;
if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
// Shifting by zero is accepted as a vanilla 'MOVr'
MCInst TmpInst;
@@ -7188,7 +7301,9 @@ processInstruction(MCInst &Inst,
case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
}
// If the shift is by zero, use the non-shifted instruction definition.
- if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) {
+ // The exception is for right shifts, where 0 == 32
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 &&
+ !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) {
MCInst TmpInst;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0));
@@ -7207,9 +7322,7 @@ processInstruction(MCInst &Inst,
// The mask bits for all but the first condition are represented as
// the low bit of the condition code value implies 't'. We currently
// always have 1 implies 't', so XOR toggle the bits if the low bit
- // of the condition code is zero. The encoding also expects the low
- // bit of the condition to be encoded as bit 4 of the mask operand,
- // so mask that in if needed
+ // of the condition code is zero.
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
unsigned OrigMask = Mask;
@@ -7218,8 +7331,7 @@ processInstruction(MCInst &Inst,
assert(Mask && TZ <= 3 && "illegal IT mask value!");
for (unsigned i = 3; i != TZ; --i)
Mask ^= 1 << i;
- } else
- Mask |= 0x10;
+ }
MO.setImm(Mask);
// Set up the IT block state according to the IT instruction we just
@@ -7231,6 +7343,86 @@ processInstruction(MCInst &Inst,
ITState.FirstCond = true;
break;
}
+ case ARM::t2LSLrr:
+ case ARM::t2LSRrr:
+ case ARM::t2ASRrr:
+ case ARM::t2SBCrr:
+ case ARM::t2RORrr:
+ case ARM::t2BICrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break;
+ case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break;
+ case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break;
+ case ARM::t2SBCrr: NewOpc = ARM::tSBC; break;
+ case ARM::t2RORrr: NewOpc = ARM::tROR; break;
+ case ARM::t2BICrr: NewOpc = ARM::tBIC; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::t2ANDrr:
+ case ARM::t2EORrr:
+ case ARM::t2ADCrr:
+ case ARM::t2ORRrr:
+ {
+ // Assemblers should use the narrow encodings of these instructions when permissible.
+ // These instructions are special in that they are commutable, so shorter encodings
+ // are available more often.
+ if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg())) &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
+ Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
+ ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
+ !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2ADCrr: NewOpc = ARM::tADC; break;
+ case ARM::t2ANDrr: NewOpc = ARM::tAND; break;
+ case ARM::t2EORrr: NewOpc = ARM::tEOR; break;
+ case ARM::t2ORRrr: NewOpc = ARM::tORR; break;
+ }
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ } else {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ }
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
}
return false;
}
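
To summarize when the 32-bit register-register ALU forms above collapse to their 16-bit Thumb encodings, here is a rough boolean sketch; canUseNarrowALU is a hypothetical helper, not the parser's actual interface, and it compresses the Rd/Rn/Rm checks into single flags.

#include <iostream>

// Hypothetical summary of when a t2 ALU (reg, reg) instruction may use the
// narrow 16-bit Thumb encoding, per the logic added to processInstruction().
static bool canUseNarrowALU(bool RegsAreLow, bool RdMatchesSource,
                            bool SetsFlags, bool InITBlock, bool HasWideSuffix) {
  if (HasWideSuffix)        // an explicit ".w" forces the wide encoding
    return false;
  if (!RegsAreLow)          // narrow encodings only address r0-r7
    return false;
  if (!RdMatchesSource)     // narrow forms are two-operand (destructive)
    return false;
  // Outside an IT block the narrow form sets flags; inside it must not.
  return InITBlock ? !SetsFlags : SetsFlags;
}

int main() {
  // "ands r1, r1, r2" outside an IT block -> narrow tAND is usable.
  std::cout << canUseNarrowALU(true, true, true, false, false) << "\n"; // 1
  // "and.w r1, r1, r2" -> the wide encoding was requested explicitly.
  std::cout << canUseNarrowALU(true, true, true, false, true) << "\n";  // 0
}
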
@@ -7277,6 +7469,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
+static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -7317,9 +7510,21 @@ MatchAndEmitInstruction(SMLoc IDLoc,
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
- return true;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing (Thumb vs. ARM, e.g.).
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
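
For illustration only, the new Match_MissingFeature handling walks each set bit of the missing-feature mask and appends its name; featureName below is a hypothetical stand-in for the TableGen-generated getSubtargetFeatureName(), with made-up feature names.

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical stand-in for the generated subtarget feature name table.
static const char *featureName(uint64_t Bit) {
  switch (Bit) {
  case 1ULL << 0: return "ARMv7";
  case 1ULL << 1: return "Thumb2";
  case 1ULL << 2: return "NEON";
  default:        return "(unknown)";
  }
}

// Build an "instruction requires:" message from a missing-feature bitmask.
static std::string missingFeatureMessage(uint64_t ErrorInfo) {
  std::string Msg = "instruction requires:";
  for (uint64_t Mask = 1; Mask; Mask <<= 1)
    if (ErrorInfo & Mask)
      Msg += std::string(" ") + featureName(Mask);
  return Msg;
}

int main() {
  // e.g. an instruction that needs both Thumb2 and NEON.
  std::cout << missingFeatureMessage((1ULL << 1) | (1ULL << 2)) << "\n";
}
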
@@ -7336,7 +7541,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "invalid instruction",
((ARMOperand*)Operands[0])->getLocRange());
case Match_ConversionFail:
- // The converter function will have already emited a diagnostic.
+ // The converter function will have already emitted a diagnostic.
return true;
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
@@ -7346,6 +7551,11 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_15: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ }
}
llvm_unreachable("Implement any new match types added!");
@@ -7582,5 +7792,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
}
#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9a2aab5304ab..ac916ccaed63 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -49,6 +49,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
+add_dependencies(LLVMARMCodeGen intrinsics_gen)
+
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 912935db17ac..e47bf66e3afe 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -24,12 +24,66 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <vector>
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+ // Handles the condition code status of instructions in IT blocks
+ class ITStatus
+ {
+ public:
+ // Returns the condition code for the current instruction in an IT block
+ unsigned getITCC() {
+ unsigned CC = ARMCC::AL;
+ if (instrInITBlock())
+ CC = ITStates.back();
+ return CC;
+ }
+
+ // Advances the IT block state to the next T or E
+ void advanceITState() {
+ ITStates.pop_back();
+ }
+
+ // Returns true if the current instruction is in an IT block
+ bool instrInITBlock() {
+ return !ITStates.empty();
+ }
+
+ // Returns true if the current instruction is the last instruction in an IT block
+ bool instrLastInITBlock() {
+ return ITStates.size() == 1;
+ }
+
+ // Called when decoding an IT instruction. Sets the IT state for the following
+ // instructions that follow in the IT block. Firstcond and Mask correspond to the
+ // fields in the IT instruction encoding.
+ void setITState(char Firstcond, char Mask) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned CondBit0 = Firstcond & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ // Push condition codes onto the stack in the correct order for the pops
+ for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITStates.push_back(CCBits);
+ else
+ ITStates.push_back(CCBits ^ 1);
+ }
+ ITStates.push_back(CCBits);
+ }
+
+ private:
+ std::vector<unsigned char> ITStates;
+ };
+}
+
+namespace {
/// ARMDisassembler - ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -78,7 +132,7 @@ public:
/// getEDInfo - See MCDisassembler.
const EDInstInfo *getEDInfo() const;
private:
- mutable std::vector<unsigned> ITBlock;
+ mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
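
A rough standalone re-implementation (not the class above, just the same arithmetic) showing how setITState turns an IT instruction's firstcond/mask fields into the per-instruction condition codes that are later popped off:

#include <cassert>
#include <iostream>
#include <vector>

// Minimal re-implementation of the IT-block bookkeeping for illustration.
static std::vector<unsigned char> decodeITMask(unsigned Firstcond, unsigned Mask) {
  std::vector<unsigned char> States;
  unsigned CondBit0 = Firstcond & 1;
  unsigned NumTZ = 0;
  while (NumTZ < 4 && !((Mask >> NumTZ) & 1))
    ++NumTZ;                       // trailing zeros give the block length
  assert(NumTZ <= 3 && "invalid IT mask");
  unsigned char CC = Firstcond & 0xf;
  // Conditions are pushed so that pops yield them in instruction order.
  for (unsigned Pos = NumTZ + 1; Pos <= 3; ++Pos) {
    bool T = ((Mask >> Pos) & 1) == CondBit0;
    States.push_back(T ? CC : (unsigned char)(CC ^ 1));
  }
  States.push_back(CC);            // first instruction uses Firstcond itself
  return States;
}

int main() {
  // "ITET EQ": firstcond = EQ (0b0000), mask = 0b1010 (then, else, then).
  std::vector<unsigned char> S = decodeITMask(0x0, 0xA);
  // Popping from the back yields EQ, NE, EQ for the three instructions.
  while (!S.empty()) {
    std::cout << (S.back() == 0 ? "EQ" : "NE") << " ";
    S.pop_back();
  }
  std::cout << "\n";
}
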
@@ -549,7 +603,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// These can often be values in a literal pool near the Address of the
/// instruction. The Address of the instruction and its immediate Value are
/// used as a possible literal pool entry. The SymbolLookUp call back will
-/// return the name of a symbol referenced by the the literal pool's entry if
+/// return the name of a symbol referenced by the literal pool's entry if
/// the referenced address is that of a symbol. Or it will return a pointer to
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
@@ -612,7 +666,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
case ARM::tSETEND:
// Some instructions (mostly conditional branches) are not
// allowed in IT blocks.
- if (!ITBlock.empty())
+ if (ITBlock.instrInITBlock())
S = SoftFail;
else
return Success;
@@ -623,7 +677,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
case ARM::t2TBH:
// Some instructions (mostly unconditional branches) can
// only appear at the end of, or outside of, an IT.
- if (ITBlock.size() > 1)
+ if (ITBlock.instrInITBlock() && !ITBlock.instrLastInITBlock())
S = SoftFail;
break;
default:
@@ -633,13 +687,11 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// If we're in an IT block, base the predicate on that. Otherwise,
// assume a predicate of AL.
unsigned CC;
- if (!ITBlock.empty()) {
- CC = ITBlock.back();
- if (CC == 0xF)
- CC = ARMCC::AL;
- ITBlock.pop_back();
- } else
+ CC = ITBlock.getITCC();
+ if (CC == 0xF)
CC = ARMCC::AL;
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
@@ -674,11 +726,9 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// context as a post-pass.
void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
unsigned CC;
- if (!ITBlock.empty()) {
- CC = ITBlock.back();
- ITBlock.pop_back();
- } else
- CC = ARMCC::AL;
+ CC = ITBlock.getITCC();
+ if (ITBlock.instrInITBlock())
+ ITBlock.advanceITState();
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
MCInst::iterator I = MI.begin();
@@ -726,7 +776,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
if (result) {
Size = 2;
- bool InITBlock = !ITBlock.empty();
+ bool InITBlock = ITBlock.instrInITBlock();
Check(result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
return result;
@@ -739,7 +789,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Nested IT blocks are UNPREDICTABLE. Must be checked before we add
// the Thumb predicate.
- if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty())
+ if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
result = MCDisassembler::SoftFail;
Check(result, AddThumbPredicate(MI));
@@ -749,21 +799,9 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// to the subsequent instructions.
if (MI.getOpcode() == ARM::t2IT) {
- // (3 - the number of trailing zeros) is the number of then / else.
- unsigned firstcond = MI.getOperand(0).getImm();
+ unsigned Firstcond = MI.getOperand(0).getImm();
unsigned Mask = MI.getOperand(1).getImm();
- unsigned CondBit0 = Mask >> 4 & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
- bool T = ((Mask >> Pos) & 1) == CondBit0;
- if (T)
- ITBlock.insert(ITBlock.begin(), firstcond);
- else
- ITBlock.insert(ITBlock.begin(), firstcond ^ 1);
- }
-
- ITBlock.push_back(firstcond);
+ ITBlock.setITState(Firstcond, Mask);
}
return result;
@@ -783,7 +821,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
- bool InITBlock = ITBlock.size();
+ bool InITBlock = ITBlock.instrInITBlock();
Check(result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
return result;
@@ -1186,8 +1224,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 4);
- unsigned regs = Val & 0xFF;
+ unsigned Vd = fieldFromInstruction32(Val, 8, 5);
+ unsigned regs = fieldFromInstruction32(Val, 0, 8);
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1203,8 +1241,10 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 4);
- unsigned regs = (Val & 0xFF) / 2;
+ unsigned Vd = fieldFromInstruction32(Val, 8, 5);
+ unsigned regs = fieldFromInstruction32(Val, 0, 8);
+
+ regs = regs >> 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2976,7 +3016,7 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
@@ -3111,9 +3151,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- int imm = Val & 0xFF;
- if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ if (Val == 0)
+ Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ }
return MCDisassembler::Success;
}
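
A standalone sketch (not part of the patch) of the same decoding, showing why a sentinel is needed: the U:imm8 encoding can express both #+0 and #-0, and a plain integer offset cannot, so INT32_MIN stands in for the #-0 case.

#include <climits>
#include <cstdint>
#include <iostream>

// Decode a T2 imm8s4 addressing-mode offset: Val = U:imm8, offset = +/-(imm8*4).
// INT32_MIN is used as a sentinel for the otherwise unrepresentable "#-0".
static int32_t decodeImm8S4(uint32_t Val) {
  if (Val == 0)                 // U = 0 and imm8 = 0 means "#-0"
    return INT32_MIN;
  int32_t Imm = Val & 0xFF;
  if (!(Val & 0x100))           // U bit clear: subtract the offset
    Imm = -Imm;
  return Imm * 4;
}

int main() {
  std::cout << decodeImm8S4(0x103) << "\n";              // U=1, imm8=3 ->  12
  std::cout << decodeImm8S4(0x003) << "\n";              // U=0, imm8=3 -> -12
  std::cout << (decodeImm8S4(0) == INT32_MIN) << "\n";   // "#-0" sentinel -> 1
}
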
@@ -3258,9 +3303,9 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
} else if (Inst.getOpcode() == ARM::tADDspr) {
unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
@@ -3299,10 +3344,25 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
+ // Val is passed in as S:J1:J2:imm10H:imm10L:'0'
+ // Note only one trailing zero not two. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with two trailing zeros as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
if (!tryAddingSymbolicOperand(Address,
- (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+ (Address & ~2u) + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(imm32));
return MCDisassembler::Success;
}
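
To make the J1/J2 handling concrete, here is a small self-contained sketch mirroring the arithmetic above; signExtend32 is a local stand-in for llvm::SignExtend32, and the sample encoding is made up.

#include <cstdint>
#include <iostream>

// Sign-extend the low N bits of V.
template <unsigned N> static int32_t signExtend32(uint32_t V) {
  return int32_t(V << (32 - N)) >> (32 - N);
}

// Rebuild the BLX immediate from Val = S:J1:J2:imm10H:imm10L:'0' (24 bits).
static int32_t decodeBLXOffset(uint32_t Val) {
  uint32_t S  = (Val >> 23) & 1;
  uint32_t J1 = (Val >> 22) & 1;
  uint32_t J2 = (Val >> 21) & 1;
  uint32_t I1 = !(J1 ^ S);            // I1 = NOT(J1 EOR S)
  uint32_t I2 = !(J2 ^ S);            // I2 = NOT(J2 EOR S)
  uint32_t Tmp = (Val & ~0x600000u) | (I1 << 22) | (I2 << 21);
  return signExtend32<25>(Tmp << 1);  // SignExtend(S:I1:I2:imm10H:imm10L:'00')
}

int main() {
  // All encoded bits set (with the fixed trailing zero) decodes to -4.
  std::cout << decodeBLXOffset(0xFFFFFE) << "\n";  // prints -4
}
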
@@ -3408,35 +3468,39 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<9>(Val << 1)));
return MCDisassembler::Success;
}
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ // Val is passed in as S:J1:J2:imm10:imm11
+ // Note no trailing zero after imm11. Also the J1 and J2 values are from
+ // the encoded instruction. So here change to I1 and I2 values via:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = (Val >> 23) & 1;
+ unsigned J1 = (Val >> 22) & 1;
+ unsigned J2 = (Val >> 21) & 1;
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+ int imm32 = SignExtend32<25>(tmp << 1);
+
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(imm32));
return MCDisassembler::Success;
}
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- switch (Val) {
- default:
+ if (Val & ~0xf)
return MCDisassembler::Fail;
- case 0xF: // SY
- case 0xE: // ST
- case 0xB: // ISH
- case 0xA: // ISHST
- case 0x7: // NSH
- case 0x6: // NSHST
- case 0x3: // OSH
- case 0x2: // OSHST
- break;
- }
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
@@ -4128,9 +4192,9 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4154,9 +4218,9 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4179,19 +4243,14 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction16(Insn, 4, 4);
- // The InstPrinter needs to have the low bit of the predicate in
- // the mask operand to be able to print it properly.
- unsigned mask = fieldFromInstruction16(Insn, 0, 5);
+ unsigned mask = fieldFromInstruction16(Insn, 0, 4);
if (pred == 0xF) {
pred = 0xE;
S = MCDisassembler::SoftFail;
}
- if ((mask & 0xF) == 0) {
- // Preserve the high bit of the mask, which is the low bit of
- // the predicate.
- mask &= 0x10;
+ if (mask == 0x0) {
mask |= 0x8;
S = MCDisassembler::SoftFail;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index cbd81c11a45b..8b9109ec9868 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+ // Check for HINT instructions w/ canonical names.
+ if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ switch (MI->getOperand(0).getImm()) {
+ case 0: O << "\tnop"; break;
+ case 1: O << "\tyield"; break;
+ case 2: O << "\twfe"; break;
+ case 3: O << "\twfi"; break;
+ case 4: O << "\tsev"; break;
+ default:
+ // Anything else should just print normally.
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ printPredicateOperand(MI, 1, O);
+ if (Opcode == ARM::t2HINT)
+ O << ".w";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// Check for MOVs and print canonical forms, instead.
if (Opcode == ARM::MOVsr) {
// FIXME: Thumb variants?
@@ -426,9 +447,13 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
return;
}
- if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ // If the op is sub, we have to print the immediate even if it is 0.
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (ImmOffs || (op == ARM_AM::sub))
O << ", #"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ARM_AM::getAddrOpcStr(op)
<< ImmOffs;
O << ']';
}
@@ -643,22 +668,50 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned Mask = Op.getImm() & 0xf;
if (getAvailableFeatures() & ARM::FeatureMClass) {
- switch (Op.getImm()) {
+ unsigned SYSm = Op.getImm();
+ unsigned Opcode = MI->getOpcode();
+ // For reads of the special registers ignore the "mask encoding" bits
+ // which are only for writes.
+ if (Opcode == ARM::t2MRS_M)
+ SYSm &= 0xff;
+ switch (SYSm) {
default: llvm_unreachable("Unexpected mask value!");
- case 0: O << "apsr"; return;
- case 1: O << "iapsr"; return;
- case 2: O << "eapsr"; return;
- case 3: O << "xpsr"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ case 0:
+ case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for apsr
+ case 0x400: O << "apsr_g"; return;
+ case 0xc00: O << "apsr_nzcvqg"; return;
+ case 1:
+ case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr
+ case 0x401: O << "iapsr_g"; return;
+ case 0xc01: O << "iapsr_nzcvqg"; return;
+ case 2:
+ case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr
+ case 0x402: O << "eapsr_g"; return;
+ case 0xc02: O << "eapsr_nzcvqg"; return;
+ case 3:
+ case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
+ case 0x403: O << "xpsr_g"; return;
+ case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 5:
+ case 0x805: O << "ipsr"; return;
+ case 6:
+ case 0x806: O << "epsr"; return;
+ case 7:
+ case 0x807: O << "iepsr"; return;
+ case 8:
+ case 0x808: O << "msp"; return;
+ case 9:
+ case 0x809: O << "psp"; return;
+ case 0x10:
+ case 0x810: O << "primask"; return;
+ case 0x11:
+ case 0x811: O << "basepri"; return;
+ case 0x12:
+ case 0x812: O << "basepri_max"; return;
+ case 0x13:
+ case 0x813: O << "faultmask"; return;
+ case 0x14:
+ case 0x814: O << "control"; return;
}
}
@@ -739,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm();
+
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+}
+
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "#" << MI->getOperand(OpNum).getImm() * 4;
@@ -754,7 +826,8 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = CountTrailingZeros_32(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
@@ -899,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "[" << getRegisterName(MO1.getReg());
- int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ int32_t OffImm = (int32_t)MO2.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm * 4;
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm * 4;
+ O << ", #" << OffImm;
O << "]";
}
@@ -936,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm != 0) {
- O << ", ";
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
- }
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 8acb7eef019b..73d7bfd28502 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -73,6 +73,7 @@ public:
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d10bfc104a3c..ac6ce642dfa9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -12,6 +12,7 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -84,7 +85,8 @@ public:
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
{ "fixup_arm_movt_hi16", 0, 20, 0 },
@@ -110,32 +112,7 @@ public:
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
- const MCSymbolRefExpr *A = Target.getSymA();
- // Some fixups to thumb function symbols need the low bit (thumb bit)
- // twiddled.
- if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
- if (A) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
- if (Asm.isThumbFunc(&Sym))
- Value |= 1;
- }
- }
- // We must always generate a relocation for BL/BLX instructions if we have
- // a symbol to reference, as the linker relies on knowing the destination
- // symbol's thumb-ness to get interworking right.
- if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
- (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
- IsResolved = false;
- }
+ bool &IsResolved);
bool mayNeedRelaxation(const MCInst &Inst) const;
@@ -269,7 +246,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext *Ctx = NULL) {
+ unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
@@ -322,7 +301,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
Value = -Value;
isAdd = false;
}
- assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 4096)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -345,8 +325,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
Value = -Value;
opc = 2; // 0b0010
}
- assert(ARM_AM::getSOImmVal(Value) != -1 &&
- "Out of range pc-relative fixup value!");
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -414,39 +394,65 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
return swapped;
}
case ARM::fixup_arm_thumb_bl: {
- // The value doesn't encode the low bit (always zero) and is offset by
- // four. The value is encoded into disjoint bit positions in the destination
- // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
- //
- // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
- //
- // Note that the halfwords are stored high first, low second; so we need
- // to transpose the fixup value here to map properly.
- unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
- uint32_t Binary = 0;
- Value = 0x3fffff & ((Value - 4) >> 1);
- Binary = (Value & 0x7ff) << 16; // Low imm11 value.
- Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
- Binary |= isNeg << 10; // Sign bit.
- return Binary;
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 4) >> 1;
+ uint32_t signBit = (offset & 0x800000) >> 23;
+ uint32_t I1Bit = (offset & 0x400000) >> 22;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x200000) >> 21;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
+ uint32_t imm11Bits = (offset & 0x000007FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ (uint16_t)imm11Bits);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
+
}
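  // Worked example for the fixup_arm_thumb_bl case above (editor's addition,
  // not part of this patch): a fixup Value of 4 is a branch to the very next
  // instruction once the +4 PC bias is removed, so offset = (4 - 4) >> 1 = 0,
  // giving S = I1 = I2 = 0 and therefore J1 = J2 = 1. That yields
  // firstHalf = 0x0000, secondHalf = (1 << 13) | (1 << 11) = 0x2800, and a
  // returned Binary of 0x28000000 -- only the J bits are set, in the second
  // (high-order) halfword, matching the disassembler's I1 = NOT(J1 EOR S)
  // recovery.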
case ARM::fixup_arm_thumb_blx: {
- // The value doesn't encode the low two bits (always zero) and is offset by
- // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
- // positions in the destination opcode. x = unchanged, I = immediate value
- // bit, S = sign extension bit, 0 = zero.
- //
- // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
- //
- // Note that the halfwords are stored high first, low second; so we need
- // to transpose the fixup value here to map properly.
- unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
- uint32_t Binary = 0;
- Value = 0xfffff & ((Value - 2) >> 2);
- Binary = (Value & 0x3ff) << 17; // Low imm10L value.
- Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
- Binary |= isNeg << 10; // Sign bit.
- return Binary;
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
+ // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
+ // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // J = either J1 or J2 bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ uint32_t offset = (Value - 2) >> 2;
+ uint32_t signBit = (offset & 0x400000) >> 22;
+ uint32_t I1Bit = (offset & 0x200000) >> 21;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x100000) >> 20;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+ uint32_t imm10LBits = (offset & 0x3FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ ((uint16_t)imm10LBits) << 1);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
}
case ARM::fixup_arm_thumb_cp:
// Offset by 4, and don't encode the low two bits. Two bytes of that
@@ -473,7 +479,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -491,7 +498,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -507,6 +515,43 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
}
+void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+
+ // Try to get the encoded value for the fixup as if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value is invalid.
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
+}
+
namespace {
// FIXME: This should be in a separate file.
@@ -530,7 +575,7 @@ public:
void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
@@ -615,7 +660,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ae11be888137..de48a0e0f30a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -120,14 +120,22 @@ namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
// the option field for memory barrier operations.
enum MemBOpt {
- SY = 15,
- ST = 14,
- ISH = 11,
- ISHST = 10,
- NSH = 7,
- NSHST = 6,
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ OSHST = 2,
OSH = 3,
- OSHST = 2
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ ST = 14,
+ SY = 15
};
inline static const char *MemBOptToString(unsigned val) {
@@ -135,92 +143,24 @@ namespace ARM_MB {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_0: return "#0x0";
}
}
} // namespace ARM_MB
-/// getARMRegisterNumbering - Given the enum value for some register, e.g.
-/// ARM::LR, return the number that it corresponds to (e.g. 14).
-inline static unsigned getARMRegisterNumbering(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- default:
- llvm_unreachable("Unknown ARM register!");
- case R0: case S0: case D0: case Q0: return 0;
- case R1: case S1: case D1: case Q1: return 1;
- case R2: case S2: case D2: case Q2: return 2;
- case R3: case S3: case D3: case Q3: return 3;
- case R4: case S4: case D4: case Q4: return 4;
- case R5: case S5: case D5: case Q5: return 5;
- case R6: case S6: case D6: case Q6: return 6;
- case R7: case S7: case D7: case Q7: return 7;
- case R8: case S8: case D8: case Q8: return 8;
- case R9: case S9: case D9: case Q9: return 9;
- case R10: case S10: case D10: case Q10: return 10;
- case R11: case S11: case D11: case Q11: return 11;
- case R12: case S12: case D12: case Q12: return 12;
- case SP: case S13: case D13: case Q13: return 13;
- case LR: case S14: case D14: case Q14: return 14;
- case PC: case S15: case D15: case Q15: return 15;
-
- case S16: case D16: return 16;
- case S17: case D17: return 17;
- case S18: case D18: return 18;
- case S19: case D19: return 19;
- case S20: case D20: return 20;
- case S21: case D21: return 21;
- case S22: case D22: return 22;
- case S23: case D23: return 23;
- case S24: case D24: return 24;
- case S25: case D25: return 25;
- case S26: case D26: return 26;
- case S27: case D27: return 27;
- case S28: case D28: return 28;
- case S29: case D29: return 29;
- case S30: case D30: return 30;
- case S31: case D31: return 31;
-
- // Composite registers use the regnum of the first register in the list.
- /* Q0 */ case D0_D2: return 0;
- case D1_D2: case D1_D3: return 1;
- /* Q1 */ case D2_D4: return 2;
- case D3_D4: case D3_D5: return 3;
- /* Q2 */ case D4_D6: return 4;
- case D5_D6: case D5_D7: return 5;
- /* Q3 */ case D6_D8: return 6;
- case D7_D8: case D7_D9: return 7;
- /* Q4 */ case D8_D10: return 8;
- case D9_D10: case D9_D11: return 9;
- /* Q5 */ case D10_D12: return 10;
- case D11_D12: case D11_D13: return 11;
- /* Q6 */ case D12_D14: return 12;
- case D13_D14: case D13_D15: return 13;
- /* Q7 */ case D14_D16: return 14;
- case D15_D16: case D15_D17: return 15;
- /* Q8 */ case D16_D18: return 16;
- case D17_D18: case D17_D19: return 17;
- /* Q9 */ case D18_D20: return 18;
- case D19_D20: case D19_D21: return 19;
- /* Q10 */ case D20_D22: return 20;
- case D21_D22: case D21_D23: return 21;
- /* Q11 */ case D22_D24: return 22;
- case D23_D24: case D23_D25: return 23;
- /* Q12 */ case D24_D26: return 24;
- case D25_D26: case D25_D27: return 25;
- /* Q13 */ case D26_D28: return 26;
- case D27_D28: case D27_D29: return 27;
- /* Q14 */ case D28_D30: return 28;
- case D29_D30: case D29_D31: return 29;
- /* Q15 */
- }
-}
-
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index aa649badaf82..7d6acbc5cfda 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -178,9 +178,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
}
break;
- case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_blx:
- case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
switch (Modifier) {
case MCSymbolRefExpr::VK_ARM_PLT:
Type = ELF::R_ARM_PLT32;
@@ -192,6 +191,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
@@ -252,10 +252,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_thumb_cp:
case ARM::fixup_arm_thumb_br:
llvm_unreachable("Unimplemented");
- case ARM::fixup_arm_uncondbranch:
- Type = ELF::R_ARM_CALL;
- break;
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 03e8d5f83ae7..d32805e522a3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -22,40 +22,14 @@ EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
cl::init(false));
-static const char *const arm_asm_table[] = {
- "{r0}", "r0",
- "{r1}", "r1",
- "{r2}", "r2",
- "{r3}", "r3",
- "{r4}", "r4",
- "{r5}", "r5",
- "{r6}", "r6",
- "{r7}", "r7",
- "{r8}", "r8",
- "{r9}", "r9",
- "{r10}", "r10",
- "{r11}", "r11",
- "{r12}", "r12",
- "{r13}", "r13",
- "{r14}", "r14",
- "{lr}", "lr",
- "{sp}", "sp",
- "{ip}", "ip",
- "{fp}", "fp",
- "{sl}", "sl",
- "{memory}", "memory",
- "{cc}", "cc",
- 0,0
-};
-
void ARMMCAsmInfoDarwin::anchor() { }
ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
- AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
+ UseDataRegionDirectives = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 10d1c48876ed..94f1082b5f6d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -18,6 +18,7 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter {
void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : MCII(mcii), STI(sti) {
+ : MCII(mcii), STI(sti), CTX(ctx) {
}
~ARMMCCodeEmitter() {}
@@ -336,6 +338,7 @@ public:
} // end anonymous namespace
MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, STI, Ctx);
@@ -404,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
// Q registers are encoded as 2x their register number.
switch (Reg) {
@@ -433,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- Reg = getARMRegisterNumbering(MO.getReg());
+ Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
int32_t SImm = MO1.getImm();
bool isAdd = true;
@@ -640,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
-/// target.
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -651,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
Fixups);
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
- if (offset < 0) {
+
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ offset = 0;
+ } else if (offset < 0) {
Val = 0x1000;
offset *= -1;
}
- Val |= offset;
+
+ int SoImmVal = ARM_AM::getSOImmVal(offset);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -669,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
Fixups);
int32_t Val = MO.getImm();
- if (Val < 0) {
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
Val *= -1;
Val |= 0x1000;
}
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -698,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO1 = MI.getOperand(OpIdx);
const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO1.getReg());
- unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
return (Rm << 3) | Rn;
}
@@ -715,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm12 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -795,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -831,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// {7-0} = imm8
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm8 = MO1.getImm();
return (Reg << 8) | Imm8;
}
@@ -861,11 +874,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
// Handle :upper16: and :lower16: assembly prefixes.
const MCExpr *E = MO.getExpr();
+ MCFixupKind Kind;
if (E->getKind() == MCExpr::Target) {
const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
E = ARM16Expr->getSubExpr();
- MCFixupKind Kind;
switch (ARM16Expr->getKind()) {
default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
@@ -891,9 +904,21 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
}
Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
- };
-
- llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ }
+ // If the expression doesn't have :upper16: or :lower16: on it,
+ // it's just a plain immediate expression, and those evaluate to
+ // the lower 16 bits of the expression regardless of whether
+ // we have a movt or a movw.
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ return 0;
}
uint32_t ARMMCCodeEmitter::
@@ -902,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
- unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
@@ -933,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
// {12} isAdd
// {11-0} imm12/Rm
const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
Binary |= Rn << 14;
return Binary;
@@ -956,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
Binary <<= 7; // Shift amount is bits [11:7]
Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
- Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
}
return Binary | (isAdd << 12) | (isReg << 13);
}
@@ -969,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
bool isAdd = MO1.getImm() != 0;
- return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
}
uint32_t ARMMCCodeEmitter::
@@ -987,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
return Imm8 | (isAdd << 8) | (isImm << 9);
}
@@ -1005,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
if (!MO.isReg()) {
- unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
@@ -1015,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
}
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
bool isImm = MO1.getReg() == 0;
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO1.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
}
@@ -1050,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm5 = MO1.getImm();
return ((Imm5 & 0x1f) << 3) | Rn;
}
@@ -1077,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is handled as part of the fixup.
@@ -1123,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1148,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the shift operation Rs.
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
unsigned ARMMCCodeEmitter::
@@ -1167,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1192,8 +1217,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
// Encode shift_imm bit[11:7].
Binary |= SBits << 4;
unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
- assert(Offset && "Offset must be in range 1-32!");
- if (Offset == 32) Offset = 0;
+ assert(Offset < 32 && "Offset must be in range 0-31!");
return Binary | (Offset << 7);
}
@@ -1207,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
// Encoded as [Rn, Rm, imm].
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
Value <<= 4;
- Value |= getARMRegisterNumbering(MO2.getReg());
+ Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
Value <<= 2;
Value |= MO3.getImm();
@@ -1223,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
// Even though the immediate is 8 bits long, we need 9 bits in order
// to represent the (inverse of the) sign bit.
@@ -1285,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1341,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
if (SPRRegs || DPRRegs) {
// VLDM/VSTM
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
if (SPRRegs)
@@ -1350,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
@@ -1366,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1389,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1415,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1434,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return getARMRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index e3512cda3ae3..5df84c8b103a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -35,7 +35,7 @@
using namespace llvm;
-std::string ARM_MC::ParseARMTriple(StringRef TT) {
+std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.size();
@@ -51,27 +51,48 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
Idx = 6;
}
+ bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
if (Idx) {
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ if (NoCPU)
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
- } else
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ if (NoCPU)
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else {
+ // v7 CPUs have lots of different feature sets. If no CPU is specified,
+ // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
+ // the "minimum" feature set and use CPU string to figure out the exact
+ // features.
+ if (NoCPU)
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ }
} else if (SubVer == '6') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
- else if (Len >= Idx+2 && TT[Idx+1] == 'm')
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6t2,+noarm,+mclass";
- else
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ if (NoCPU)
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6,+noarm,+mclass";
+ else
+ ARMArchFeature = "+v6";
+ } else
ARMArchFeature = "+v6";
} else if (SubVer == '5') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
@@ -94,7 +115,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
- std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS.str();
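// Illustrative usage (editor's addition, not part of this patch). The triple
// and CPU spellings below are examples, assuming the prefixes are recognized
// by the parsing code above; the expected strings are the ones assigned in
// ParseARMTriple.
//   std::string MClassFS = ARM_MC::ParseARMTriple("thumbv7m-none-eabi", "");
//   // MClassFS == "+v7,+noarm,+db,+hwdiv,+mclass"   (no CPU: full v7m set)
//   std::string CPUFS = ARM_MC::ParseARMTriple("thumbv7m-none-eabi", "cortex-m3");
//   // CPUFS == "+v7"   (the CPU name is used to fill in the exact features)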
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 88472d7ffc3f..510302d4b3d0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -31,7 +32,7 @@ class raw_ostream;
extern Target TheARMTarget, TheThumbTarget;
namespace ARM_MC {
- std::string ParseARMTriple(StringRef TT);
+ std::string ParseARMTriple(StringRef TT, StringRef CPU);
/// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
/// This is exposed so Asm parser, etc. do not need to go through
@@ -41,6 +42,7 @@ namespace ARM_MC {
}
MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 8057cb6687a6..a51e0fa3fbc5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -190,7 +190,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// 0 - arm instructions
// 1 - thumb instructions
// the other half of the relocated expression is in the following pair
- // relocation entry in the the low 16 bits of r_address field.
+ // relocation entry in the low 16 bits of r_address field.
unsigned ThumbBit = 0;
unsigned MovtBit = 0;
switch ((unsigned)Fixup.getKind()) {
@@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
if (Type == macho::RIT_ARM_Half) {
- // The other-half value only gets populated for the movt relocation.
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
uint32_t Value = 0;;
switch ((unsigned)Fixup.getKind()) {
default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- Value = FixedValue;
+ Value = FixedValue & 0xffff;
break;
}
macho::RelocationEntry MREPair;
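  // Worked example (editor's addition, not part of this patch): for a
  // resolved address of FixedValue = 0x12345678, a movw (lo16) fixup encodes
  // 0x5678 in the instruction, so its PAIR entry carries the other half,
  // 0x1234, computed above; a movt (hi16) fixup encodes 0x1234, and its PAIR
  // entry carries 0x5678.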
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 28998361c7a0..ad60e3282e68 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -220,7 +220,9 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID1 = TII->get(MulOpc);
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
- unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TmpReg = MRI->createVirtualRegister(
+ TII->getRegClass(MCID1, 0, TRI, MF));
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 3eddda812f84..57dc6cb88bed 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -710,3 +710,24 @@ targets, e.g., PPC, that share this behavior, it would be best to implement
this in a target-independent way: we should probably fold that (when using
"undefined at zero" semantics) to set the "defined at zero" bit and have
the code generator expand out the right code.
+
+
+//===---------------------------------------------------------------------===//
+
+Clean up the test/MC/ARM files to have more robust register choices.
+
+R0 should not be used as a register operand in the assembler tests: since zero
+is the default value for the binary encoder, a correct encoding cannot be
+distinguished from one with a missing operand.
+e.g.,
+ add r0, r0 // bad
+ add r3, r5 // good
+
+Register operands should be distinct. That is, when the encoding does not
+require two syntactical operands to refer to the same register, two different
+registers should be used in the test so as to catch errors where the
+operands are swapped in the encoding.
+e.g.,
+ subs.w r1, r1, r1 // bad
+ subs.w r1, r2, r3 // good
+
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index e03e75815c7e..735b255759b7 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -53,11 +53,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert((RC == ARM::tGPRRegisterClass ||
+ assert((RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) && "Unknown regclass!");
- if (RC == ARM::tGPRRegisterClass ||
+ if (RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) {
DebugLoc DL;
@@ -81,11 +81,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert((RC == ARM::tGPRRegisterClass ||
+ assert((RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) && "Unknown regclass!");
- if (RC == ARM::tGPRRegisterClass ||
+ if (RC == &ARM::tGPRRegClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) {
DebugLoc DL;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index ef77bbd21a4e..a39b722caef5 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -49,13 +49,14 @@ const TargetRegisterClass*
Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
- return ARM::tGPRRegisterClass;
+ return &ARM::tGPRRegClass;
return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
- return ARM::tGPRRegisterClass;
+Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ return &ARM::tGPRRegClass;
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -109,7 +110,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
unsigned LdReg = DestReg;
if (DestReg == ARM::SP) {
assert(BaseReg == ARM::SP && "Unexpected!");
- LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
}
if (NumBytes <= 255 && NumBytes >= 0)
@@ -693,7 +694,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else if (MI.mayStore()) {
- VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
if (Opcode == ARM::tSTRspi) {
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 69718424e735..f2e4b08f798e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -30,7 +30,8 @@ public:
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index ecb4c2f0e5da..d54aa935325c 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -24,8 +24,6 @@ STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
class Thumb2ITBlockPass : public MachineFunctionPass {
- bool PreRegAlloc;
-
public:
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
@@ -76,16 +74,14 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
Uses.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
Uses.insert(*Subreg);
}
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
Defs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
continue;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 8ab486b0c1bf..e9e20ddd8783 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -126,9 +126,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
- RC == ARM::GPRnopcRegisterClass) {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -153,9 +153,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
- RC == ARM::GPRnopcRegisterClass) {
+ if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
+ RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
+ RC == &ARM::GPRnopcRegClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
-/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
-/// two-addrss instruction inserted by two-address pass.
-void
-Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
- MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const {
- if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
- return;
-
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
- if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return;
-
- // Schedule the copy so it doesn't come between previous instructions
- // and UseMI which can form an IT block.
- unsigned SrcReg = SrcMI->getOperand(1).getReg();
- ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
- MachineBasicBlock *MBB = UseMI->getParent();
- MachineBasicBlock::iterator MBBI = SrcMI;
- unsigned NumInsts = 0;
- while (--MBBI != MBB->begin()) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
- if (!(NCC == CC || NCC == OCC) ||
- NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->modifiesRegister(ARM::CPSR, &TRI))
- break;
- if (++NumInsts == 4)
- // Too many in a row!
- return;
- }
-
- if (NumInsts) {
- MBB->remove(SrcMI);
- MBB->insert(++MBBI, SrcMI);
- }
-}
-
ARMCC::CondCodes
llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 0911f8a597ce..2cdcd06e9350 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -57,11 +57,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
- /// two-addrss instruction inserted by two-address pass.
- void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b5a397e61685..f18f491f4995 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -67,6 +67,7 @@ namespace {
{ ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
//{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
{ ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
{ ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
{ ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index cf4f796ec2fb..1f8ca8681c09 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -24,5 +24,7 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
+add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 3e7e0b68e8e6..3bce9609bfef 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -37,6 +37,20 @@ to add 'spu' to configure's --enable-targets option, e.g.:
---------------------------------------------------------------------------
TODO:
+* In commit r142152 vector legalization was set to element promotion by
+  default. This breaks half vectors (e.g. v2i32) badly, as they get
+  element-promoted to much slower types (v2i64).
+
+* Many CellSPU-specific codegen tests only grep & count the number of
+  instructions without checking their placement with FileCheck. There have
+  also been commits that change the CellSPU checks, some of which may not
+  have been thoroughly scrutinized w.r.t. the changes they cause in SPU
+  assembly (especially since about the time of r142152).
+
+* Some of the i64 math operations have huge tablegen rules, which sometimes
+  cause tablegen to run out of memory. See e.g. bug 8850. i64 arithmetic
+  should probably be done with libraries (see the sketch below).
+
* Create a machine pass for performing dual-pipeline scheduling specifically
for CellSPU, and insert branch prediction instructions as needed.
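The library approach suggested in the TODO above maps onto the standard TargetLowering hooks (setOperationAction / setLibcallName, as also used in SPUISelLowering.cpp later in this patch). A minimal sketch, not part of the patch, assuming it runs inside SPUTargetLowering's constructor; the default compiler-rt names (__divdi3 and friends) apply unless overridden, and the overridden names shown are illustrative placeholders:

// Route 64-bit division/remainder through libcalls instead of huge
// tablegen-expanded patterns.
setOperationAction(ISD::SDIV, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Optional: point the legalizer at target-specific helpers.
setLibcallName(RTLIB::SDIV_I64, "__spu_divdi3");   // placeholder name
setLibcallName(RTLIB::UDIV_I64, "__spu_udivdi3");  // placeholder name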
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 14021fef05d9..03d5a9ae0c4c 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -301,7 +301,9 @@ bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index 403d7ef1fd9e..67a83f16a649 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -30,12 +30,6 @@ using namespace llvm;
// very little right now.
//===----------------------------------------------------------------------===//
-SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
- TII(tii),
- EvenOdd(0)
-{
-}
-
/// Return the pipeline hazard type encountered or generated by this
/// instruction. Currently returns NoHazard.
///
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
index 675632cc7f13..30acaeaa36fb 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -24,12 +24,8 @@ class TargetInstrInfo;
/// SPUHazardRecognizer
class SPUHazardRecognizer : public ScheduleHazardRecognizer
{
-private:
- const TargetInstrInfo &TII;
- int EvenOdd;
-
public:
- SPUHazardRecognizer(const TargetInstrInfo &TII);
+ SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 062374127e2f..4e9fcd1bc765 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -77,12 +77,14 @@ namespace {
// Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy =
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
+ false, false,
0, TLI.getLibcallCallingConv(LC),
/*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
Callee, Args, DAG, Op.getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo.first;
}
@@ -100,13 +102,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
// Set up the SPU's register classes:
- addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
- addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
- addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
- addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
- addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
- addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
- addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+ addRegisterClass(MVT::i8, &SPU::R8CRegClass);
+ addRegisterClass(MVT::i16, &SPU::R16CRegClass);
+ addRegisterClass(MVT::i32, &SPU::R32CRegClass);
+ addRegisterClass(MVT::i64, &SPU::R64CRegClass);
+ addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
+ addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
+ addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
// SPU has no sign or zero extended loads for i1, i8, i16:
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
@@ -397,12 +399,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
- addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
- addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
- addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
- addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
- addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
+ addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
+ addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
+ addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
+ addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
+ addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1133,7 +1135,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// FIXME: allow for other calling conventions
CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
@@ -1263,14 +1265,19 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
// CellSPU target does not yet support tail call optimization.
isTailCall = false;
@@ -1280,7 +1287,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// FIXME: allow for other calling conventions
CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
@@ -1441,7 +1448,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Now handle the return value(s)
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
@@ -1468,7 +1475,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
// If this is the first return lowered for this function, add the regs to the
@@ -3139,16 +3146,16 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'b': // R1-R31
case 'r': // R0-R31
if (VT == MVT::i64)
- return std::make_pair(0U, SPU::R64CRegisterClass);
- return std::make_pair(0U, SPU::R32CRegisterClass);
+ return std::make_pair(0U, &SPU::R64CRegClass);
+ return std::make_pair(0U, &SPU::R32CRegClass);
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, SPU::R32FPRegisterClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, SPU::R64FPRegisterClass);
+ return std::make_pair(0U, &SPU::R32FPRegClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SPU::R64FPRegClass);
break;
case 'v':
- return std::make_pair(0U, SPU::GPRCRegisterClass);
+ return std::make_pair(0U, &SPU::GPRCRegClass);
}
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index e3db7b2f1fbc..9f1599fa6fed 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -86,7 +86,6 @@ namespace llvm {
class SPUTargetLowering :
public TargetLowering
{
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
SPUTargetMachine &SPUTM;
public:
@@ -159,13 +158,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 759923d7bb42..b25a6397ec3a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -140,29 +140,27 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+ const TargetRegisterInfo *TRI) const {
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == SPU::GPRCRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
- } else if (RC == SPU::R64CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
- } else if (RC == SPU::R64FPRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
- } else if (RC == SPU::R32CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
- } else if (RC == SPU::R32FPRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
- } else if (RC == SPU::R16CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
- } else if (RC == SPU::R8CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
- } else if (RC == SPU::VECREGRegisterClass) {
- opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
- } else {
+ if (RC == &SPU::GPRCRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128;
+ else if (RC == &SPU::R64CRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
+ else if (RC == &SPU::R64FPRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
+ else if (RC == &SPU::R32CRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
+ else if (RC == &SPU::R32FPRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
+ else if (RC == &SPU::R16CRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16;
+ else if (RC == &SPU::R8CRegClass)
+ opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8;
+ else if (RC == &SPU::VECREGRegClass)
+ opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ else
llvm_unreachable("Unknown regclass!");
- }
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -175,29 +173,27 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+ const TargetRegisterInfo *TRI) const {
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == SPU::GPRCRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
- } else if (RC == SPU::R64CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
- } else if (RC == SPU::R64FPRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
- } else if (RC == SPU::R32CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
- } else if (RC == SPU::R32FPRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
- } else if (RC == SPU::R16CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
- } else if (RC == SPU::R8CRegisterClass) {
- opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
- } else if (RC == SPU::VECREGRegisterClass) {
- opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
- } else {
+ if (RC == &SPU::GPRCRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128;
+ else if (RC == &SPU::R64CRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
+ else if (RC == &SPU::R64FPRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
+ else if (RC == &SPU::R32CRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
+ else if (RC == &SPU::R32FPRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
+ else if (RC == &SPU::R16CRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16;
+ else if (RC == &SPU::R8CRegClass)
+ opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8;
+ else if (RC == &SPU::VECREGRegClass)
+ opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ else
llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
- }
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -340,11 +336,11 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
{
MachineBasicBlock::iterator J = MBB.end();
- for( int i=0; i<8; i++) {
- if( J == MBB.begin() ) return J;
- J--;
- }
- return J;
+ for( int i=0; i<8; i++) {
+ if( J == MBB.begin() ) return J;
+ J--;
+ }
+ return J;
}
unsigned
@@ -360,7 +356,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineInstrBuilder MIB;
//TODO: make a more accurate algorithm.
bool haveHBR = MBB.size()>8;
-
+
removeHBR(MBB);
MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
// Add a label just before the branch
@@ -382,7 +378,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
MIB.addSym(branchLabel);
MIB.addMBB(TBB);
- }
+ }
} else {
// Conditional branch
MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
@@ -392,7 +388,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
MIB.addSym(branchLabel);
MIB.addMBB(TBB);
- }
+ }
DEBUG(errs() << "Inserted one-way cond branch: ");
DEBUG((*MIB).dump());
@@ -410,7 +406,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
MIB.addSym(branchLabel);
MIB.addMBB(FBB);
- }
+ }
DEBUG(errs() << "Inserted conditional branch: ");
DEBUG((*MIB).dump());
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index f76ebd75bfef..117acd736aaa 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -3421,14 +3421,14 @@ let isCall = 1,
// Branch relative and set link: Used if we actually know that the target
// is within [-32768, 32767] bytes of the target
def BRSL:
- BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func),
"brsl\t$$lr, $func",
[(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
// Branch absolute and set link: Used if we actually know that the target
// is an absolute address
def BRASL:
- BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func),
"brasl\t$$lr, $func",
[(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 1b2da5f50c81..e6c872d0bbb7 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -193,7 +193,8 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
-SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
+SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
return &SPU::R32CRegClass;
}
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index e5ab22422502..e9f9aba63a48 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -46,7 +46,7 @@ namespace llvm {
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
virtual const TargetRegisterClass *
- getPointerRegClass(unsigned Kind = 0) const;
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
/// After allocating this many registers, the allocator should feel
/// register pressure. The value is a somewhat random guess, based on the
@@ -63,6 +63,11 @@ namespace llvm {
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
{ return true; }
+ //! Enable tracking of liveness after register allocation, since register
+ // scavenging is enabled.
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
+ { return true; }
+
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3b90261fe690..54764f133cc1 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -72,7 +72,7 @@ TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
bool SPUPassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createSPUISelDag(getSPUTargetMachine()));
+ addPass(createSPUISelDag(getSPUTargetMachine()));
return false;
}
@@ -85,9 +85,9 @@ bool SPUPassConfig::addPreEmitPass() {
(BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
"createTCESchedulerPass");
if (schedulerCreator != NULL)
- PM->add(schedulerCreator("cellspu"));
+ addPass(schedulerCreator("cellspu"));
//align instructions with nops/lnops for dual issue
- PM->add(createSPUNopFillerPass(getSPUTargetMachine()));
+ addPass(createSPUNopFillerPass(getSPUTargetMachine()));
return true;
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 69f0ff87eda0..c8e757becc72 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -130,6 +130,7 @@ namespace {
private:
void printLinkageType(GlobalValue::LinkageTypes LT);
void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM);
void printCallingConv(CallingConv::ID cc);
void printEscapedString(const std::string& str);
void printCFP(const ConstantFP* CFP);
@@ -325,6 +326,26 @@ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
}
}
+void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) {
+ switch (TLM) {
+ case GlobalVariable::NotThreadLocal:
+ Out << "GlobalVariable::NotThreadLocal";
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ Out << "GlobalVariable::GeneralDynamicTLSModel";
+ break;
+ case GlobalVariable::LocalDynamicTLSModel:
+ Out << "GlobalVariable::LocalDynamicTLSModel";
+ break;
+ case GlobalVariable::InitialExecTLSModel:
+ Out << "GlobalVariable::InitialExecTLSModel";
+ break;
+ case GlobalVariable::LocalExecTLSModel:
+ Out << "GlobalVariable::LocalExecTLSModel";
+ break;
+ }
+}
+
// printEscapedString - Print each character of the specified string, escaping
// it if it is not printable or if it is an escape char.
void CppWriter::printEscapedString(const std::string &Str) {
@@ -496,7 +517,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
Out << "Attrs.push_back(PAWI);";
nl(Out);
}
- Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ Out << name << "_PAL = AttrListPtr::get(Attrs);";
nl(Out);
out(); nl(Out);
Out << '}'; nl(Out);
@@ -996,7 +1017,9 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) {
}
if (GV->isThreadLocal()) {
printCppName(GV);
- Out << "->setThreadLocal(true);";
+ Out << "->setThreadLocalMode(";
+ printThreadLocalMode(GV->getThreadLocalMode());
+ Out << ");";
nl(Out);
}
if (is_inline) {
@@ -1105,7 +1128,7 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- const ConstantInt* CaseVal = i.getCaseValue();
+ const IntegersSubset CaseVal = i.getCaseValueEx();
const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
@@ -2078,7 +2101,9 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
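For reference, the C++ emitted by printVariableHead now drives the explicit TLS-model API rather than the old boolean setter. A minimal usage sketch, not part of the patch, assuming an existing GlobalVariable *GV; the header path reflects this LLVM era (before the llvm/IR/ move):

#include "llvm/GlobalVariable.h"

// Equivalent to the old GV->setThreadLocal(true), but with an explicit model.
void markGeneralDynamicTLS(llvm::GlobalVariable *GV) {
  GV->setThreadLocalMode(llvm::GlobalVariable::GeneralDynamicTLSModel);
}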
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 92bca6c3c770..9cbe7981a905 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -31,7 +31,9 @@ struct CPPTargetMachine : public TargetMachine {
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- bool DisableVerify);
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index af9e8136bf16..1f2d8accbb09 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -28,8 +28,12 @@ add_llvm_target(HexagonCodeGen
HexagonSubtarget.cpp
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
+ HexagonVLIWPacketizer.cpp
+ HexagonNewValueJump.cpp
)
+add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 080832333671..45f857bab8c6 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -40,6 +40,9 @@ namespace llvm {
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonNewValueJump();
+
/* TODO: object output.
MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
@@ -47,7 +50,8 @@ namespace llvm {
MCContext &Ctx);
*/
/* TODO: assembler input.
- TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &);
+ TargetAsmBackend *createHexagonAsmBackend(const Target &,
+ const std::string &);
*/
void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
HexagonAsmPrinter &AP);
@@ -67,7 +71,7 @@ namespace llvm {
// Normal instruction size (in bytes).
#define HEXAGON_INSTR_SIZE 4
-// Maximum number of words in a packet (in instructions).
+// Maximum number of words and instructions in a packet.
#define HEXAGON_PACKET_SIZE 4
#endif
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 4a50d1609308..451e56206e60 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -28,6 +28,8 @@ def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
"Hexagon v3">;
def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
"Hexagon v4">;
+def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
+ "Hexagon v5">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
@@ -45,13 +47,15 @@ def HexagonInstrInfo : InstrInfo;
// Hexagon processors supported.
//===----------------------------------------------------------------------===//
-class Proc<string Name, ProcessorItineraries Itin,
+class Proc<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features>
- : Processor<Name, Itin, Features>;
+ : ProcessorModel<Name, Model, Features>;
+
+def : Proc<"hexagonv2", HexagonModel, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
-def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
-def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
-def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
// Hexagon Uses the MC printer for assembler output, so make sure the TableGen
// AsmWriter bits get associated with the correct class.
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 39bf45d2d773..5fa4740f2af3 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -13,11 +13,11 @@
//
//===----------------------------------------------------------------------===//
-
#define DEBUG_TYPE "asm-printer"
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonMCInst.h"
#include "HexagonTargetMachine.h"
#include "HexagonSubtarget.h"
#include "InstPrinter/HexagonInstPrinter.h"
@@ -77,8 +77,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
- default:
- assert(0 && "<unknown operand type>");
+ default: llvm_unreachable ("<unknown operand type>");
case MachineOperand::MO_Register:
O << HexagonInstPrinter::getRegisterName(MO.getReg());
return;
@@ -134,7 +133,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
case 'c': // Don't print "$" before a global var name or constant.
// Hexagon never has a prefix.
printOperand(MI, OpNo, OS);
@@ -196,10 +197,45 @@ void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
/// the current output stream.
///
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MCInst MCI;
-
- HexagonLowerToMC(MI, MCI, *this);
- OutStreamer.EmitInstruction(MCI);
+ if (MI->isBundle()) {
+ std::vector<const MachineInstr*> BundleMIs;
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI;
+ ++MII;
+ unsigned int IgnoreCount = 0;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ const MachineInstr *MInst = MII;
+ if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ IgnoreCount++;
+ ++MII;
+ continue;
+ }
+ //BundleMIs.push_back(&*MII);
+ BundleMIs.push_back(MInst);
+ ++MII;
+ }
+ unsigned Size = BundleMIs.size();
+ assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
+ for (unsigned Index = 0; Index < Size; Index++) {
+ HexagonMCInst MCI;
+ MCI.setStartPacket(Index == 0);
+ MCI.setEndPacket(Index == (Size-1));
+
+ HexagonLowerToMC(BundleMIs[Index], MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+ }
+ else {
+ HexagonMCInst MCI;
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ MCI.setStartPacket(true);
+ MCI.setEndPacket(true);
+ }
+ HexagonLowerToMC(MI, MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
return;
}
@@ -241,15 +277,15 @@ void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
- assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
- "Expecting jump table index");
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
// Hexagon_TODO: Do we need name mangling?
O << *GetJTISymbol(MO.getIndex());
}
void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo,
- raw_ostream &O) {
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) &&
"Expecting constant pool index");
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
index bd9608bdb0f4..e61b2a7a58ac 100644
--- a/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -17,8 +17,8 @@
// Hexagon 32-bit C return-value convention.
def RetCC_Hexagon32 : CallingConv<[
- CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+ CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>,
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
@@ -27,8 +27,8 @@ def RetCC_Hexagon32 : CallingConv<[
// Hexagon 32-bit C Calling convention.
def CC_Hexagon32 : CallingConv<[
// All arguments get passed in integer registers if there is space.
- CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+ CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>,
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 46c20e9972b4..ba8e679ebdae 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -56,11 +56,8 @@ void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void Hexagon_CCState::MarkAllocated(unsigned Reg) {
- UsedRegs[Reg/32] |= 1 << (Reg&31);
-
- if (const uint16_t *RegAliases = TRI.getAliasSet(Reg))
- for (; (Reg = *RegAliases); ++RegAliases)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
}
/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 210047446034..ae2ca378881d 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
// The Hexagon processor has no instructions that load or store predicate
-// registers directly. So, when these registers must be spilled a general
-// purpose register must be found and the value copied to/from it from/to
-// the predicate register. This code currently does not use the register
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
// scavenger mechanism available in the allocator. There are two registers
// reserved to allow spilling/restoring predicate registers. One is used to
// hold the predicate value. The other is used when stack frame offsets are
@@ -84,7 +84,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
int SrcReg = MI->getOperand(2).getReg();
assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
"Not a predicate register");
- if (!TII->isValidOffset(Hexagon::STriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) {
if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
@@ -95,7 +95,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::STriw))
+ TII->get(Hexagon::STriw_indexed))
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0).addReg(HEXAGON_RESERVED_REG_2);
} else {
@@ -103,7 +103,8 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw_indexed))
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0)
.addReg(HEXAGON_RESERVED_REG_2);
@@ -111,7 +112,8 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
} else {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw_indexed)).
addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
}
MII = MBB->erase(MI);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index e8a692406e08..cd682df7a574 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -209,6 +209,16 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
FuncInfo->hasClobberLR() );
}
+static inline
+unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) {
+ MCSuperRegIterator SRI(Reg, TRI);
+ assert(SRI.isValid() && "Expected a superreg");
+ unsigned SuperReg = *SRI;
+ ++SRI;
+ assert(!SRI.isValid() && "Expected exactly one superreg");
+ return SuperReg;
+}
+
bool
HexagonFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB,
@@ -235,26 +245,21 @@ HexagonFrameLowering::spillCalleeSavedRegisters(
//
// Check if we can use a double-word store.
//
- const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
-
- // Assume that there is exactly one superreg.
- assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ unsigned SuperReg = uniqueSuperReg(Reg, TRI);
bool CanUseDblStore = false;
const TargetRegisterClass* SuperRegClass = 0;
if (ContiguousRegs && (i < CSI.size()-1)) {
- const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
- assert(SuperRegNext[0] && !SuperRegNext[1] &&
- "Expected exactly one superreg");
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
- CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
+ CanUseDblStore = (SuperRegNext == SuperReg);
}
if (CanUseDblStore) {
- TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ TII.storeRegToStackSlot(MBB, MI, SuperReg, true,
CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg[0]);
+ MBB.addLiveIn(SuperReg);
++i;
} else {
// Cannot use a double-word store.
@@ -295,25 +300,20 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
//
// Check if we can use a double-word load.
//
- const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
+ unsigned SuperReg = uniqueSuperReg(Reg, TRI);
const TargetRegisterClass* SuperRegClass = 0;
-
- // Assume that there is exactly one superreg.
- assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
bool CanUseDblLoad = false;
if (ContiguousRegs && (i < CSI.size()-1)) {
- const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
- assert(SuperRegNext[0] && !SuperRegNext[1] &&
- "Expected exactly one superreg");
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
- CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
+ CanUseDblLoad = (SuperRegNext == SuperReg);
}
if (CanUseDblLoad) {
- TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(),
SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg[0]);
+ MBB.addLiveIn(SuperReg);
++i;
} else {
// Cannot use a double-word load.
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 57772a514d55..d756aec9bef9 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
// can get a useful trip count. The trip count can
// be either a register or an immediate. The location
// of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
const MachineInstr *MI = IV_Opnd->getParent();
if (L->contains(MI) && isCompareEqualsImm(MI)) {
const MachineOperand &MO = MI->getOperand(2);
@@ -491,7 +494,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
}
- // Add the Loop instruction to the begining of the loop.
+ // Add the Loop instruction to the beginning of the loop.
BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
} else {
@@ -623,7 +626,7 @@ void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
// First, set the LC0 with the trip count.
if (MII->getOperand(1).isReg()) {
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9df965efc14b..5499134eb98b 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -90,7 +90,9 @@ public:
SDNode *SelectMul(SDNode *N);
SDNode *SelectZeroExtend(SDNode *N);
SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectIntrinsicWChain(SDNode *N);
SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
// Include the pieces autogenerated from the target description.
@@ -318,7 +320,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
- else assert (0 && "unknown memory type");
+ else llvm_unreachable("unknown memory type");
// Build indexed load.
SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
@@ -375,7 +377,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
};
ReplaceUses(Froms, Tos, 3);
return Result_2;
- }
+ }
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -516,7 +518,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
else
Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
} else
- assert (0 && "unknown memory type");
+ llvm_unreachable("unknown memory type");
// For zero ext i64 loads, we need to add combine instructions.
if (LD->getValueType(0) == MVT::i64 &&
@@ -613,7 +615,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
- else assert (0 && "unknown memory type");
+ else llvm_unreachable("unknown memory type");
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -636,10 +638,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Figure out the opcode.
if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
- else assert (0 && "unknown memory type");
+ else llvm_unreachable("unknown memory type");
// Build regular store.
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
@@ -693,7 +695,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
- else assert (0 && "unknown memory type");
+ else llvm_unreachable("unknown memory type");
SDValue Ops[] = {SDValue(NewBase,0),
CurDAG->getTargetConstant(Offset,PointerTy),
@@ -723,7 +725,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
if (AM != ISD::UNINDEXED) {
return SelectIndexedStore(ST, dl);
}
-
+
return SelectBaseOffsetStore(ST, dl);
}
@@ -752,7 +754,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
SDValue Sext0 = MulOp0.getOperand(0);
if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- SelectCode(N);
+ return SelectCode(N);
}
OP0 = Sext0;
@@ -761,7 +763,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
if (LD->getMemoryVT() != MVT::i32 ||
LD->getExtensionType() != ISD::SEXTLOAD ||
LD->getAddressingMode() != ISD::UNINDEXED) {
- SelectCode(N);
+ return SelectCode(N);
}
SDValue Chain = LD->getChain();
@@ -1128,12 +1130,12 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
// For immediates, lower it.
for (unsigned i = 1; i < N->getNumOperands(); ++i) {
SDNode *Arg = N->getOperand(i).getNode();
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF);
- if (RC == Hexagon::IntRegsRegisterClass ||
- RC == Hexagon::DoubleRegsRegisterClass) {
+ if (RC == &Hexagon::IntRegsRegClass ||
+ RC == &Hexagon::DoubleRegsRegClass) {
Ops.push_back(SDValue(Arg, 0));
- } else if (RC == Hexagon::PredRegsRegisterClass) {
+ } else if (RC == &Hexagon::PredRegsRegClass) {
// Do the transfer.
SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
SDValue(Arg, 0));
@@ -1158,6 +1160,25 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
return SelectCode(N);
}
+//
+// Map floating point constant values.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ APFloat APF = CN->getValueAPF();
+ if (N->getValueType(0) == MVT::f32) {
+ return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+ }
+ else if (N->getValueType(0) == MVT::f64) {
+ return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+ }
+
+ return SelectCode(N);
+}
+
//
// Map predicate true (encoded as -1 in LLVM) to a XOR.
@@ -1215,7 +1236,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
// Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
// Rd and Rd' are assigned to the same register
- SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32,
N->getOperand(1),
Src1->getOperand(0),
Src1->getOperand(1));
@@ -1234,6 +1255,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant:
return SelectConstant(N);
+ case ISD::ConstantFP:
+ return SelectConstantFP(N);
+
case ISD::ADD:
return SelectAdd(N);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 8c4350d1c504..703a128ee024 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -103,12 +103,12 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
- if (LocVT == MVT::i32) {
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
ofst = State.AllocateStack(4, 4);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
- if (LocVT == MVT::i64) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
ofst = State.AllocateStack(8, 8);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
@@ -142,12 +142,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::AExt;
}
- if (LocVT == MVT::i32) {
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
- if (LocVT == MVT::i64) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
@@ -217,12 +217,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::AExt;
}
- if (LocVT == MVT::i32) {
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
- if (LocVT == MVT::i64) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
@@ -234,7 +234,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i32) {
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -249,7 +249,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i64) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -299,7 +299,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
@@ -351,7 +351,7 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
@@ -370,21 +370,25 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
-HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Check for varargs.
NumNamedVarArgParams = -1;
@@ -504,7 +508,7 @@ HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
if (!isTailCall) {
@@ -524,7 +528,7 @@ HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
//
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
@@ -813,7 +817,7 @@ const {
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
@@ -839,14 +843,15 @@ const {
// 1. int, long long, ptr args that get allocated in register.
// 2. Large struct that gets an register to put its address in.
EVT RegVT = VA.getLocVT();
- if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+ RegVT == MVT::i32 || RegVT == MVT::f32) {
unsigned VReg =
- RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+ RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
} else if (RegVT == MVT::i64) {
unsigned VReg =
- RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+ RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
} else {
@@ -918,14 +923,33 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(4);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
SDNode* OpNode = Op.getNode();
+ EVT SVT = OpNode->getValueType(0);
- SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
- Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(4));
- return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0),
- Cond, Op.getOperand(0),
- Op.getOperand(1));
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValTy = Op.getValueType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
+ CP->getAlignment());
+ return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res);
}
SDValue
@@ -1010,11 +1034,18 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
: TargetLowering(targetmachine, new HexagonTargetObjectFile()),
TM(targetmachine) {
+ const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+
// Set up the register classes.
- addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
- addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
- addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+ if (QRI->Subtarget.hasV5TOps()) {
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+ }
+
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
computeRegisterProperties();
@@ -1028,32 +1059,16 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
//
// Library calls for unsupported operations
//
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
-
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
-
setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
@@ -1082,92 +1097,184 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
setOperationAction(ISD::FDIV, MVT::f64, Expand);
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ if (QRI->Subtarget.hasV5TOps()) {
+ // Hexagon V5 Support.
+ setOperationAction(ISD::FADD, MVT::f32, Legal);
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ } else {
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
- setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setOperationAction(ISD::MUL, MVT::f32, Expand);
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
- setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
- setOperationAction(ISD::SUB, MVT::f64, Expand);
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setOperationAction(ISD::SUB, MVT::f32, Expand);
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setOperationAction(ISD::MUL, MVT::f32, Expand);
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setOperationAction(ISD::SUB, MVT::f64, Expand);
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setOperationAction(ISD::SUB, MVT::f32, Expand);
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+ setOperationAction(ISD::FABS, MVT::f32, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
@@ -1208,20 +1315,33 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- // Expand fp<->uint.
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
-
- // Hexagon has no select or setcc: expand to SELECT_CC.
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
-
// Lower SELECT_CC to SETCC and SELECT.
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
- // This is a workaround documented in DAGCombiner.cpp:2892 We don't
- // support SELECT_CC on every type.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ if (QRI->Subtarget.hasV5TOps()) {
+
+      // Make the operation type of the SELECT node Custom for these types
+      // so that legalization does not fall into the infinite
+      // select -> setcc -> select_cc -> select loop.
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ } else {
+
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+      // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+      // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ }
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
@@ -1307,22 +1427,22 @@ const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
- case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
- case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
- case HexagonISD::BRICC: return "HexagonISD::BRICC";
- case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
- case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
- case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
- case HexagonISD::Hi: return "HexagonISD::Hi";
- case HexagonISD::Lo: return "HexagonISD::Lo";
- case HexagonISD::FTOI: return "HexagonISD::FTOI";
- case HexagonISD::ITOF: return "HexagonISD::ITOF";
- case HexagonISD::CALL: return "HexagonISD::CALL";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
}
}
@@ -1347,9 +1467,10 @@ SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
// Frame & Return address. Currently unimplemented.
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress:
llvm_unreachable("TLS not implemented for Hexagon.");
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
@@ -1359,9 +1480,10 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return Op;
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
}
}
@@ -1404,9 +1526,11 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
case MVT::i32:
case MVT::i16:
case MVT::i8:
- return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+ case MVT::f32:
+ return std::make_pair(0U, &Hexagon::IntRegsRegClass);
case MVT::i64:
- return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+ case MVT::f64:
+ return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
default:
llvm_unreachable("Unknown asm register class");
@@ -1416,6 +1540,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+ return QRI->Subtarget.hasV5TOps();
+}
+
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
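
For reference, the LowerSELECT_CC change above splits a SELECT_CC node into a
SETCC that produces the i1 condition and a SELECT that consumes it. A scalar
analogue of that data flow, using hypothetical helper names (this is only the
shape of the rewrite, not SelectionDAG code):

#include <cassert>

// SELECT_CC(LHS, RHS, TrueVal, FalseVal, CC)
//   ==>  Cond = SETCC(LHS, RHS, CC);  SELECT(Cond, TrueVal, FalseVal)
static bool setcc_lt(int LHS, int RHS) { return LHS < RHS; }     // SETCC yielding an i1
static int  select_val(bool Cond, int TrueVal, int FalseVal) {
  return Cond ? TrueVal : FalseVal;                              // SELECT on the i1
}
static int select_cc_lt(int LHS, int RHS, int TrueVal, int FalseVal) {
  return select_val(setcc_lt(LHS, RHS), TrueVal, FalseVal);
}

int main() {
  assert(select_cc_lt(1, 2, 10, 20) == 10);  // 1 < 2, take TrueVal
  assert(select_cc_lt(3, 2, 10, 20) == 20);  // otherwise FalseVal
  return 0;
}
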
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4208bcb2fdca..fe6c905adfcb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -27,6 +27,7 @@ namespace llvm {
CONST32,
CONST32_GP, // For marking data present in GP.
+ FCONST32,
SETCC,
ADJDYNALLOC,
ARGEXTEND,
@@ -48,6 +49,7 @@ namespace llvm {
BR_JT, // Jump table.
BARRIER, // Memory barrier.
WrapperJT,
+ WrapperCP,
TC_RETURN
};
}
@@ -94,13 +96,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -128,6 +124,7 @@ namespace llvm {
MachineBasicBlock *BB) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
virtual EVT getSetCCResultType(EVT VT) const {
return MVT::i1;
}
@@ -150,6 +147,7 @@ namespace llvm {
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
index e78bb790ae7d..18692c4dcc5e 100644
--- a/lib/Target/Hexagon/HexagonImmediates.td
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -371,7 +371,7 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
def u64ImmPred : PatLeaf<(i64 imm), [{
// immS16 predicate - True if the immediate fits in a 16-bit sign extended
// field.
- // Adding "N ||" to supress gcc unused warning.
+ // Adding "N ||" to suppress gcc unused warning.
return (N || true);
}]>;
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index c9f16fb538aa..e472d490e0a0 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -13,29 +13,48 @@
// *** Must match HexagonBaseInfo.h ***
//===----------------------------------------------------------------------===//
+class Type<bits<5> t> {
+ bits<5> Value = t;
+}
+def TypePSEUDO : Type<0>;
+def TypeALU32 : Type<1>;
+def TypeCR : Type<2>;
+def TypeJR : Type<3>;
+def TypeJ : Type<4>;
+def TypeLD : Type<5>;
+def TypeST : Type<6>;
+def TypeSYSTEM : Type<7>;
+def TypeXTYPE : Type<8>;
+def TypeMARKER : Type<31>;
//===----------------------------------------------------------------------===//
// Intruction Class Declaration +
//===----------------------------------------------------------------------===//
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr, InstrItinClass itin> : Instruction {
+ string cstr, InstrItinClass itin, Type type> : Instruction {
field bits<32> Inst;
let Namespace = "Hexagon";
dag OutOperandList = outs;
dag InOperandList = ins;
- let AsmString = asmstr;
+ let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
-
- // *** The code below must match HexagonBaseInfo.h ***
-
+ let Itinerary = itin;
+ let Size = 4;
+
+ // *** Must match HexagonBaseInfo.h ***
+ // Instruction type according to the ISA.
+ Type HexagonType = type;
+ let TSFlags{4-0} = HexagonType.Value;
+ // Solo instructions, i.e., those that cannot be in a packet with others.
+ bits<1> isHexagonSolo = 0;
+ let TSFlags{5} = isHexagonSolo;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{1} = isPredicated;
+ let TSFlags{6} = isPredicated;
// *** The code above must match HexagonBaseInfo.h ***
}
@@ -47,17 +66,25 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
bits<5> rd;
bits<5> rs;
bits<13> imm13;
}
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayLoad = 1;
+}
+
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> {
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -68,7 +95,24 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayStore = 1;
+}
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only.
+// In V2/V3 we used ST for this, but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
bits<5> rd;
bits<5> rs;
bits<13> imm13;
@@ -79,7 +123,7 @@ class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of the instruction class CHANGED from V2/V3 to V4.
class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> {
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -89,7 +133,7 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -102,7 +146,17 @@ class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -115,7 +169,7 @@ class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", M> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -126,8 +180,8 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, M> {
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -138,9 +192,7 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
-//: InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, M)> {
- : InstHexagon<outs, ins, asmstr, pattern, "", S> {
-// : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -151,8 +203,8 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
bits<5> rd;
@@ -163,14 +215,14 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", J> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
bits<16> imm16;
}
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", JR> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
bits<5> rs;
bits<5> pu; // Predicate register
}
@@ -178,15 +230,22 @@ class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", CR> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
bits<5> rs;
bits<10> imm10;
}
+class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>;
-
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Intruction Classes Definitions -
@@ -222,6 +281,11 @@ class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
: ALU64Type<outs, ins, asmstr, pattern> {
}
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
// J Type Instructions.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
: JType<outs, ins, asmstr, pattern> {
@@ -234,15 +298,31 @@ class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Post increment ST Instruction.
-class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
: STInstPost<outs, ins, asmstr, pattern, cstr> {
let rt{0-4} = 0;
+ let mayStore = 1;
}
// Post increment LD Instruction.
-class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
: LDInstPost<outs, ins, asmstr, pattern, cstr> {
let rt{0-4} = 0;
+ let mayLoad = 1;
}
//===----------------------------------------------------------------------===//
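
The TSFlags layout established in InstHexagon above (instruction type in bits
4-0, the solo flag in bit 5, the predicated flag in bit 6) has to be mirrored
on the C++ side. A minimal decode sketch, assuming hypothetical mask names;
the authoritative definitions are whatever HexagonBaseInfo.h declares and must
stay in sync with the .td file:

#include <cstdint>

enum : uint64_t {
  HexagonTypeMask = 0x1f,       // TSFlags{4-0}: instruction type (Type<5> value)
  HexagonSoloMask = 1ull << 5,  // TSFlags{5}:   solo instruction flag
  HexagonPredMask = 1ull << 6   // TSFlags{6}:   predicated instruction flag
};

static inline unsigned getInsnType(uint64_t TSFlags) {
  return static_cast<unsigned>(TSFlags & HexagonTypeMask);
}
static inline bool isSoloInsn(uint64_t TSFlags)       { return (TSFlags & HexagonSoloMask) != 0; }
static inline bool isPredicatedInsn(uint64_t TSFlags) { return (TSFlags & HexagonPredMask) != 0; }
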
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index bd5e4493d7c2..49741a3d1b20 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -11,11 +11,25 @@
//
//===----------------------------------------------------------------------===//
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP : Type<9>;
+def TypeNV : Type<10>;
+def TypePREFIX : Type<30>;
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
//
// NV type instructions.
//
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
bits<5> rd;
bits<5> rs;
bits<13> imm13;
@@ -24,7 +38,7 @@ class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
// Definition of Post increment new value store.
class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> {
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -39,8 +53,15 @@ class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
}
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
bits<5> rd;
bits<5> rs;
bits<6> imm6;
}
+
+class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
+ let isCodeGenOnly = 1;
+
+ bits<26> imm26;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 77b366372cf8..c8f933dcf4bd 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
@@ -34,24 +34,23 @@ using namespace llvm;
/// Constants for Hexagon instructions.
///
const int Hexagon_MEMW_OFFSET_MAX = 4095;
-const int Hexagon_MEMW_OFFSET_MIN = 4096;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
const int Hexagon_MEMD_OFFSET_MAX = 8191;
-const int Hexagon_MEMD_OFFSET_MIN = 8192;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
const int Hexagon_MEMH_OFFSET_MAX = 2047;
-const int Hexagon_MEMH_OFFSET_MIN = 2048;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
const int Hexagon_MEMB_OFFSET_MAX = 1023;
-const int Hexagon_MEMB_OFFSET_MIN = 1024;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
const int Hexagon_ADDI_OFFSET_MAX = 32767;
-const int Hexagon_ADDI_OFFSET_MIN = 32768;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
const int Hexagon_MEMD_AUTOINC_MAX = 56;
-const int Hexagon_MEMD_AUTOINC_MIN = 64;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
const int Hexagon_MEMW_AUTOINC_MAX = 28;
-const int Hexagon_MEMW_AUTOINC_MIN = 32;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
const int Hexagon_MEMH_AUTOINC_MAX = 14;
-const int Hexagon_MEMH_AUTOINC_MIN = 16;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
const int Hexagon_MEMB_AUTOINC_MAX = 7;
-const int Hexagon_MEMB_AUTOINC_MIN = 8;
-
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
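
The sign fix to the *_OFFSET_MIN and *_AUTOINC_MIN constants above turns each
MIN/MAX pair into a proper signed interval. A minimal sketch of the kind of
range check these bounds feed, with hypothetical names (the actual consumers
live elsewhere in HexagonInstrInfo):

// A memw frame offset is encodable only within [-4096, 4095]. With the
// previous positive MIN values, a check of this shape described an empty
// range and rejected every offset.
static const int MemwOffsetMax = 4095;   // mirrors Hexagon_MEMW_OFFSET_MAX
static const int MemwOffsetMin = -4096;  // mirrors the corrected Hexagon_MEMW_OFFSET_MIN

static bool isValidMemwOffset(int Offset) {
  return Offset >= MemwOffsetMin && Offset <= MemwOffsetMax;
}
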
@@ -70,6 +69,7 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
switch (MI->getOpcode()) {
+ default: break;
case Hexagon::LDriw:
case Hexagon::LDrid:
case Hexagon::LDrih:
@@ -81,11 +81,7 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
-
- default:
- break;
}
-
return 0;
}
@@ -98,21 +94,18 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
+ default: break;
case Hexagon::STriw:
case Hexagon::STrid:
case Hexagon::STrih:
case Hexagon::STrib:
if (MI->getOperand(2).isFI() &&
MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
- FrameIndex = MI->getOperand(2).getIndex();
- return MI->getOperand(0).getReg();
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
}
break;
-
- default:
- break;
}
-
return 0;
}
@@ -176,6 +169,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
+ TBB = NULL;
FBB = NULL;
// If the block has no terminators, it just falls into the block after it.
@@ -328,7 +322,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg).addReg(SrcReg).addReg(SrcReg);
return;
}
- if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) {
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
// We can have an overlap between single and double reg: r1:0 = r0.
if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
// r1:0 = r0
@@ -343,7 +338,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
return;
}
- if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+ if (Hexagon::CRRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
return;
}
@@ -370,15 +366,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
- if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+ if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::STriw))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
- } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+ } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::STrid))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
- } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+ } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
@@ -415,14 +411,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
-
- if (RC == Hexagon::IntRegsRegisterClass) {
+ if (RC == &Hexagon::IntRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+ } else if (RC == &Hexagon::DoubleRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- } else if (RC == Hexagon::PredRegsRegisterClass) {
+ } else if (RC == &Hexagon::PredRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else {
@@ -453,11 +448,11 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *TRC;
if (VT == MVT::i1) {
- TRC = Hexagon::PredRegsRegisterClass;
- } else if (VT == MVT::i32) {
- TRC = Hexagon::IntRegsRegisterClass;
- } else if (VT == MVT::i64) {
- TRC = Hexagon::DoubleRegsRegisterClass;
+ TRC = &Hexagon::PredRegsRegClass;
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ TRC = &Hexagon::IntRegsRegClass;
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ TRC = &Hexagon::DoubleRegsRegClass;
} else {
llvm_unreachable("Cannot handle this register class");
}
@@ -466,7 +461,852 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
return NewReg;
}
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ default: return false;
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+
+ // TFR_FI
+ case Hexagon::TFR_FI:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ default: return false;
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+
+ // V4 absolute set addressing.
+ case Hexagon::LDrid_abs_setimm_V4:
+ case Hexagon::LDriw_abs_setimm_V4:
+ case Hexagon::LDrih_abs_setimm_V4:
+ case Hexagon::LDrib_abs_setimm_V4:
+ case Hexagon::LDriuh_abs_setimm_V4:
+ case Hexagon::LDriub_abs_setimm_V4:
+
+ case Hexagon::STrid_abs_setimm_V4:
+ case Hexagon::STrib_abs_setimm_V4:
+ case Hexagon::STrih_abs_setimm_V4:
+ case Hexagon::STriw_abs_setimm_V4:
+
+ // V4 global address load.
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+
+ // V4 global address store.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+
+ // V4 predicated global address new value store.
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ case Hexagon::STb_GP_cdnPt_nv_V4 :
+ case Hexagon::STb_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ case Hexagon::STh_GP_cdnPt_nv_V4 :
+ case Hexagon::STh_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ case Hexagon::STw_GP_cdnPt_nv_V4 :
+ case Hexagon::STw_GP_cdnNotPt_nv_V4 :
+
+ // TFR_FI
+ case Hexagon::TFR_FI_immext_V4:
+
+ // TFRI_F
+ case Hexagon::TFRI_f:
+ case Hexagon::TFRI_cPt_f:
+ case Hexagon::TFRI_cNotPt_f:
+ case Hexagon::CONST64_Float_Real:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return true;
+ }
+}
+
+unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of instruction.");
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriPt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriPt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
+
+ case Hexagon::TFR_FI:
+ return Hexagon::TFR_FI_immext_V4;
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ llvm_unreachable("Needs implementing.");
+ }
+}
+
+unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of jump instruction.");
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ }
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ // Store Byte
+ case Hexagon::STrib_nv_V4:
+ case Hexagon::STrib_indexed_nv_V4:
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ case Hexagon::STrib_shl_nv_V4:
+ case Hexagon::STrib_GP_nv_V4:
+ case Hexagon::STb_GP_nv_V4:
+ case Hexagon::POST_STbri_nv_V4:
+ case Hexagon::STrib_cPt_nv_V4:
+ case Hexagon::STrib_cdnPt_nv_V4:
+ case Hexagon::STrib_cNotPt_nv_V4:
+ case Hexagon::STrib_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STbri_cPt_nv_V4:
+ case Hexagon::POST_STbri_cdnPt_nv_V4:
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_abs_nv_V4:
+ case Hexagon::STrib_abs_cPt_nv_V4:
+ case Hexagon::STrib_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_nv_V4:
+ case Hexagon::STrib_imm_abs_cPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Halfword
+ case Hexagon::STrih_nv_V4:
+ case Hexagon::STrih_indexed_nv_V4:
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ case Hexagon::STrih_shl_nv_V4:
+ case Hexagon::STrih_GP_nv_V4:
+ case Hexagon::STh_GP_nv_V4:
+ case Hexagon::POST_SThri_nv_V4:
+ case Hexagon::STrih_cPt_nv_V4:
+ case Hexagon::STrih_cdnPt_nv_V4:
+ case Hexagon::STrih_cNotPt_nv_V4:
+ case Hexagon::STrih_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_SThri_cPt_nv_V4:
+ case Hexagon::POST_SThri_cdnPt_nv_V4:
+ case Hexagon::POST_SThri_cNotPt_nv_V4:
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_abs_nv_V4:
+ case Hexagon::STrih_abs_cPt_nv_V4:
+ case Hexagon::STrih_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_nv_V4:
+ case Hexagon::STrih_imm_abs_cPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Word
+ case Hexagon::STriw_nv_V4:
+ case Hexagon::STriw_indexed_nv_V4:
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ case Hexagon::STriw_shl_nv_V4:
+ case Hexagon::STriw_GP_nv_V4:
+ case Hexagon::STw_GP_nv_V4:
+ case Hexagon::POST_STwri_nv_V4:
+ case Hexagon::STriw_cPt_nv_V4:
+ case Hexagon::STriw_cdnPt_nv_V4:
+ case Hexagon::STriw_cNotPt_nv_V4:
+ case Hexagon::STriw_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STwri_cPt_nv_V4:
+ case Hexagon::POST_STwri_cdnPt_nv_V4:
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_abs_nv_V4:
+ case Hexagon::STriw_abs_cPt_nv_V4:
+ case Hexagon::STriw_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_abs_cdnNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_nv_V4:
+ case Hexagon::STriw_imm_abs_cPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ // Load Byte
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDrib_cPt:
+ case Hexagon::POST_LDrib_cNotPt:
+ case Hexagon::POST_LDrib_cdnPt_V4:
+ case Hexagon::POST_LDrib_cdnNotPt_V4:
+
+ // Load unsigned byte
+ case Hexagon::POST_LDriub:
+ case Hexagon::POST_LDriub_cPt:
+ case Hexagon::POST_LDriub_cNotPt:
+ case Hexagon::POST_LDriub_cdnPt_V4:
+ case Hexagon::POST_LDriub_cdnNotPt_V4:
+
+ // Load halfword
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDrih_cPt:
+ case Hexagon::POST_LDrih_cNotPt:
+ case Hexagon::POST_LDrih_cdnPt_V4:
+ case Hexagon::POST_LDrih_cdnNotPt_V4:
+
+ // Load unsigned halfword
+ case Hexagon::POST_LDriuh:
+ case Hexagon::POST_LDriuh_cPt:
+ case Hexagon::POST_LDriuh_cNotPt:
+ case Hexagon::POST_LDriuh_cdnPt_V4:
+ case Hexagon::POST_LDriuh_cdnNotPt_V4:
+
+ // Load word
+ case Hexagon::POST_LDriw:
+ case Hexagon::POST_LDriw_cPt:
+ case Hexagon::POST_LDriw_cNotPt:
+ case Hexagon::POST_LDriw_cdnPt_V4:
+ case Hexagon::POST_LDriw_cdnNotPt_V4:
+
+ // Load double word
+ case Hexagon::POST_LDrid:
+ case Hexagon::POST_LDrid_cPt:
+ case Hexagon::POST_LDrid_cNotPt:
+ case Hexagon::POST_LDrid_cdnPt_V4:
+ case Hexagon::POST_LDrid_cdnNotPt_V4:
+
+ // Store byte
+ case Hexagon::POST_STbri:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+
+ // Store halfword
+ case Hexagon::POST_SThri:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+
+ // Store word
+ case Hexagon::POST_STwri:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+
+ // Store double word
+ case Hexagon::POST_STdri:
+ case Hexagon::POST_STdri_cPt:
+ case Hexagon::POST_STdri_cNotPt:
+ case Hexagon::POST_STdri_cdnPt_V4:
+ case Hexagon::POST_STdri_cdnNotPt_V4:
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+}
bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
bool isPred = MI->getDesc().isPredicable();
@@ -548,7 +1388,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::SXTH:
case Hexagon::ZXTB:
case Hexagon::ZXTH:
- return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ return Subtarget.hasV4TOps();
case Hexagon::JMPR:
return false;
@@ -557,8 +1397,27 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
+// This function performs the following inversions:
+//
+// cPt ---> cNotPt
+// cNotPt ---> cPt
+//
+// however, these inversions are NOT included:
+//
+// cdnPt -X-> cdnNotPt
+// cdnNotPt -X-> cdnPt
+// cPt_nv -X-> cNotPt_nv (new value stores)
+// cNotPt_nv -X-> cPt_nv (new value stores)
+//
+// because only the following transformations are allowed:
+//
+// cNotPt ---> cdnNotPt
+// cPt ---> cdnPt
+// cNotPt ---> cNotPt_nv
+// cPt ---> cPt_nv
unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
switch(Opc) {
+ default: llvm_unreachable("Unexpected predicated instruction");
case Hexagon::TFR_cPt:
return Hexagon::TFR_cNotPt;
case Hexagon::TFR_cNotPt:
@@ -805,6 +1664,47 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::STrid_indexed_shl_cNotPt_V4:
return Hexagon::STrid_indexed_shl_cPt_V4;
+ // V4 Store to global address.
+ case Hexagon::STd_GP_cPt_V4:
+ return Hexagon::STd_GP_cNotPt_V4;
+ case Hexagon::STd_GP_cNotPt_V4:
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ case Hexagon::STrid_GP_cPt_V4:
+ return Hexagon::STrid_GP_cNotPt_V4;
+ case Hexagon::STrid_GP_cNotPt_V4:
+ return Hexagon::STrid_GP_cPt_V4;
+
+ case Hexagon::STrib_GP_cPt_V4:
+ return Hexagon::STrib_GP_cNotPt_V4;
+ case Hexagon::STrib_GP_cNotPt_V4:
+ return Hexagon::STrib_GP_cPt_V4;
+
+ case Hexagon::STrih_GP_cPt_V4:
+ return Hexagon::STrih_GP_cNotPt_V4;
+ case Hexagon::STrih_GP_cNotPt_V4:
+ return Hexagon::STrih_GP_cPt_V4;
+
+ case Hexagon::STriw_GP_cPt_V4:
+ return Hexagon::STriw_GP_cNotPt_V4;
+ case Hexagon::STriw_GP_cNotPt_V4:
+ return Hexagon::STriw_GP_cPt_V4;
+
// Load.
case Hexagon::LDrid_cPt:
return Hexagon::LDrid_cNotPt;
@@ -1009,9 +1909,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
return Hexagon::JMP_GTUrrdnPnt_nv_V4;
-
- default:
- llvm_unreachable("Unexpected predicated instruction");
}
}
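
A minimal usage sketch (not part of the patch) of how a pass could flip the
predicate sense of an already-predicated instruction with the helper above.
The free function name and the HII pointer are assumptions for illustration;
the MachineInstr/MCInstrDesc calls are the standard LLVM CodeGen API.

    #include "HexagonInstrInfo.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Swap cPt <-> cNotPt on an already-predicated MachineInstr.  The .new
    // (cdn*) and new-value (*_nv) forms are deliberately not handled by
    // getInvertedPredicatedOpcode and must be rebuilt from the base form.
    static void invertPredicateSense(llvm::MachineInstr *MI,
                                     const llvm::HexagonInstrInfo *HII) {
      unsigned NewOpc = HII->getInvertedPredicatedOpcode(MI->getOpcode());
      MI->setDesc(HII->get(NewOpc));   // operands stay, only the opcode flips
    }
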
@@ -1022,12 +1919,21 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::TFR:
return !invertPredicate ? Hexagon::TFR_cPt :
Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI_f:
+ return !invertPredicate ? Hexagon::TFRI_cPt_f :
+ Hexagon::TFRI_cNotPt_f;
case Hexagon::TFRI:
return !invertPredicate ? Hexagon::TFRI_cPt :
Hexagon::TFRI_cNotPt;
case Hexagon::JMP:
return !invertPredicate ? Hexagon::JMP_c :
Hexagon::JMP_cNot;
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 :
+ Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 :
+ Hexagon::JMP_EQriNotPt_nv_V4;
case Hexagon::ADD_ri:
return !invertPredicate ? Hexagon::ADD_ri_cPt :
Hexagon::ADD_ri_cNotPt;
@@ -1121,6 +2027,46 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::LDriw_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 Load from global address
+ case Hexagon::LDrid_GP_V4:
+ return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 :
+ Hexagon::LDrid_GP_cNotPt_V4;
+ case Hexagon::LDrib_GP_V4:
+ return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 :
+ Hexagon::LDrib_GP_cNotPt_V4;
+ case Hexagon::LDriub_GP_V4:
+ return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 :
+ Hexagon::LDriub_GP_cNotPt_V4;
+ case Hexagon::LDrih_GP_V4:
+ return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 :
+ Hexagon::LDrih_GP_cNotPt_V4;
+ case Hexagon::LDriuh_GP_V4:
+ return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 :
+ Hexagon::LDriuh_GP_cNotPt_V4;
+ case Hexagon::LDriw_GP_V4:
+ return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 :
+ Hexagon::LDriw_GP_cNotPt_V4;
+
+ case Hexagon::LDd_GP_V4:
+ return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 :
+ Hexagon::LDd_GP_cNotPt_V4;
+ case Hexagon::LDb_GP_V4:
+ return !invertPredicate ? Hexagon::LDb_GP_cPt_V4 :
+ Hexagon::LDb_GP_cNotPt_V4;
+ case Hexagon::LDub_GP_V4:
+ return !invertPredicate ? Hexagon::LDub_GP_cPt_V4 :
+ Hexagon::LDub_GP_cNotPt_V4;
+ case Hexagon::LDh_GP_V4:
+ return !invertPredicate ? Hexagon::LDh_GP_cPt_V4 :
+ Hexagon::LDh_GP_cNotPt_V4;
+ case Hexagon::LDuh_GP_V4:
+ return !invertPredicate ? Hexagon::LDuh_GP_cPt_V4 :
+ Hexagon::LDuh_GP_cNotPt_V4;
+ case Hexagon::LDw_GP_V4:
+ return !invertPredicate ? Hexagon::LDw_GP_cPt_V4 :
+ Hexagon::LDw_GP_cNotPt_V4;
+
// Byte.
case Hexagon::POST_STbri:
return !invertPredicate ? Hexagon::POST_STbri_cPt :
@@ -1182,6 +2128,34 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::STrid_indexed_shl_V4:
return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ // V4 Store to global address
+ case Hexagon::STrid_GP_V4:
+ return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 :
+ Hexagon::STrid_GP_cNotPt_V4;
+ case Hexagon::STrib_GP_V4:
+ return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 :
+ Hexagon::STrib_GP_cNotPt_V4;
+ case Hexagon::STrih_GP_V4:
+ return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 :
+ Hexagon::STrih_GP_cNotPt_V4;
+ case Hexagon::STriw_GP_V4:
+ return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 :
+ Hexagon::STriw_GP_cNotPt_V4;
+
+ case Hexagon::STd_GP_V4:
+ return !invertPredicate ? Hexagon::STd_GP_cPt_V4 :
+ Hexagon::STd_GP_cNotPt_V4;
+ case Hexagon::STb_GP_V4:
+ return !invertPredicate ? Hexagon::STb_GP_cPt_V4 :
+ Hexagon::STb_GP_cNotPt_V4;
+ case Hexagon::STh_GP_V4:
+ return !invertPredicate ? Hexagon::STh_GP_cPt_V4 :
+ Hexagon::STh_GP_cNotPt_V4;
+ case Hexagon::STw_GP_V4:
+ return !invertPredicate ? Hexagon::STw_GP_cPt_V4 :
+ Hexagon::STw_GP_cNotPt_V4;
+
// Load.
case Hexagon::LDrid:
return !invertPredicate ? Hexagon::LDrid_cPt :
@@ -1201,9 +2175,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::LDriub:
return !invertPredicate ? Hexagon::LDriub_cPt :
Hexagon::LDriub_cNotPt;
- case Hexagon::LDriubit:
- return !invertPredicate ? Hexagon::LDriub_cPt :
- Hexagon::LDriub_cNotPt;
// Load Indexed.
case Hexagon::LDrid_indexed:
return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
@@ -1297,7 +2268,7 @@ PredicateInstruction(MachineInstr *MI,
bool
HexagonInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const {
return true;
@@ -1323,7 +2294,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
-
bool
HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
@@ -1331,7 +2301,7 @@ HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
MachineOperand MO = MI->getOperand(oper);
if (MO.isReg() && MO.isDef()) {
const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
- if (RC == Hexagon::PredRegsRegisterClass) {
+ if (RC == &Hexagon::PredRegsRegClass) {
Pred.push_back(MO);
return true;
}
@@ -1373,6 +2343,7 @@ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
+ default: return false;
case Hexagon::DEALLOC_RET_V4 :
case Hexagon::DEALLOC_RET_cPt_V4 :
case Hexagon::DEALLOC_RET_cNotPt_V4 :
@@ -1382,7 +2353,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
return true;
}
- return false;
}
@@ -1396,13 +2366,17 @@ isValidOffset(const int Opcode, const int Offset) const {
switch(Opcode) {
case Hexagon::LDriw:
+ case Hexagon::LDriw_f:
case Hexagon::STriw:
+ case Hexagon::STriw_f:
assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::LDrid:
+ case Hexagon::LDrid_f:
case Hexagon::STrid:
+ case Hexagon::STrid_f:
assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
@@ -1410,7 +2384,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- case Hexagon::LDrih_ae:
assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -1418,9 +2391,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDrib:
case Hexagon::STrib:
case Hexagon::LDriub:
- case Hexagon::LDriubit:
- case Hexagon::LDrib_ae:
- case Hexagon::LDriub_ae:
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
@@ -1528,6 +2498,7 @@ bool HexagonInstrInfo::
isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
+ default: return false;
case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
@@ -1570,28 +2541,59 @@ isMemOp(const MachineInstr *MI) const {
case Hexagon::MEMb_SUBr_MEM_V4 :
case Hexagon::MEMb_ANDr_MEM_V4 :
case Hexagon::MEMb_ORr_MEM_V4 :
- return true;
+ return true;
}
- return false;
}
bool HexagonInstrInfo::
isSpillPredRegOp(const MachineInstr *MI) const {
- switch (MI->getOpcode())
- {
+ switch (MI->getOpcode()) {
+ default: return false;
case Hexagon::STriw_pred :
case Hexagon::LDriw_pred :
- return true;
+ return true;
+ }
+}
+
+bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPLTrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGEri:
+ case Hexagon::CMPGEUri:
+ return true;
}
- return false;
}
+bool HexagonInstrInfo::
+isConditionalTransfer (const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+ }
+}
bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
+ default: return false;
case Hexagon::ADD_ri_cPt:
case Hexagon::ADD_ri_cNotPt:
case Hexagon::ADD_rr_cPt:
@@ -1619,19 +2621,16 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
case Hexagon::ZXTB_cNotPt_V4:
case Hexagon::ZXTH_cPt_V4:
case Hexagon::ZXTH_cNotPt_V4:
- return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
-
- default:
- return false;
+ return QRI.Subtarget.hasV4TOps();
}
}
-
bool HexagonInstrInfo::
isConditionalLoad (const MachineInstr* MI) const {
const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
+ default: return false;
case Hexagon::LDrid_cPt :
case Hexagon::LDrid_cNotPt :
case Hexagon::LDrid_indexed_cPt :
@@ -1669,7 +2668,7 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::POST_LDriuh_cNotPt :
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cNotPt :
- return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ return QRI.Subtarget.hasV4TOps();
case Hexagon::LDrid_indexed_cPt_V4 :
case Hexagon::LDrid_indexed_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
@@ -1694,12 +2693,136 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::LDriw_indexed_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
- return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
- default:
- return false;
+ return QRI.Subtarget.hasV4TOps();
}
}
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores as they can't be
+// converted to .new predicate.
+//
+// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+// ^ ^
+// / \ (not OK. it will cause new-value store to be
+// / X conditional on p0.new while R2 producer is
+// / \ on p0)
+// / \.
+// p.new store p.old NV store
+// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new]
+// ^ ^
+// \ /
+// \ /
+// \ /
+// p.old store
+// [if (p0)memw(R0+#0)=R2]
+//
+// The above diagram shows the steps involved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
+//
+// The following set of instructions further explains the scenario where
+// conditional new-value store becomes invalid when promoted to .new predicate
+// form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+// 2) p0 = cmp.eq(r3, #0) }
+//
+// 3) if (p0) memb(r1+#0) = r0  --> this instruction can't be grouped with
+// the first two instructions because in instr 1, r0 is conditional on the old
+// value of p0, but its use in instr 3 is conditional on the p0 modified by
+// instr 2, which is not valid for new-value stores.
+bool HexagonInstrInfo::
+isConditionalStore (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ default: return false;
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cNotPt :
+ return QRI.Subtarget.hasV4TOps();
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return QRI.Subtarget.hasV4TOps();
+
+  // Predicated new-value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+  // from the "Conditional Store" list because a predicated new-value store
+  // would NOT be promoted to a double dot-new store; see the diagram below.
+  // This function returns true for stores that are predicated but not yet
+  // promoted to predicate dot-new instructions.
+ //
+ // +---------------------+
+ // /-----| if (p0) memw(..)=r0 |---------\~
+ // || +---------------------+ ||
+ // promote || /\ /\ || promote
+ // || /||\ /||\ ||
+ // \||/ demote || \||/
+ // \/ || || \/
+ // +-------------------------+ || +-------------------------+
+ // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+ // +-------------------------+ || +-------------------------+
+ // || || ||
+ // || demote \||/
+ // promote || \/ NOT possible
+ // || || /\~
+ // \||/ || /||\~
+ // \/ || ||
+ // +-----------------------------+
+ // | if (p0.new) memw(..)=r0.new |
+ // +-----------------------------+
+ // Double Dot New Store
+ //
+ }
+}
+
+
+
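
As a sketch only (the helper name and the HII pointer are assumptions, not
part of the patch), this is roughly how a packetizer could combine the
predicate queries above to decide whether a predicated store may be given a
dot-new predicate:

    #include "HexagonInstrInfo.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // A predicated store may take a .new predicate only if it is in the
    // "Conditional Store" set; conditional new-value stores are rejected by
    // isConditionalStore() itself, because promoting them would make the
    // store conditional on p0.new while the stored value's producer is
    // still guarded by p0.
    static bool canPromoteToDotNewPredicate(const llvm::MachineInstr *MI,
                                            const llvm::HexagonInstrInfo *HII) {
      return HII->isPredicated(MI) && HII->isConditionalStore(MI);
    }
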
DFAPacketizer *HexagonInstrInfo::
CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 730687036c81..2bb53f899ce1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -112,7 +112,7 @@ public:
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const;
@@ -160,10 +160,21 @@ public:
bool isS8_Immediate(const int value) const;
bool isS6_Immediate(const int value) const;
+ bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
bool isConditionalALU32 (const MachineInstr* MI) const;
bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalStore(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isExtendable(const MachineInstr* MI) const;
+ bool isExtended(const MachineInstr* MI) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJumpCandidate(const MachineInstr *MI) const;
+ unsigned getImmExtForm(const MachineInstr* MI) const;
+ unsigned getNormalBranchForm(const MachineInstr* MI) const;
private:
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index b563ac3c613d..c0c0df6004cd 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -25,7 +25,10 @@ def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
+def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
// Addressing modes.
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
@@ -84,10 +87,12 @@ def symbolLo32 : Operand<i32> {
multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b),
+ (i32 IntRegs:$c)))]>;
def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
- [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+ [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b,
+ (i32 IntRegs:$c)))]>;
}
// Multi-class for compare ops.
@@ -95,111 +100,114 @@ let isCompare = 1 in {
multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>;
}
multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
}
multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), s10ImmPred:$c))]>;
}
multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), u9ImmPred:$c))]>;
}
-multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Imm:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+ [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
+ u8ImmPred:$c))]>;
}
multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+ [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
+ s8ImmPred:$c))]>;
}
}
//===----------------------------------------------------------------------===//
-// Instructions
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
-// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
-// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
-// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
-// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
// Add.
-let isPredicable = 1 in
+let isCommutable = 1, isPredicable = 1 in
def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = add($src1, $src2)",
- [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
let isPredicable = 1 in
def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s16Imm:$src2),
"$dst = add($src1, #$src2)",
- [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1),
+ s16ImmPred:$src2))]>;
// Logical operations.
let isPredicable = 1 in
def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = xor($src1, $src2)",
- [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
-let isPredicable = 1 in
+let isCommutable = 1, isPredicable = 1 in
def AND_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = and($src1, $src2)",
- [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def OR_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+ (ins IntRegs:$src1, s10Imm:$src2),
"$dst = or($src1, #$src2)",
- [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+ s10ImmPred:$src2))]>;
def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1),
"$dst = not($src1)",
- [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+ [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
def AND_ri : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s10Imm:$src2),
"$dst = and($src1, #$src2)",
- [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+ s10ImmPred:$src2))]>;
-let isPredicable = 1 in
+let isCommutable = 1, isPredicable = 1 in
def OR_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = or($src1, $src2)",
- [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
// Negate.
def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = neg($src1)",
- [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+ [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>;
// Nop.
let neverHasSideEffects = 1 in
def NOP : ALU32_rr<(outs), (ins),
@@ -211,13 +219,20 @@ let isPredicable = 1 in
def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = sub($src1, $src2)",
- [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+// Rd32=sub(#s10,Rs32)
+def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins s10Imm:$src1, IntRegs:$src2),
+ "$dst = sub(#$src1, $src2)",
+ [(set IntRegs:$dst, (sub s10ImmPred:$src1, IntRegs:$src2))]>;
// Transfer immediate.
-let isReMaterializable = 1, isPredicable = 1 in
+let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
"$dst = #$src1",
- [(set IntRegs:$dst, s16ImmPred:$src1)]>;
+ [(set (i32 IntRegs:$dst), s16ImmPred:$src1)]>;
// Transfer register.
let neverHasSideEffects = 1, isPredicable = 1 in
@@ -225,6 +240,11 @@ def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = $src1",
[]>;
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR64 : ALU32_ri<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+
// Transfer control register.
let neverHasSideEffects = 1 in
def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
@@ -246,6 +266,12 @@ def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
"$dst = combine($src1, $src2)",
[]>;
+let neverHasSideEffects = 1 in
+def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, s8Imm:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ []>;
+
// Mux.
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
DoubleRegs:$src2,
@@ -256,48 +282,52 @@ def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst = mux($src1, $src2, $src3)",
- [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
+ [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
+ (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>;
def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
IntRegs:$src3),
"$dst = mux($src1, #$src2, $src3)",
- [(set IntRegs:$dst, (select PredRegs:$src1,
- s8ImmPred:$src2, IntRegs:$src3))]>;
+ [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
+ s8ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>;
def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
s8Imm:$src3),
"$dst = mux($src1, $src2, #$src3)",
- [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
- s8ImmPred:$src3))]>;
+ [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
+ (i32 IntRegs:$src2),
+ s8ImmPred:$src3)))]>;
def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
s8Imm:$src3),
"$dst = mux($src1, #$src2, #$src3)",
- [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
- s8ImmPred:$src3))]>;
+ [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
+ s8ImmPred:$src2,
+ s8ImmPred:$src3)))]>;
// Shift halfword.
let isPredicable = 1 in
def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = aslh($src1)",
- [(set IntRegs:$dst, (shl 16, IntRegs:$src1))]>;
+ [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>;
let isPredicable = 1 in
def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = asrh($src1)",
- [(set IntRegs:$dst, (sra 16, IntRegs:$src1))]>;
+ [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>;
// Sign extend.
let isPredicable = 1 in
def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = sxtb($src1)",
- [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+ [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>;
let isPredicable = 1 in
def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = sxth($src1)",
- [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+ [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>;
// Zero extend.
let isPredicable = 1, neverHasSideEffects = 1 in
@@ -321,25 +351,25 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
// Conditional add.
let neverHasSideEffects = 1, isPredicated = 1 in
def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3),
"if ($src1) $dst = add($src2, #$src3)",
[]>;
let neverHasSideEffects = 1, isPredicated = 1 in
def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3),
"if (!$src1) $dst = add($src2, #$src3)",
[]>;
let neverHasSideEffects = 1, isPredicated = 1 in
def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3),
"if ($src1.new) $dst = add($src2, #$src3)",
[]>;
let neverHasSideEffects = 1, isPredicated = 1 in
def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3),
"if (!$src1.new) $dst = add($src2, #$src3)",
[]>;
@@ -497,7 +527,6 @@ def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
// Conditional transfer.
-
let neverHasSideEffects = 1, isPredicated = 1 in
def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = $src2",
@@ -510,6 +539,18 @@ def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
[]>;
let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR64_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2),
+ "if ($src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR64_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2),
+ "if (!$src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
"if ($src1) $dst = #$src2",
[]>;
@@ -548,25 +589,14 @@ def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
//===----------------------------------------------------------------------===//
// ALU32/PRED -
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// ALU32/VH +
-//===----------------------------------------------------------------------===//
-// Vector add halfwords
-
-// Vector averagehalfwords
-
-// Vector subtract halfwords
-//===----------------------------------------------------------------------===//
-// ALU32/VH -
-//===----------------------------------------------------------------------===//
-
//===----------------------------------------------------------------------===//
// ALU64/ALU +
@@ -575,8 +605,8 @@ defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = add($src1, $src2)",
- [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
// Add halfword.
@@ -589,40 +619,93 @@ defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = and($src1, $src2)",
- [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = or($src1, $src2)",
- [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = xor($src1, $src2)",
- [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
// Maximum.
def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = max($src2, $src1)",
- [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setlt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = maxu($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setult (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = maxu($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setult (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
// Minimum.
def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = min($src2, $src1)",
- [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setgt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = minu($src2, $src1)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 (setugt (i32 IntRegs:$src2),
+ (i32 IntRegs:$src1))),
+ (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>;
+
+def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
+
+def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = minu($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setugt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>;
// Subtract.
def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = sub($src1, $src2)",
- [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))]>;
// Subtract halfword.
@@ -652,30 +735,6 @@ def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// ALU64/VB +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/VB -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/VH +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/VH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/VW +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/VW -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
// CR +
//===----------------------------------------------------------------------===//
// Logical reductions on predicates.
@@ -687,7 +746,8 @@ def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
// Logical operations on predicates.
def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
"$dst = and($src1, $src2)",
- [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+ [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
let neverHasSideEffects = 1 in
def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
@@ -726,15 +786,17 @@ def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
"$dst = not($src1)",
- [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+ [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>;
def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
"$dst = or($src1, $src2)",
- [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+ [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
"$dst = xor($src1, $src2)",
- [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+ [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1),
+ (i1 PredRegs:$src2)))]>;
// User control register transfer.
@@ -760,7 +822,7 @@ let isBranch = 1, isTerminator=1, Defs = [PC],
def JMP_c : JInst< (outs),
(ins PredRegs:$src, brtarget:$offset),
"if ($src) jump $offset",
- [(brcond PredRegs:$src, bb:$offset)]>;
+ [(brcond (i1 PredRegs:$src), bb:$offset)]>;
}
// if (!p0) jump
@@ -826,7 +888,7 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
+let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
Defs = [PC], Uses = [R31] in {
def JMPR: JRInst<(outs), (ins),
"jumpr r31",
@@ -834,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1,
}
// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
Defs = [PC], Uses = [R31] in {
def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
"if ($src1) jumpr r31",
@@ -842,7 +904,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1,
}
// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
Defs = [PC], Uses = [R31] in {
def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
"if (!$src1) jumpr r31",
@@ -865,96 +927,99 @@ let isPredicable = 1 in
def LDrid : LDInst<(outs DoubleRegs:$dst),
(ins MEMri:$addr),
"$dst = memd($addr)",
- [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+ [(set (i64 DoubleRegs:$dst), (i64 (load ADDRriS11_3:$addr)))]>;
let isPredicable = 1, AddedComplexity = 20 in
def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
(ins IntRegs:$src1, s11_3Imm:$offset),
- "$dst=memd($src1+#$offset)",
- [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
- s11_3ImmPred:$offset)))]>;
+ "$dst = memd($src1+#$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ s11_3ImmPred:$offset))))]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memd(#$global+$offset)",
- []>;
+ "$dst = memd(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
(ins globaladdress:$global),
- "$dst=memd(#$global)",
- []>;
+ "$dst = memd(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInst2PI<(outs DoubleRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memd($src1++#$offset)",
[],
"$src1 = $dst2">;
// Load doubleword conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_cPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memd($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_cNotPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memd($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_indexed_cPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1) $dst=memd($src2+#$src3)",
+ "if ($src1) $dst = memd($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1) $dst=memd($src2+#$src3)",
+ "if (!$src1) $dst = memd($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
"if ($src1) $dst1 = memd($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cNotPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
"if (!$src1) $dst1 = memd($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memd($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memd($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_indexed_cdnPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1.new) $dst=memd($src2+#$src3)",
+ "if ($src1.new) $dst = memd($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_indexed_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1.new) $dst=memd($src2+#$src3)",
+ "if (!$src1.new) $dst = memd($src2+#$src3)",
[]>;
@@ -963,114 +1028,113 @@ let isPredicable = 1 in
def LDrib : LDInst<(outs IntRegs:$dst),
(ins MEMri:$addr),
"$dst = memb($addr)",
- [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+ [(set (i32 IntRegs:$dst), (i32 (sextloadi8 ADDRriS11_0:$addr)))]>;
-def LDrib_ae : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memb($addr)",
- [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+// Load byte any-extend.
+def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
// Indexed load byte.
let isPredicable = 1, AddedComplexity = 20 in
def LDrib_indexed : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst=memb($src1+#$offset)",
- [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
- s11_0ImmPred:$offset)))]>;
-
+ "$dst = memb($src1+#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ImmPred:$offset))))]>;
// Indexed load byte any-extend.
let AddedComplexity = 20 in
-def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst=memb($src1+#$offset)",
- [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
- s11_0ImmPred:$offset)))]>;
+def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDrib_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memb(#$global+$offset)",
- []>;
+ "$dst = memb(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDb_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDb_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
- "$dst=memb(#$global)",
- []>;
+ "$dst = memb(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDub_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDub_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
- "$dst=memub(#$global)",
- []>;
+ "$dst = memub(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memb($src1++#$offset)",
[],
"$src1 = $dst2">;
// Load byte conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memb($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memb($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_indexed_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if ($src1) $dst = memb($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if (!$src1) $dst = memb($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if ($src1) $dst1 = memb($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if (!$src1) $dst1 = memb($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memb($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memb($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if ($src1.new) $dst = memb($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if (!$src1.new) $dst = memb($src2+#$src3)",
[]>;
@@ -1081,112 +1145,110 @@ let isPredicable = 1 in
def LDrih : LDInst<(outs IntRegs:$dst),
(ins MEMri:$addr),
"$dst = memh($addr)",
- [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+ [(set (i32 IntRegs:$dst), (i32 (sextloadi16 ADDRriS11_1:$addr)))]>;
let isPredicable = 1, AddedComplexity = 20 in
def LDrih_indexed : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst=memh($src1+#$offset)",
- [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
- s11_1ImmPred:$offset)))] >;
+ "$dst = memh($src1+#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ImmPred:$offset))))]>;
-def LDrih_ae : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memh($addr)",
- [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
+ (i32 (LDrih ADDRriS11_1:$addr))>;
let AddedComplexity = 20 in
-def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst=memh($src1+#$offset)",
- [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
- s11_1ImmPred:$offset)))] >;
+def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
+ (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDrih_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memh(#$global+$offset)",
- []>;
+ "$dst = memh(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDh_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDh_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
- "$dst=memh(#$global)",
- []>;
+ "$dst = memh(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDuh_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDuh_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
- "$dst=memuh(#$global)",
- []>;
-
+ "$dst = memuh(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memh($src1++#$offset)",
[],
"$src1 = $dst2">;
// Load halfword conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_indexed_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if ($src1) $dst = memh($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if (!$src1) $dst = memh($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if ($src1) $dst1 = memh($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if (!$src1) $dst1 = memh($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if ($src1.new) $dst = memh($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if (!$src1.new) $dst = memh($src2+#$src3)",
[]>;
@@ -1196,113 +1258,96 @@ let isPredicable = 1 in
def LDriub : LDInst<(outs IntRegs:$dst),
(ins MEMri:$addr),
"$dst = memub($addr)",
- [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+ [(set (i32 IntRegs:$dst), (i32 (zextloadi8 ADDRriS11_0:$addr)))]>;
-let isPredicable = 1 in
-def LDriubit : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memub($addr)",
- [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
+def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDriub ADDRriS11_0:$addr))>;
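+// zextloadi1 can reuse LDriub: an i1 is stored as a byte holding 0 or 1,
+// so a zero-extended byte load already produces the correct i32 value.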
let isPredicable = 1, AddedComplexity = 20 in
def LDriub_indexed : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst=memub($src1+#$offset)",
- [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
- s11_0ImmPred:$offset)))]>;
-
-let AddedComplexity = 20 in
-def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst=memub($src1+#$offset)",
- [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
- s11_0ImmPred:$offset)))]>;
-
-def LDriub_ae : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memub($addr)",
- [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
-
+ "$dst = memub($src1+#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ImmPred:$offset))))]>;
let AddedComplexity = 20 in
-def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst=memub($src1+#$offset)",
- [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
- s11_0ImmPred:$offset)))]>;
+def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDriub_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memub(#$global+$offset)",
- []>;
+ "$dst = memub(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memub($src1++#$offset)",
[],
"$src1 = $dst2">;
// Load unsigned byte conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memub($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memub($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_indexed_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if ($src1) $dst = memub($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if (!$src1) $dst = memub($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if ($src1) $dst1 = memub($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if (!$src1) $dst1 = memub($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memub($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memub($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if ($src1.new) $dst = memub($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
"if (!$src1.new) $dst = memub($src2+#$src3)",
[]>;
@@ -1312,102 +1357,90 @@ let isPredicable = 1 in
def LDriuh : LDInst<(outs IntRegs:$dst),
(ins MEMri:$addr),
"$dst = memuh($addr)",
- [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+ [(set (i32 IntRegs:$dst), (i32 (zextloadi16 ADDRriS11_1:$addr)))]>;
// Indexed load unsigned halfword.
let isPredicable = 1, AddedComplexity = 20 in
def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst=memuh($src1+#$offset)",
- [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
- s11_1ImmPred:$offset)))]>;
-
-def LDriuh_ae : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memuh($addr)",
- [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+ "$dst = memuh($src1+#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ImmPred:$offset))))]>;
-
-// Indexed load unsigned halfword any-extend.
-let AddedComplexity = 20 in
-def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst=memuh($src1+#$offset)",
- [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
- s11_1ImmPred:$offset)))] >;
-
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memuh(#$global+$offset)",
- []>;
+ "$dst = memuh(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memuh($src1++#$offset)",
[],
"$src1 = $dst2">;
// Load unsigned halfword conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memuh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memuh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_indexed_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if ($src1) $dst = memuh($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if (!$src1) $dst = memuh($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if ($src1) $dst1 = memuh($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if (!$src1) $dst1 = memuh($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memuh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memuh($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if ($src1.new) $dst = memuh($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
"if (!$src1.new) $dst = memuh($src2+#$src3)",
[]>;
@@ -1417,10 +1450,10 @@ def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
let isPredicable = 1 in
def LDriw : LDInst<(outs IntRegs:$dst),
(ins MEMri:$addr), "$dst = memw($addr)",
- [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+ [(set IntRegs:$dst, (i32 (load ADDRriS11_2:$addr)))]>;
// Load predicate.
-let mayLoad = 1, Defs = [R10,R11] in
+let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
def LDriw_pred : LDInst<(outs PredRegs:$dst),
(ins MEMri:$addr),
"Error; should not emit",
@@ -1430,24 +1463,26 @@ def LDriw_pred : LDInst<(outs PredRegs:$dst),
let isPredicable = 1, AddedComplexity = 20 in
def LDriw_indexed : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s11_2Imm:$offset),
- "$dst=memw($src1+#$offset)",
- [(set IntRegs:$dst, (load (add IntRegs:$src1,
- s11_2ImmPred:$offset)))]>;
+ "$dst = memw($src1+#$offset)",
+ [(set IntRegs:$dst, (i32 (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset))))]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDriw_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global, u16Imm:$offset),
- "$dst=memw(#$global+$offset)",
- []>;
+ "$dst = memw(#$global+$offset)",
+ []>,
+ Requires<[NoV4T]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDw_GP : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1 in
+def LDw_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
- "$dst=memw(#$global)",
- []>;
+ "$dst = memw(#$global)",
+ []>,
+ Requires<[NoV4T]>;
-let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
(ins IntRegs:$src1, s4Imm:$offset),
"$dst = memw($src1++#$offset)",
[],
@@ -1455,71 +1490,71 @@ def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
// Load word conditionally.
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1) $dst = memw($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1) $dst = memw($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_indexed_cPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1) $dst=memw($src2+#$src3)",
+ "if ($src1) $dst = memw($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1) $dst=memw($src2+#$src3)",
+ "if (!$src1) $dst = memw($src2+#$src3)",
[]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
"if ($src1) $dst1 = memw($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
"if (!$src1) $dst1 = memw($src2++#$src3)",
[],
"$src2 = $dst2">;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if ($src1.new) $dst = memw($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
"if (!$src1.new) $dst = memw($addr)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1.new) $dst=memw($src2+#$src3)",
+ "if ($src1.new) $dst = memw($src2+#$src3)",
[]>;
-let mayLoad = 1, neverHasSideEffects = 1 in
-def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1.new) $dst=memw($src2+#$src3)",
+ "if (!$src1.new) $dst = memw($src2+#$src3)",
[]>;
// Deallocate stack frame.
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
- def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1),
"deallocframe",
[]>;
}
@@ -1550,13 +1585,14 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
// Rd=+mpyi(Rs,#u8)
def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
"$dst =+ mpyi($src1, #$src2)",
- [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ u8ImmPred:$src2))]>;
// Rd=-mpyi(Rs,#u8)
def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
"$dst =- mpyi($src1, #$src2)",
- [(set IntRegs:$dst,
- (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ n8ImmPred:$src2))]>;
// Rd=mpyi(Rs,#m9)
// s9 is NOT the same as m9 - but it works.. so far.
@@ -1564,35 +1600,40 @@ def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
// depending on the value of m9. See Arch Spec.
def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
"$dst = mpyi($src1, #$src2)",
- [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ s9ImmPred:$src2))]>;
// Rd=mpyi(Rs,Rt)
def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyi($src1, $src2)",
- [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
// Rx+=mpyi(Rs,#u8)
def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
"$dst += mpyi($src2, #$src3)",
- [(set IntRegs:$dst,
- (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3),
+ (i32 IntRegs:$src1)))],
"$src1 = $dst">;
// Rx+=mpyi(Rs,Rt)
def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpyi($src2, $src3)",
- [(set IntRegs:$dst,
- (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))],
"$src1 = $dst">;
// Rx-=mpyi(Rs,#u8)
def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
"$dst -= mpyi($src2, #$src3)",
- [(set IntRegs:$dst,
- (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u8ImmPred:$src3)))],
"$src1 = $dst">;
// Multiply and use upper result.
@@ -1601,27 +1642,30 @@ def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
// Rd=mpy(Rs,Rt)
def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpy($src1, $src2)",
- [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
// Rd=mpy(Rs,Rt):rnd
// Rd=mpyu(Rs,Rt)
def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyu($src1, $src2)",
- [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
// Multiply and use full result.
// Rdd=mpyu(Rs,Rt)
def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyu($src1, $src2)",
- [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
- (i64 (anyext IntRegs:$src2))))]>;
+ [(set (i64 DoubleRegs:$dst),
+ (mul (i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (anyext (i32 IntRegs:$src2)))))]>;
// Rdd=mpy(Rs,Rt)
def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpy($src1, $src2)",
- [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
- (i64 (sext IntRegs:$src2))))]>;
-
+ [(set (i64 DoubleRegs:$dst),
+ (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2)))))]>;
// Multiply and accumulate, use full result.
// Rxx[+-]=mpy(Rs,Rt)
@@ -1629,18 +1673,20 @@ def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpy($src2, $src3)",
- [(set DoubleRegs:$dst,
- (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
- DoubleRegs:$src1))],
+ [(set (i64 DoubleRegs:$dst),
+ (add (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))),
+ (i64 DoubleRegs:$src1)))],
"$src1 = $dst">;
// Rxx-=mpy(Rs,Rt)
def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst -= mpy($src2, $src3)",
- [(set DoubleRegs:$dst,
- (sub DoubleRegs:$src1,
- (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ [(set (i64 DoubleRegs:$dst),
+ (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3))))))],
"$src1 = $dst">;
// Rxx[+-]=mpyu(Rs,Rt)
@@ -1648,47 +1694,52 @@ def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst += mpyu($src2, $src3)",
- [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
- (i64 (anyext IntRegs:$src3))),
- DoubleRegs:$src1))],"$src1 = $dst">;
+ [(set (i64 DoubleRegs:$dst),
+ (add (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))),
+ (i64 DoubleRegs:$src1)))], "$src1 = $dst">;
// Rxx-=mpyu(Rs,Rt)
def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpyu($src2, $src3)",
- [(set DoubleRegs:$dst,
- (sub DoubleRegs:$src1,
- (mul (i64 (anyext IntRegs:$src2)),
- (i64 (anyext IntRegs:$src3)))))],
+ [(set (i64 DoubleRegs:$dst),
+ (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3))))))],
"$src1 = $dst">;
def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst += add($src2, $src3)",
- [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
- IntRegs:$src1))],
+ [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))],
"$src1 = $dst">;
def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, s8Imm:$src3),
"$dst += add($src2, #$src3)",
- [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
- IntRegs:$src1))],
+ [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+ s8ImmPred:$src3),
+ (i32 IntRegs:$src1)))],
"$src1 = $dst">;
def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst -= add($src2, $src3)",
- [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
- IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">;
def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, s8Imm:$src3),
"$dst -= add($src2, #$src3)",
- [(set IntRegs:$dst, (sub IntRegs:$src1,
- (add IntRegs:$src2, s8ImmPred:$src3)))],
+ [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
+ (add (i32 IntRegs:$src2),
+ s8ImmPred:$src3)))],
"$src1 = $dst">;
//===----------------------------------------------------------------------===//
@@ -1731,57 +1782,70 @@ let isPredicable = 1 in
def STrid : STInst<(outs),
(ins MEMri:$addr, DoubleRegs:$src1),
"memd($addr) = $src1",
- [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+ [(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr)]>;
// Indexed store double word.
let AddedComplexity = 10, isPredicable = 1 in
def STrid_indexed : STInst<(outs),
(ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
"memd($src1+#$src2) = $src3",
- [(store DoubleRegs:$src3,
- (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+ [(store (i64 DoubleRegs:$src3),
+ (add (i32 IntRegs:$src1), s11_3ImmPred:$src2))]>;
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrid_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STrid_GP : STInst2<(outs),
(ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
"memd(#$global+$offset) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
+
+let neverHasSideEffects = 1 in
+def STd_GP : STInst2<(outs),
+ (ins globaladdress:$global, DoubleRegs:$src),
+ "memd(#$global) = $src",
+ []>,
+ Requires<[NoV4T]>;
let hasCtrlDep = 1, isPredicable = 1 in
def POST_STdri : STInstPI<(outs IntRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
"memd($src2++#$offset) = $src1",
[(set IntRegs:$dst,
- (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2),
+ s4_3ImmPred:$offset))],
"$src2 = $dst">;
// Store doubleword conditionally.
// if ([!]Pv) memd(Rs+#u6:3)=Rtt
// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
"if ($src1) memd($addr) = $src2",
[]>;
// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cNotPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
"if (!$src1) memd($addr) = $src2",
[]>;
// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
DoubleRegs:$src4),
"if ($src1) memd($src2+#$src3) = $src4",
[]>;
// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cNotPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
DoubleRegs:$src4),
"if (!$src1) memd($src2+#$src3) = $src4",
@@ -1789,8 +1853,9 @@ def STrid_indexed_cNotPt : STInst<(outs),
// if ([!]Pv) memd(Rx++#s4:3)=Rtt
// if (Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
s4_3Imm:$offset),
"if ($src1) memd($src3++#$offset) = $src2",
@@ -1798,9 +1863,9 @@ def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
"$src3 = $dst">;
// if (!Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+let AddedComplexity = 10, neverHasSideEffects = 1,
isPredicated = 1 in
-def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
s4_3Imm:$offset),
"if (!$src1) memd($src3++#$offset) = $src2",
@@ -1814,27 +1879,30 @@ let isPredicable = 1 in
def STrib : STInst<(outs),
(ins MEMri:$addr, IntRegs:$src1),
"memb($addr) = $src1",
- [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+ [(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr)]>;
let AddedComplexity = 10, isPredicable = 1 in
def STrib_indexed : STInst<(outs),
(ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
"memb($src1+#$src2) = $src3",
- [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
- s11_0ImmPred:$src2))]>;
+ [(truncstorei8 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
+ s11_0ImmPred:$src2))]>;
// memb(gp+#u16:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STrib_GP : STInst2<(outs),
(ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
"memb(#$global+$offset) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP : STInst<(outs),
+// memb(#global)=Rt
+let neverHasSideEffects = 1 in
+def STb_GP : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memb(#$global) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
// memb(Rx++#s4:0)=Rt
let hasCtrlDep = 1, isPredicable = 1 in
@@ -1843,51 +1911,51 @@ def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
s4Imm:$offset),
"memb($src2++#$offset) = $src1",
[(set IntRegs:$dst,
- (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
s4_0ImmPred:$offset))],
"$src2 = $dst">;
// Store byte conditionally.
// if ([!]Pv) memb(Rs+#u6:0)=Rt
// if (Pv) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_cPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memb($addr) = $src2",
[]>;
// if (!Pv) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memb($addr) = $src2",
[]>;
// if (Pv) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_indexed_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_indexed_cPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if ($src1) memb($src2+#$src3) = $src4",
[]>;
// if (!Pv) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_indexed_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_indexed_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if (!$src1) memb($src2+#$src3) = $src4",
[]>;
// if ([!]Pv) memb(Rx++#s4:0)=Rt
// if (Pv) memb(Rx++#s4:0)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if ($src1) memb($src3++#$offset) = $src2",
[],"$src3 = $dst">;
// if (!Pv) memb(Rx++#s4:0)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if (!$src1) memb($src3++#$offset) = $src2",
[],"$src3 = $dst">;
@@ -1899,27 +1967,29 @@ let isPredicable = 1 in
def STrih : STInst<(outs),
(ins MEMri:$addr, IntRegs:$src1),
"memh($addr) = $src1",
- [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+ [(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr)]>;
let AddedComplexity = 10, isPredicable = 1 in
def STrih_indexed : STInst<(outs),
(ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
"memh($src1+#$src2) = $src3",
- [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
- s11_1ImmPred:$src2))]>;
+ [(truncstorei16 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
+ s11_1ImmPred:$src2))]>;
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STrih_GP : STInst2<(outs),
(ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
"memh(#$global+$offset) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STh_GP : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memh(#$global) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
// memh(Rx++#s4:1)=Rt.H
// memh(Rx++#s4:1)=Rt
@@ -1928,51 +1998,51 @@ def POST_SThri : STInstPI<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
"memh($src2++#$offset) = $src1",
[(set IntRegs:$dst,
- (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
s4_1ImmPred:$offset))],
"$src2 = $dst">;
// Store halfword conditionally.
// if ([!]Pv) memh(Rs+#u6:1)=Rt
// if (Pv) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_cPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memh($addr) = $src2",
[]>;
// if (!Pv) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memh($addr) = $src2",
[]>;
// if (Pv) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_indexed_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_indexed_cPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if ($src1) memh($src2+#$src3) = $src4",
[]>;
// if (!Pv) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_indexed_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_indexed_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if (!$src1) memh($src2+#$src3) = $src4",
[]>;
// if ([!]Pv) memh(Rx++#s4:1)=Rt
// if (Pv) memh(Rx++#s4:1)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if ($src1) memh($src3++#$offset) = $src2",
[],"$src3 = $dst">;
// if (!Pv) memh(Rx++#s4:1)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if (!$src1) memh($src3++#$offset) = $src2",
[],"$src3 = $dst">;
@@ -1980,8 +2050,8 @@ def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
// Store word.
// Store predicate.
-let Defs = [R10,R11] in
-def STriw_pred : STInst<(outs),
+let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def STriw_pred : STInst2<(outs),
(ins MEMri:$addr, PredRegs:$src1),
"Error; should not emit",
[]>;
@@ -1991,69 +2061,79 @@ let isPredicable = 1 in
def STriw : STInst<(outs),
(ins MEMri:$addr, IntRegs:$src1),
"memw($addr) = $src1",
- [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+ [(store (i32 IntRegs:$src1), ADDRriS11_2:$addr)]>;
let AddedComplexity = 10, isPredicable = 1 in
def STriw_indexed : STInst<(outs),
(ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
"memw($src1+#$src2) = $src3",
- [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+ [(store (i32 IntRegs:$src3),
+ (add (i32 IntRegs:$src1), s11_2ImmPred:$src2))]>;
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STriw_GP : STInst2<(outs),
(ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
"memw(#$global+$offset) = $src",
- []>;
+ []>,
+ Requires<[NoV4T]>;
+
+let neverHasSideEffects = 1 in
+def STw_GP : STInst2<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src",
+ []>,
+ Requires<[NoV4T]>;
let hasCtrlDep = 1, isPredicable = 1 in
def POST_STwri : STInstPI<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
"memw($src2++#$offset) = $src1",
[(set IntRegs:$dst,
- (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2),
+ s4_2ImmPred:$offset))],
"$src2 = $dst">;
// Store word conditionally.
// if ([!]Pv) memw(Rs+#u6:2)=Rt
// if (Pv) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_cPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memw($addr) = $src2",
[]>;
// if (!Pv) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memw($addr) = $src2",
[]>;
// if (Pv) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_indexed_cPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_indexed_cPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if ($src1) memw($src2+#$src3) = $src4",
[]>;
// if (!Pv) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_indexed_cNotPt : STInst<(outs),
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_indexed_cNotPt : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if (!$src1) memw($src2+#$src3) = $src4",
[]>;
// if ([!]Pv) memw(Rx++#s4:2)=Rt
// if (Pv) memw(Rx++#s4:2)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if ($src1) memw($src3++#$offset) = $src2",
[],"$src3 = $dst">;
// if (!Pv) memw(Rx++#s4:2)=Rt
-let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if (!$src1) memw($src3++#$offset) = $src2",
[],"$src3 = $dst">;
@@ -2062,7 +2142,7 @@ def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
- def ALLOCFRAME : STInst<(outs),
+ def ALLOCFRAME : STInst2<(outs),
(ins i32imm:$amt),
"allocframe(#$amt)",
[]>;
@@ -2077,13 +2157,13 @@ let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
// Logical NOT.
def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
"$dst = not($src1)",
- [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+ [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
// Sign extend word to doubleword.
def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
"$dst = sxtw($src1)",
- [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+ [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>;
//===----------------------------------------------------------------------===//
// STYPE/ALU -
//===----------------------------------------------------------------------===//
@@ -2091,37 +2171,58 @@ def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// STYPE/BIT +
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/BIT -
-//===----------------------------------------------------------------------===//
+// clrbit.
+def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = clrbit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+ (not
+ (shl 1, u5ImmPred:$src2))))]>;
+def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = clrbit($src1, #$src2)",
+ []>;
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
+// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31).
+def : Pat <(and (i32 IntRegs:$src1), 2147483647),
+ (CLRBIT_31 (i32 IntRegs:$src1), 31)>;
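+// 2147483647 is 0x7fffffff, i.e. ~(1 << 31), so the 'and' above clears
+// exactly bit 31.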
-//===----------------------------------------------------------------------===//
-// STYPE/PERM +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/PERM -
-//===----------------------------------------------------------------------===//
+// setbit.
+def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = setbit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+ (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31).
+def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = setbit($src1, #$src2)",
+ []>;
+
+def : Pat <(or (i32 IntRegs:$src1), -2147483648),
+ (SETBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// togglebit.
+def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+                    "$dst = togglebit($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
+ (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31).
+def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = togglebit($src1, #$src2)",
+ []>;
+
+def : Pat <(xor (i32 IntRegs:$src1), -2147483648),
+ (TOGBIT_31 (i32 IntRegs:$src1), 31)>;
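+// -2147483648 is 0x80000000, i.e. (1 << 31), so or'ing/xor'ing with it
+// sets or toggles exactly bit 31.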
-//===----------------------------------------------------------------------===//
-// STYPE/PRED +
-//===----------------------------------------------------------------------===//
// Predicate transfer.
let neverHasSideEffects = 1 in
def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
- "$dst = $src1 // Should almost never emit this",
+ "$dst = $src1 /* Should almost never emit this. */",
[]>;
def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1 // Should almost never emit!",
- [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+ "$dst = $src1 /* Should almost never emit this. */",
+ [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>;
//===----------------------------------------------------------------------===//
// STYPE/PRED -
//===----------------------------------------------------------------------===//
@@ -2132,75 +2233,85 @@ def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
// Shift by immediate.
def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
"$dst = asr($src1, #$src2)",
- [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
"$dst = asr($src1, #$src2)",
- [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
"$dst = asl($src1, #$src2)",
- [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
+
+def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
"$dst = lsr($src1, #$src2)",
- [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+ u5ImmPred:$src2))]>;
def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
"$dst = lsr($src1, #$src2)",
- [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
-
-def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2,
- u6Imm:$src3),
- "$dst += lsr($src2, #$src3)",
- [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
- (srl DoubleRegs:$src2,
- u6ImmPred:$src3)))],
- "$src1 = $dst">;
-
-// Shift by immediate and accumulate.
-def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3),
- "$dst += asr($src2, $src3)",
- [], "$src1 = $dst">;
+ [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]>;
// Shift by immediate and add.
+let AddedComplexity = 100 in
def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
u3Imm:$src3),
"$dst = addasl($src1, $src2, #$src3)",
- [(set IntRegs:$dst, (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u3ImmPred:$src3)))]>;
+ [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u3ImmPred:$src3)))]>;
// Shift by register.
def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = asl($src1, $src2)",
- [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = asr($src1, $src2)",
- [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+def LSL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = lsr($src1, $src2)",
- [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+ [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
"$dst = lsl($src1, $src2)",
- [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
IntRegs:$src2),
"$dst = asr($src1, $src2)",
- [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
IntRegs:$src2),
"$dst = lsr($src1, $src2)",
- [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+ [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
//===----------------------------------------------------------------------===//
// STYPE/SHIFT -
@@ -2231,8 +2342,8 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
[SDNPHasChain]>;
-let hasSideEffects = 1 in
-def BARRIER : STInst<(outs), (ins),
+let hasSideEffects = 1, isHexagonSolo = 1 in
+def BARRIER : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)]>;
@@ -2244,47 +2355,50 @@ def BARRIER : STInst<(outs), (ins),
let isReMaterializable = 1 in
def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
"$dst = #$src1",
- [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+ [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
// Pseudo instruction to encode a set of conditional transfers.
// This instruction is used instead of a mux and trades off code size
// for performance. We conduct this transformation optimistically in
// the hope that these instructions get promoted to dot-new transfers.
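// As an illustration (assembly sketch, not emitted verbatim by this pattern):
//   r3 = mux(p0, r1, r2)
// can instead be expressed as the conditional-transfer pair
//   if (p0) r3 = r1
//   if (!p0) r3 = r2
// which may later be promoted to dot-new forms such as "if (p0.new) r3 = r1".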
-let AddedComplexity = 100 in
+let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
IntRegs:$src2,
IntRegs:$src3),
"Error; should not emit",
- [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-let AddedComplexity = 100 in
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1),
+ (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>;
+let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
"Error; should not emit",
- [(set IntRegs:$dst,
- (select PredRegs:$src1, IntRegs:$src2, s12ImmPred:$src3))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ s12ImmPred:$src3)))]>;
-let AddedComplexity = 100 in
+let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
"Error; should not emit",
- [(set IntRegs:$dst,
- (select PredRegs:$src1, s12ImmPred:$src2, IntRegs:$src3))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>;
-let AddedComplexity = 100 in
+let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
"Error; should not emit",
- [(set IntRegs:$dst, (select PredRegs:$src1,
- s12ImmPred:$src2,
- s12ImmPred:$src3))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
+ s12ImmPred:$src3)))]>;
// Generate frameindex addresses.
let isReMaterializable = 1 in
def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
"$dst = add($src1)",
- [(set IntRegs:$dst, ADDRri:$src1)]>;
+ [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
//
// CR - Type.
@@ -2303,69 +2417,116 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset),
":endloop0",
[]>;
}
// Support for generating global address.
// Taken from X86InstrInfo.td.
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
- SDTCisPtrTy<0>]>;
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+// HI/LO Instructions
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
+ "$dst.l = #LO($imm_value)",
+ []>;
+
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
+ "$dst.h = #HI($imm_value)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst.l = #LO($jt)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst.h = #HI($jt)",
+ []>;
+
+
+let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in
+def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.l = #LO($label)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in
+def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.h = #HI($label)",
+ []>;
+
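+// Taken together, a 32-bit address or constant can be materialized in two
+// halves, e.g. (illustrative assembly for a symbol foo):
+//   r0.h = #HI(foo)
+//   r0.l = #LO(foo)
+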
// This pattern is incorrect. When we add small data, we should change
// this pattern to use memw(#foo).
+// This is for sdata.
let isMoveImm = 1 in
def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst = CONST32(#$global)",
- [(set IntRegs:$dst,
- (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+ [(set (i32 IntRegs:$dst),
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+// This is for non-sdata.
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst = CONST32(#$global)",
- [(set IntRegs:$dst,
- (HexagonCONST32 tglobaladdr:$global))]>;
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32 tglobaladdr:$global))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt),
"$dst = CONST32(#$jt)",
- [(set IntRegs:$dst,
- (HexagonCONST32 tjumptable:$jt))]>;
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32 tjumptable:$jt))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst = CONST32(#$global)",
- [(set IntRegs:$dst,
- (HexagonCONST32_GP tglobaladdr:$global))]>;
+ [(set (i32 IntRegs:$dst),
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
- [(set IntRegs:$dst, imm:$global) ]>;
+ [(set (i32 IntRegs:$dst), imm:$global) ]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst = CONST32($label)",
- [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+ [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global),
"$dst = CONST64(#$global)",
- [(set DoubleRegs:$dst, imm:$global) ]>;
+ [(set (i64 DoubleRegs:$dst), imm:$global) ]>;
def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
"$dst = xor($dst, $dst)",
- [(set PredRegs:$dst, 0)]>;
+ [(set (i1 PredRegs:$dst), 0)]>;
def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpy($src1, $src2)",
- [(set IntRegs:$dst,
- (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
- (i64 (sext IntRegs:$src2)))),
- (i32 32)))))]>;
+ "$dst = mpy($src1, $src2)",
+ [(set (i32 IntRegs:$dst),
+ (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2))))),
+ (i32 32)))))]>;
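+// i.e. $dst receives the upper 32 bits of the full 64-bit signed product of
+// $src1 and $src2.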
// Pseudo instructions.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
@@ -2405,7 +2566,7 @@ let Defs = [R29, R30, R31], Uses = [R29] in {
let isCall = 1, neverHasSideEffects = 1,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ def CALL : JInst<(outs), (ins calltarget:$dst),
"call $dst", []>;
}
@@ -2413,34 +2574,28 @@ let isCall = 1, neverHasSideEffects = 1,
let isCall = 1, neverHasSideEffects = 1,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst),
"callr $dst",
[]>;
}
// Tail Calls.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
"jumpr $dst // TAILCALL", []>;
}
// Map call instruction.
-def : Pat<(call IntRegs:$dst),
- (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
def : Pat<(call tglobaladdr:$dst),
(CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
def : Pat<(call texternalsym:$dst),
@@ -2450,309 +2605,516 @@ def : Pat<(HexagonTCRet tglobaladdr:$dst),
(TCRETURNtg tglobaladdr:$dst)>;
def : Pat<(HexagonTCRet texternalsym:$dst),
(TCRETURNtext texternalsym:$dst)>;
-def : Pat<(HexagonTCRet IntRegs:$dst),
- (TCRETURNR IntRegs:$dst)>;
+def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+ (TCRETURNR (i32 IntRegs:$dst))>;
+
+// Atomic load and store support
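+// Unordered and monotonic atomic loads and stores are lowered here to the
+// ordinary load/store instructions, on the assumption that naturally aligned
+// scalar accesses are single-copy atomic on Hexagon; these patterns add no
+// barriers.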
+// 8 bit atomic load
+def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
+ (i32 (LDriub ADDRriS11_0:$src1))>;
+
+def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
+ (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
+
+
+
+// 16 bit atomic load
+def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
+ (i32 (LDriuh ADDRriS11_1:$src1))>;
+
+def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
+ (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
+
+
+
+// 32 bit atomic load
+def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
+ (i32 (LDriw ADDRriS11_2:$src1))>;
+
+def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
+ (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
+
+
+// 64 bit atomic load
+def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
+ (i64 (LDrid ADDRriS11_3:$src1))>;
-// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
-def : Pat <(and IntRegs:$src1, 65535),
- (ZXTH IntRegs:$src1)>;
+def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
+ (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
+
+
+// 64 bit atomic store
+def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
+
+// 8 bit atomic store
+def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
+ (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+
+// 16 bit atomic store
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
+ (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
+ (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
+ (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
+ (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+
+
+
+def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
+ (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>;
+
+def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def : Pat <(and (i32 IntRegs:$src1), 65535),
+ (ZXTH (i32 IntRegs:$src1))>;
// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def : Pat <(and IntRegs:$src1, 255),
- (ZXTB IntRegs:$src1)>;
+def : Pat <(and (i32 IntRegs:$src1), 255),
+ (ZXTB (i32 IntRegs:$src1))>;
// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced;
// if it is, it has to be mapped to a NOP.
-def : Pat <(add PredRegs:$src1, -1),
- (NOT_p PredRegs:$src1)>;
+def : Pat <(add (i1 PredRegs:$src1), -1),
+ (NOT_p (i1 PredRegs:$src1))>;
// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
-def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
- IntRegs:$src4),
- (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
- IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src3),
+ (i32 IntRegs:$src4)),
+ (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
+ Requires<[HasV2TOnly]>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
- (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
+ (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
+ s8ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i)
+def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
+ s12ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3),
+ (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
+ (i32 IntRegs:$src2)))>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
- (JMP_cNot PredRegs:$src1, bb:$offset)>;
+ (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
- (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+ (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
// Map from store(globaladdress + x) -> memd(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(store DoubleRegs:$src1,
+def : Pat <(store (i64 DoubleRegs:$src1),
(add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-// Map from store(globaladdress) -> memd(#foo + 0).
+// Map from store(globaladdress) -> memd(#foo).
let AddedComplexity = 100 in
-def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
- (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress + x) -> memw(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+def : Pat <(store (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress) -> memw(#foo + 0).
let AddedComplexity = 100 in
-def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>;
-// Map from store(globaladdress) -> memw(#foo + 0).
+// Map from store(globaladdress) -> memw(#foo).
let AddedComplexity = 100 in
-def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress + x) -> memh(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(truncstorei16 IntRegs:$src1,
+def : Pat <(truncstorei16 (i32 IntRegs:$src1),
(add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress) -> memh(#foo).
let AddedComplexity = 100 in
-def : Pat <(truncstorei16 IntRegs:$src1,
+def : Pat <(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+ (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress + x) -> memb(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(truncstorei8 IntRegs:$src1,
+def : Pat <(truncstorei8 (i32 IntRegs:$src1),
(add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from store(globaladdress) -> memb(#foo).
let AddedComplexity = 100 in
-def : Pat <(truncstorei8 IntRegs:$src1,
+def : Pat <(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+ (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress + x) -> memw(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memw(#foo + 0).
+// Map from load(globaladdress) -> memw(#foo).
let AddedComplexity = 100 in
-def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
- (LDw_GP tglobaladdr:$global)>;
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress + x) -> memd(#foo + x).
let AddedComplexity = 100 in
def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset))),
- (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+ (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress) -> memd(#foo).
let AddedComplexity = 100 in
def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (LDd_GP tglobaladdr:$global)>;
-
+ (i64 (LDd_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd.
let AddedComplexity = 100 in
def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+ (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress + x) -> memh(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memh(#foo + 0).
+// Map from load(globaladdress) -> memh(#foo + 0).
let AddedComplexity = 100 in
-def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDrih_GP tglobaladdr:$global, 0)>;
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDrih_GP tglobaladdr:$global, 0))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress + x) -> memuh(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memuh(#foo + 0).
+// Map from load(globaladdress) -> memuh(#foo).
let AddedComplexity = 100 in
-def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDriuh_GP tglobaladdr:$global, 0)>;
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDriuh_GP tglobaladdr:$global, 0))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress + x) -> memuh(#foo + x).
+// Map from load(globaladdress) -> memh(#foo).
let AddedComplexity = 100 in
-def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memuh(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDriuh_GP tglobaladdr:$global, 0)>;
-// Map from load(globaladdress + x) -> memub(#foo + x).
+// Map from load(globaladdress) -> memuh(#foo).
let AddedComplexity = 100 in
-def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memuh(#foo + 0).
+// Map from load(globaladdress + x) -> memb(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDriub_GP tglobaladdr:$global, 0)>;
+def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress + x) -> memb(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress) -> memb(#foo).
let AddedComplexity = 100 in
-def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDb_GP tglobaladdr:$global)>;
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress) -> memb(#foo).
let AddedComplexity = 100 in
-def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDb_GP tglobaladdr:$global)>;
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
// Map from load(globaladdress) -> memub(#foo).
let AddedComplexity = 100 in
-def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDub_GP tglobaladdr:$global)>;
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
// When the Interprocedural Global Variable optimizer realizes that a
// certain global variable takes only two constant values, it shrinks the
// global to a boolean. Catch those loads here in the following 3 patterns.
let AddedComplexity = 100 in
-def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDb_GP tglobaladdr:$global)>;
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
let AddedComplexity = 100 in
-def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDb_GP tglobaladdr:$global)>;
-
-let AddedComplexity = 100 in
-def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDub_GP tglobaladdr:$global)>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDh_GP tglobaladdr:$global)>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDh_GP tglobaladdr:$global)>;
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
-// Map from load(globaladdress) -> memuh(#foo).
let AddedComplexity = 100 in
-def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
- (LDuh_GP tglobaladdr:$global)>;
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP tglobaladdr:$global))>,
+ Requires<[NoV4T]>;
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
- (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
+ (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
-def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
- (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
-def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
- (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg))))))>;
// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
-def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
- (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg))))))>;
-// We want to prevent emiting pnot's as much as possible.
+// We want to prevent emitting pnot's as much as possible.
// Map brcond with an unsupported setcc to a JMP_cNot.
-def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
- (JMP_cNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
-def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
- (JMP_cNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
-def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
- (JMP_cNot PredRegs:$src1, bb:$offset)>;
+def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
-def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
- (JMP_c PredRegs:$src1, bb:$offset)>;
+def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (JMP_c (i1 PredRegs:$src1), bb:$offset)>;
-def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
- (JMP_cNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>;
-def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
- (JMP_c (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>;
-def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
bb:$offset)>;
-def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
- (JMP_cNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
-def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
- bb:$offset)>;
+ (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ bb:$offset)>;
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes, so emulate them with combines.
-def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (COMBINE_rr
- (MUX_rr PredRegs:$src1,
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
- (MUX_rr PredRegs:$src1,
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
+def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
+ subreg_hireg)))),
+ (i32 (MUX_rr (i1 PredRegs:$src1),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
+ subreg_loreg))))))>;
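+// i.e. the i64 select becomes combine(mux(p, x.hi, y.hi), mux(p, x.lo, y.lo)),
+// choosing the high and low 32-bit halves independently under the same
+// predicate.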
// Map from a 1-bit select to logical ops.
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
-def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
- (AND_pp (NOT_p PredRegs:$src1), PredRegs:$src3))>;
+def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2),
+ (i1 PredRegs:$src3)),
+ (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
+ (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
def : Pat<(i1 (load ADDRriS11_2:$addr)),
(i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
// Map for truncating from 64 immediates to 32 bit immediates.
-def : Pat<(i32 (trunc DoubleRegs:$src)),
- (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>;
// Map for truncating from i64 immediates to i1 bit immediates.
-def : Pat<(i1 (trunc DoubleRegs:$src)),
- (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+def : Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg))))>;
// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
-def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
- (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
-def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
- (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
+ subreg_loreg)))>;
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt
+def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
-def : Pat<(truncstorei32 DoubleRegs:$src, ADDRriS11_0:$addr),
- (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
subreg_loreg)))>;
// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
@@ -2763,118 +3125,134 @@ let AddedComplexity = 100 in
// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
// memw(#foo) = r0
def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (TFRI 1))>;
-
+ (STb_GP tglobaladdr:$global, (TFRI 1))>,
+ Requires<[NoV4T]>;
// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
(STrib ADDRriS11_2:$addr, (TFRI 1))>;
// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
-def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
- (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)) )>;
// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
// Better way to do this?
-def : Pat<(i64 (anyext IntRegs:$src1)),
- (i64 (SXTW IntRegs:$src1))>;
+def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (SXTW (i32 IntRegs:$src1)))>;
// Map cmple -> cmpgt.
// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
- (i1 (NOT_p (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>;
// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
- (i1 (NOT_p (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Rss <= Rtt -> !(Rss > Rtt).
-def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (NOT_p (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
// Map cmpne -> cmpeq.
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
- (i1 (NOT_p(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>;
// Map cmpne(Rs) -> !cmpeqe(Rs).
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
- (i1 (NOT_p(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
// Convert setne back to xor for Hexagon, since we compute with predicate registers.
-def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
- (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
// Map cmpne(Rss) -> !cmpew(Rss).
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (NOT_p(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2)))))>;
// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt).
// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
- (i1 (NOT_p(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
-def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
- (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
-def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (NOT_p(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1)))))>;
// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
// rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
- (i1 (NOT_p (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
+ (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>;
-// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
-// rs < rt -> rs < rt. Let assembler map it.
-def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
- (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
+// rs < rt -> rt > rs.
+// We can let the assembler map it, or we can do it in the compiler itself.
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
// rss < rtt -> (rtt > rss).
-def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
-// Map from cmpltu(Rs, Rd) -> !cmpgtu(Rs, Rd - 1).
+// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
// rs < rt -> rt > rs.
-def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
- (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+// We can let the assembler map it, or we can do it in the compiler itself.
+def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-// Map from cmpltu(Rss, Rdd) -> !cmpgtu(Rss, Rdd - 1).
+// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
// rs < rt -> rt > rs.
-def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #u8)
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)),
+ (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)),
+ (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
- (i1 (NOT_p (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
// Map from (Rs <= Rt) -> !(Rs > Rt).
-def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
- (i1 (NOT_p (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
// Map from (Rs <= Rt) -> !(Rs > Rt).
-def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
- (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
// Sign extends.
// i1 -> i32
-def : Pat <(i32 (sext PredRegs:$src1)),
- (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+def : Pat <(i32 (sext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (sext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>;
// Convert sign-extended load back to load and sign extend.
// i8 -> i64
@@ -2899,16 +3277,16 @@ def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
// Zero extends.
// i1 -> i32
-def : Pat <(i32 (zext PredRegs:$src1)),
- (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+def : Pat <(i32 (zext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
// i1 -> i64
-def : Pat <(i64 (zext PredRegs:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
// i32 -> i64
-def : Pat <(i64 (zext IntRegs:$src1)),
- (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
@@ -2926,16 +3304,16 @@ def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def : Pat <(i32 (zext PredRegs:$src1)),
- (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+def : Pat <(i32 (zext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def : Pat <(i32 (anyext PredRegs:$src1)),
- (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
+ (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>;
// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
-def : Pat <(i64 (anyext PredRegs:$src1)),
- (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
+ (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
// Any extended 64-bit load.
@@ -2948,75 +3326,103 @@ def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
-def : Pat<(i64 (zext IntRegs:$src1)),
- (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+def : Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
// Multiply 64-bit unsigned and use upper result.
-def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
- (MPYU64_acc(COMBINE_rr (TFRI 0),
- (EXTRACT_SUBREG
- (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
- (EXTRACT_SUBREG (LSRd_ri(MPYU64
- (EXTRACT_SUBREG DoubleRegs:$src1,
- subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2,
- subreg_loreg)),
- 32) ,subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1,
- subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src2,
- subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
- 32),subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
- )>;
+def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (i64
+ (MPYU64_acc
+ (i64
+ (COMBINE_rr
+ (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64_acc
+ (i64
+ (MPYU64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), 32)),
+ subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
+ 32)), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
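+// Roughly, this expansion (and the signed variant below) follows the
+// schoolbook decomposition with a = {a_hi, a_lo} and b = {b_hi, b_lo}:
+//   a*b = (a_hi*b_hi << 64) + ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo
+// so the upper 64 bits are a_hi*b_hi plus the carries out of the middle
+// partial products; the nested multiply-accumulate/LSRd_ri chain above
+// accumulates those carries 32 bits at a time.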
// Multiply 64-bit signed and use upper result.
-def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
- (MPY64_acc(COMBINE_rr (TFRI 0),
- (EXTRACT_SUBREG
- (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
- (EXTRACT_SUBREG (LSRd_ri(MPYU64
- (EXTRACT_SUBREG DoubleRegs:$src1,
- subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2,
- subreg_loreg)),
- 32) ,subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1,
- subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src2,
- subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
- 32),subreg_loreg)),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
- )>;
+def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (i64
+ (MPY64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPY64_acc
+ (i64
+ (MPY64_acc
+ (i64
+ (COMBINE_rr (TFRI 0),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (LSRd_ri
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), 32)),
+ subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
+ 32)), subreg_loreg)))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
// Hexagon specific ISD nodes.
-def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
- SDTHexagonADJDYNALLOC>;
+ SDTHexagonADJDYNALLOC>;
// Needed to tag these instructions for stack layout.
let usesCustomInserter = 1 in
def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
s16Imm:$src2),
"$dst = add($src1, #$src2)",
- [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
- s16ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst),
+ (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1),
+ s16ImmPred:$src2))]>;
-def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = $src1",
- [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+ [(set (i32 IntRegs:$dst),
+ (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
let AddedComplexity = 100 in
-def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
- (TFR IntRegs:$src1)>;
-
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
+ (COPY (i32 IntRegs:$src1))>;
def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
@@ -3024,12 +3430,91 @@ def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BR_JT : JRInst<(outs), (ins IntRegs:$src),
"jumpr $src",
- [(HexagonBR_JT IntRegs:$src)]>;
+ [(HexagonBR_JT (i32 IntRegs:$src))]>;
+
def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
def : Pat<(HexagonWrapperJT tjumptable:$dst),
- (CONST32_set_jt tjumptable:$dst)>;
+ (i32 (CONST32_set_jt tjumptable:$dst))>;
+
+// XTYPE/SHIFT
+
+// Multi-class for logical operators :
+// Shift by immediate/register and accumulate/logical
+multiclass xtype_imm<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
+ def _ri : SInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
+ [(set (i32 IntRegs:$dst),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$src2),
+ u5ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+ def d_ri : SInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")),
+ [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+}
+
+// Multi-class for logical operators :
+// Shift by register and accumulate/logical (32/64 bits)
+multiclass xtype_reg<string OpcStr, SDNode OpNode1, SDNode OpNode2> {
+ def _rr : SInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
+ [(set (i32 IntRegs:$dst),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">;
+
+ def d_rr : SInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")),
+ [(set (i64 DoubleRegs:$dst),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">;
+
+}
+
+multiclass basic_xtype_imm<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>;
+ defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>;
+ defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>;
+ defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>;
+}
+
+multiclass basic_xtype_reg<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>;
+ defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>;
+ defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>;
+ defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>;
+}
+
+multiclass xtype_xor_imm<string OpcStr, SDNode OpNode> {
+let AddedComplexity = 100 in
+ defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>;
+}
+
+defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>,
+ xtype_xor_imm<"asl", shl>;
+defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>,
+ xtype_xor_imm<"lsr", srl>;
+
+defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>;
+defm LSL : basic_xtype_reg<"lsl", shl>;
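+
+// For instance, the ASL/add combination above produces defs such as
+// ASL_ADD_ri, "$dst += asl($src2, #$src3)", matching (add x, (shl y, #u5));
+// the _SUB/_AND/_OR/_XOR variants follow the same shape with sub/and/or/xor
+// as the outer operation.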
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
+ (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
//===----------------------------------------------------------------------===//
// V3 Instructions +
@@ -3046,3 +3531,19 @@ include "HexagonInstrInfoV3.td"
//===----------------------------------------------------------------------===//
include "HexagonInstrInfoV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV5.td"
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions -
+//===----------------------------------------------------------------------===//
+
+
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
index a73897ee3451..157ab3d0e38c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -19,7 +19,7 @@
let isCall = 1, neverHasSideEffects = 1,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst),
"call $dst", []>, Requires<[HasV3T]>;
}
@@ -35,16 +35,17 @@ let isCall = 1, neverHasSideEffects = 1,
let isCall = 1, neverHasSideEffects = 1,
Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst),
"callr $dst",
[]>, Requires<[HasV3TOnly]>;
}
+// Jump to address from register
// if(p?.new) jumpr:t r?
let isReturn = 1, isTerminator = 1, isBarrier = 1,
Defs = [PC], Uses = [R31] in {
- def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) jumpr:t $src2",
[]>, Requires<[HasV3T]>;
}
@@ -52,7 +53,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1,
// if (!p?.new) jumpr:t r?
let isReturn = 1, isTerminator = 1, isBarrier = 1,
Defs = [PC], Uses = [R31] in {
- def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) jumpr:t $src2",
[]>, Requires<[HasV3T]>;
}
@@ -61,7 +62,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1,
// if(p?.new) jumpr:nt r?
let isReturn = 1, isTerminator = 1, isBarrier = 1,
Defs = [PC], Uses = [R31] in {
- def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) jumpr:nt $src2",
[]>, Requires<[HasV3T]>;
}
@@ -69,7 +70,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1,
// if (!p?.new) jumpr:nt r?
let isReturn = 1, isTerminator = 1, isBarrier = 1,
Defs = [PC], Uses = [R31] in {
- def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) jumpr:nt $src2",
[]>, Requires<[HasV3T]>;
}
@@ -86,20 +87,22 @@ let AddedComplexity = 200 in
def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = max($src2, $src1)",
- [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
Requires<[HasV3T]>;
let AddedComplexity = 200 in
def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
DoubleRegs:$src2),
"$dst = min($src2, $src1)",
- [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
Requires<[HasV3T]>;
//===----------------------------------------------------------------------===//
@@ -109,25 +112,25 @@ Requires<[HasV3T]>;
-//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset),
-// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset),
-// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset),
-// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset),
-// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset),
-// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
// Map call instruction
-def : Pat<(call IntRegs:$dst),
- (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
def : Pat<(call tglobaladdr:$dst),
(CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
def : Pat<(call texternalsym:$dst),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 9e60cf26d08f..70448fc7af38 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,6 +11,12 @@
//
//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1 in
+def IMMEXT : Immext<(outs), (ins),
+ "/* immext #... */",
+ []>,
+ Requires<[HasV4T]>;
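+// (IMMEXT models the V4 constant-extender word, which supplies the upper bits
+// of an extended 32-bit immediate for the instruction that follows it.)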
+
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
// compiler)
@@ -250,23 +256,151 @@ def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
[]>,
Requires<[HasV4T]>;
+// Generate frame index addresses.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$offset),
+ "$dst = add($src1, ##$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
//===----------------------------------------------------------------------===//
// ALU32 -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine
+// Rdd=combine(Rs, #s8)
+let neverHasSideEffects = 1 in
+def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = combine($src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+// Rdd=combine(#s8, Rs)
+let neverHasSideEffects = 1 in
+def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, IntRegs:$src2),
+ "$dst = combine(#$src1, $src2)",
+ []>,
+ Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
-///
-/// Make sure that in post increment load, the first operand is always the post
-/// increment operand.
-///
-//// Load doubleword.
-// Rdd=memd(Re=#U6)
+//
+// These absolute set addressing mode instructions accept an immediate as
+// an operand. We have duplicated these patterns to take a global address.
+
+let neverHasSideEffects = 1 in
+def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memd($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memb($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memub($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memuh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memw($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// The following patterns are defined for the absolute set addressing mode
+// instructions which take a global address as an operand.
+let neverHasSideEffects = 1 in
+def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memd($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memb($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+// Rd=memh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memub($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memw($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Load doubleword.
+//
+// Make sure that in a post-increment load, the first operand is always the
+// post-increment operand.
+//
// Rdd=memd(Rs+Rt<<#u2)
// Special case pattern for indexed load without offset which is easier to
// match. AddedComplexity of this pattern should be lower than base+offset load
@@ -276,56 +410,58 @@ let AddedComplexity = 10, isPredicable = 1 in
def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memd($src1+$src2<<#0)",
- [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memd($src1+$src2<<#$offset)",
- [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
//// Load doubleword conditionally.
// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memd($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memd($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memd($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memd($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memd($src2+$src3<<#$offset)",
@@ -333,8 +469,8 @@ def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
@@ -342,8 +478,8 @@ def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memd($src2+$src3<<#$offset)",
@@ -351,8 +487,8 @@ def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
@@ -362,99 +498,101 @@ def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
// Rdd=memd(Rt<<#u2+#U6)
//// Load byte.
-// Rd=memb(Re=#U6)
-
// Rd=memb(Rs+Rt<<#u2)
let AddedComplexity = 10, isPredicable = 1 in
def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memb($src1+$src2<<#0)",
- [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 10, isPredicable = 1 in
def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memub($src1+$src2<<#0)",
- [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 10, isPredicable = 1 in
def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memub($src1+$src2<<#0)",
- [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memb($src1+$src2<<#$offset)",
- [(set IntRegs:$dst,
- (sextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memub($src1+$src2<<#$offset)",
- [(set IntRegs:$dst,
- (zextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memub($src1+$src2<<#$offset)",
- [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
//// Load byte conditionally.
// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memb($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memb($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memb($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memb($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memb($src2+$src3<<#$offset)",
@@ -462,8 +600,8 @@ def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
@@ -471,8 +609,8 @@ def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memb($src2+$src3<<#$offset)",
@@ -480,8 +618,8 @@ def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
@@ -491,40 +629,40 @@ def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
//// Load unsigned byte conditionally.
// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memub($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memub($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memub($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memub($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memub($src2+$src3<<#$offset)",
@@ -532,8 +670,8 @@ def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
@@ -541,8 +679,8 @@ def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memub($src2+$src3<<#$offset)",
@@ -550,8 +688,8 @@ def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
@@ -561,31 +699,32 @@ def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
// Rd=memb(Rt<<#u2+#U6)
//// Load halfword
-// Rd=memh(Re=#U6)
-
// Rd=memh(Rs+Rt<<#u2)
let AddedComplexity = 10, isPredicable = 1 in
def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memh($src1+$src2<<#0)",
- [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 10, isPredicable = 1 in
def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memuh($src1+$src2<<#0)",
- [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 10, isPredicable = 1 in
def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memuh($src1+$src2<<#0)",
- [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
// Rd=memh(Rs+Rt<<#u2)
@@ -593,69 +732,69 @@ let AddedComplexity = 40, isPredicable = 1 in
def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memh($src1+$src2<<#$offset)",
- [(set IntRegs:$dst,
- (sextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memuh($src1+$src2<<#$offset)",
- [(set IntRegs:$dst,
- (zextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
let AddedComplexity = 40, isPredicable = 1 in
def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memuh($src1+$src2<<#$offset)",
- [(set IntRegs:$dst,
- (extloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
//// Load halfword conditionally.
// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memh($src2+$src3<<#$offset)",
@@ -663,8 +802,8 @@ def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
@@ -672,8 +811,8 @@ def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memh($src2+$src3<<#$offset)",
@@ -681,8 +820,8 @@ def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
@@ -692,40 +831,40 @@ def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
//// Load unsigned halfword conditionally.
// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memuh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memuh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memuh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memuh($src2+$src3<<#$offset)",
@@ -733,8 +872,8 @@ def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
@@ -742,8 +881,8 @@ def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
@@ -751,8 +890,8 @@ def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
@@ -762,6 +901,14 @@ def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
// Rd=memh(Rt<<#u2+#U6)
//// Load word.
+// Load predicate: Fix for bug 5279.
+let neverHasSideEffects = 1 in
+def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
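+// The asm string above is deliberate: this instruction is never meant to be
+// printed directly and is presumably rewritten (e.g. when reloading a spilled
+// predicate register) before any code is emitted.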
+
// Rd=memw(Re=#U6)
// Rd=memw(Rs+Rt<<#u2)
@@ -769,8 +916,9 @@ let AddedComplexity = 10, isPredicable = 1 in
def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst=memw($src1+$src2<<#0)",
- [(set IntRegs:$dst, (load (add IntRegs:$src1,
- IntRegs:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
Requires<[HasV4T]>;
// Rd=memw(Rs+Rt<<#u2)
@@ -778,48 +926,49 @@ let AddedComplexity = 40, isPredicable = 1 in
def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst=memw($src1+$src2<<#$offset)",
- [(set IntRegs:$dst, (load (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$offset))))]>,
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
Requires<[HasV4T]>;
//// Load word conditionally.
// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
// if (Pv) Rd=memw(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst=memw($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst=memw($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst=memw($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst=memw($src2+$src3<<#0)",
[]>,
Requires<[HasV4T]>;
// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1) $dst=memw($src2+$src3<<#$offset)",
@@ -827,8 +976,8 @@ def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
@@ -836,8 +985,8 @@ def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1) $dst=memw($src2+$src3<<#$offset)",
@@ -845,8 +994,8 @@ def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+let AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
u2Imm:$offset),
"if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
@@ -859,102 +1008,729 @@ def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
// Post-inc Load, Predicated, Dot new
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
"if ($src1.new) $dst1 = memd($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
"if (!$src1.new) $dst1 = memd($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if ($src1.new) $dst1 = memb($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if (!$src1.new) $dst1 = memb($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if ($src1.new) $dst1 = memh($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if (!$src1.new) $dst1 = memh($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if ($src1.new) $dst1 = memub($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
"if (!$src1.new) $dst1 = memub($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if ($src1.new) $dst1 = memuh($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
"if (!$src1.new) $dst1 = memuh($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
"if ($src1.new) $dst1 = memw($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
-let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
"if (!$src1.new) $dst1 = memw($src2++#$src3)",
[],
"$src2 = $dst2">,
Requires<[HasV4T]>;
+// Load from global offset
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rtt=memd(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rtt=memd(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rtt=memd(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rtt=memd(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memb(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memb(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memb(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memb(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memub(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memub(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memub(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memub(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memuh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memuh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memuh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memuh(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memw(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memw(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memw(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rt=memw(##global)
+let neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+
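+// Map atomic loads of a global address onto the corresponding (non-atomic)
+// global-address load instructions defined above.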
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
+ Requires<[HasV4T]>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
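+// For example (illustrative only), a global such as
+//     static int Flag;    // only ever assigned 0 or 1
+// may be shrunk to an i1, so ordinary i32 uses of it show up here as
+// extending loads of an i1 from the global address.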
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
//===----------------------------------------------------------------------===//
// LD -
@@ -971,18 +1747,70 @@ def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
/// last operand.
///
-// Store doubleword.
// memd(Re=#U6)=Rtt
-// TODO: needs to be implemented
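+// In the absolute-set addressing forms below, the source register is stored
+// to the absolute address (#u6, or ##global in the extended forms) and that
+// address is also written back into the Re operand ($dst1).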
+def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, u6Imm:$src2),
+ "memd($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=#U6)=Rs
+def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memb($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=#U6)=Rs
+def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memh($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=#U6)=Rs
+def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memw($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(Re=##global)=Rtt
+def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, globaladdress:$src2),
+ "memd($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=##global)=Rs
+def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memb($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=##global)=Rs
+def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memh($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=##global)=Rs
+def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memw($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
-// memd(Rs+#s11:3)=Rtt
// memd(Rs+Ru<<#u2)=Rtt
let AddedComplexity = 10, isPredicable = 1 in
def STrid_indexed_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
"memd($src1+$src2<<#$src3) = $src4",
- [(store DoubleRegs:$src4, (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ [(store (i64 DoubleRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// memd(Ru<<#u2+#U6)=Rtt
@@ -990,9 +1818,9 @@ let AddedComplexity = 10 in
def STrid_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
"memd($src1<<#$src2+#$src3) = $src4",
- [(store DoubleRegs:$src4, (shl IntRegs:$src1,
- (add u2ImmPred:$src2,
- u6ImmPred:$src3)))]>,
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
Requires<[HasV4T]>;
// memd(Rx++#s4:3)=Rtt
@@ -1009,8 +1837,9 @@ def STrid_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
// if (Pv) memd(Rs+#u6:3)=Rtt
// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
"if ($src1.new) memd($addr) = $src2",
[]>,
@@ -1018,8 +1847,9 @@ def STrid_cdnPt_V4 : STInst<(outs),
// if (!Pv) memd(Rs+#u6:3)=Rtt
// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
"if (!$src1.new) memd($addr) = $src2",
[]>,
@@ -1027,8 +1857,9 @@ def STrid_cdnNotPt_V4 : STInst<(outs),
// if (Pv) memd(Rs+#u6:3)=Rtt
// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
DoubleRegs:$src4),
"if ($src1.new) memd($src2+#$src3) = $src4",
@@ -1037,8 +1868,9 @@ def STrid_indexed_cdnPt_V4 : STInst<(outs),
// if (!Pv) memd(Rs+#u6:3)=Rtt
// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
DoubleRegs:$src4),
"if (!$src1.new) memd($src2+#$src3) = $src4",
@@ -1047,8 +1879,9 @@ def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
// if (Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
DoubleRegs:$src5),
"if ($src1) memd($src2+$src3<<#$src4) = $src5",
@@ -1056,24 +1889,27 @@ def STrid_indexed_shl_cPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
DoubleRegs:$src5),
- "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
[]>,
Requires<[HasV4T]>;
// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
DoubleRegs:$src5),
"if (!$src1) memd($src2+$src3<<#$src4) = $src5",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
DoubleRegs:$src5),
"if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
@@ -1083,8 +1919,9 @@ def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
// if (Pv) memd(Rx++#s4:3)=Rtt
// if (Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
s4_3Imm:$offset),
"if ($src1.new) memd($src3++#$offset) = $src2",
@@ -1094,8 +1931,9 @@ def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
// if (!Pv) memd(Rx++#s4:3)=Rtt
// if (!Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
s4_3Imm:$offset),
"if (!$src1.new) memd($src3++#$offset) = $src2",
@@ -1105,15 +1943,12 @@ def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
// Store byte.
-// memb(Re=#U6)=Rt
-// TODO: needs to be implemented.
-// memb(Rs+#s11:0)=Rt
// memb(Rs+#u6:0)=#S8
let AddedComplexity = 10, isPredicable = 1 in
def STrib_imm_V4 : STInst<(outs),
(ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
"memb($src1+#$src2) = #$src3",
- [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1,
+ [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
u6_0ImmPred:$src2))]>,
Requires<[HasV4T]>;
@@ -1122,9 +1957,10 @@ let AddedComplexity = 10, isPredicable = 1 in
def STrib_indexed_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
"memb($src1+$src2<<#$src3) = $src4",
- [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$src3)))]>,
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// memb(Ru<<#u2+#U6)=Rt
@@ -1132,9 +1968,9 @@ let AddedComplexity = 10 in
def STrib_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
"memb($src1<<#$src2+#$src3) = $src4",
- [(truncstorei8 IntRegs:$src4, (shl IntRegs:$src1,
- (add u2ImmPred:$src2,
- u6ImmPred:$src3)))]>,
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
Requires<[HasV4T]>;
// memb(Rx++#s4:0:circ(Mu))=Rt
@@ -1148,32 +1984,36 @@ def STrib_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memb(#u6)=Rt
// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
// if (Pv) memb(Rs+#u6:0)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_imm_cPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
"if ($src1) memb($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) memb(Rs+#u6:0)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_imm_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
"if ($src1.new) memb($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv) memb(Rs+#u6:0)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_imm_cNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
"if (!$src1) memb($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+#u6:0)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
"if (!$src1.new) memb($src2+#$src3) = #$src4",
[]>,
@@ -1182,8 +2022,9 @@ def STrib_imm_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
// if (Pv) memb(Rs+#u6:0)=Rt
// if (Pv.new) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memb($addr) = $src2",
[]>,
@@ -1191,8 +2032,9 @@ def STrib_cdnPt_V4 : STInst<(outs),
// if (!Pv) memb(Rs+#u6:0)=Rt
// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memb($addr) = $src2",
[]>,
@@ -1201,16 +2043,18 @@ def STrib_cdnNotPt_V4 : STInst<(outs),
// if (Pv) memb(Rs+#u6:0)=Rt
// if (!Pv) memb(Rs+#u6:0)=Rt
// if (Pv.new) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_indexed_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if ($src1.new) memb($src2+#$src3) = $src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memb($src2+#$src3) = $src4",
[]>,
@@ -1218,8 +2062,9 @@ def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
// if (Pv) memb(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1) memb($src2+$src3<<#$src4) = $src5",
@@ -1227,8 +2072,9 @@ def STrib_indexed_shl_cPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
@@ -1236,8 +2082,9 @@ def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1) memb($src2+$src3<<#$src4) = $src5",
@@ -1245,8 +2092,9 @@ def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
@@ -1256,8 +2104,9 @@ def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
// if (Pv) memb(Rx++#s4:0)=Rt
// if (Pv.new) memb(Rx++#s4:0)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if ($src1.new) memb($src3++#$offset) = $src2",
[],"$src3 = $dst">,
@@ -1265,8 +2114,9 @@ def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
// if (!Pv) memb(Rx++#s4:0)=Rt
// if (!Pv.new) memb(Rx++#s4:0)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if (!$src1.new) memb($src3++#$offset) = $src2",
[],"$src3 = $dst">,
@@ -1274,20 +2124,15 @@ def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
// Store halfword.
-// memh(Re=#U6)=Rt.H
-// TODO: needs to be implemented
-
-// memh(Re=#U6)=Rt
// TODO: needs to be implemented
-
+// memh(Re=#U6)=Rt.H
// memh(Rs+#s11:1)=Rt.H
-// memh(Rs+#s11:1)=Rt
// memh(Rs+#u6:1)=#S8
let AddedComplexity = 10, isPredicable = 1 in
def STrih_imm_V4 : STInst<(outs),
(ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
"memh($src1+#$src2) = #$src3",
- [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1,
+ [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
u6_1ImmPred:$src2))]>,
Requires<[HasV4T]>;
@@ -1299,9 +2144,10 @@ let AddedComplexity = 10, isPredicable = 1 in
def STrih_indexed_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
"memh($src1+$src2<<#$src3) = $src4",
- [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1,
- (shl IntRegs:$src2,
- u2ImmPred:$src3)))]>,
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// memh(Ru<<#u2+#U6)=Rt.H
@@ -1310,9 +2156,9 @@ let AddedComplexity = 10 in
def STrih_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4",
- [(truncstorei16 IntRegs:$src4, (shl IntRegs:$src1,
- (add u2ImmPred:$src2,
- u6ImmPred:$src3)))]>,
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
Requires<[HasV4T]>;
// memh(Rx++#s4:1:circ(Mu))=Rt.H
@@ -1323,42 +2169,42 @@ def STrih_shl_V4 : STInst<(outs),
// memh(Rx++Mu)=Rt
// memh(Rx++Mu:brev)=Rt.H
// memh(Rx++Mu:brev)=Rt
-// memh(gp+#u16:1)=Rt.H
// memh(gp+#u16:1)=Rt
-
-
-// Store halfword conditionally.
// if ([!]Pv[.new]) memh(#u6)=Rt.H
// if ([!]Pv[.new]) memh(#u6)=Rt
// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
// if (Pv) memh(Rs+#u6:1)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_imm_cPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
"if ($src1) memh($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+#u6:1)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_imm_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
"if ($src1.new) memh($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv) memh(Rs+#u6:1)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_imm_cNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
"if (!$src1) memh($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+#u6:1)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
"if (!$src1.new) memh($src2+#$src3) = #$src4",
[]>,
@@ -1370,8 +2216,9 @@ def STrih_imm_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
// if (Pv) memh(Rs+#u6:1)=Rt
// if (Pv.new) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memh($addr) = $src2",
[]>,
@@ -1379,24 +2226,27 @@ def STrih_cdnPt_V4 : STInst<(outs),
// if (!Pv) memh(Rs+#u6:1)=Rt
// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memh($addr) = $src2",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_indexed_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if ($src1.new) memh($src2+#$src3) = $src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memh($src2+#$src3) = $src4",
[]>,
@@ -1405,8 +2255,9 @@ def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
// if (Pv) memh(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1) memh($src2+$src3<<#$src4) = $src5",
@@ -1414,7 +2265,9 @@ def STrih_indexed_shl_cPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
-def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
@@ -1422,8 +2275,9 @@ def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1) memh($src2+$src3<<#$src4) = $src5",
@@ -1431,8 +2285,9 @@ def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
@@ -1445,8 +2300,9 @@ def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
// if (Pv) memh(Rx++#s4:1)=Rt
// if (Pv.new) memh(Rx++#s4:1)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if ($src1.new) memh($src3++#$offset) = $src2",
[],"$src3 = $dst">,
@@ -1454,8 +2310,9 @@ def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
// if (!Pv) memh(Rx++#s4:1)=Rt
// if (!Pv.new) memh(Rx++#s4:1)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if (!$src1.new) memh($src3++#$offset) = $src2",
[],"$src3 = $dst">,
@@ -1466,13 +2323,22 @@ def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
-// memw(Rs+#s11:2)=Rt
+// Store predicate:
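+// STriw_pred_V4 is a pseudo instruction: the asm string below is only a
+// placeholder, and the instruction is expected to be expanded (e.g. when a
+// predicate register is spilled through an integer register) before any
+// code is emitted.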
+let neverHasSideEffects = 1 in
+def STriw_pred_V4 : STInst2<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
+
+
// memw(Rs+#u6:2)=#S8
let AddedComplexity = 10, isPredicable = 1 in
def STriw_imm_V4 : STInst<(outs),
(ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
"memw($src1+#$src2) = #$src3",
- [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_2ImmPred:$src2))]>,
Requires<[HasV4T]>;
// memw(Rs+Ru<<#u2)=Rt
@@ -1480,8 +2346,9 @@ let AddedComplexity = 10, isPredicable = 1 in
def STriw_indexed_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
"memw($src1+$src2<<#$src3) = $src4",
- [(store IntRegs:$src4, (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// memw(Ru<<#u2+#U6)=Rt
@@ -1489,8 +2356,9 @@ let AddedComplexity = 10 in
def STriw_shl_V4 : STInst<(outs),
(ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4",
- [(store IntRegs:$src4, (shl IntRegs:$src1,
- (add u2ImmPred:$src2, u6ImmPred:$src3)))]>,
+ [(store (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
Requires<[HasV4T]>;
// memw(Rx++#s4:2)=Rt
@@ -1502,37 +2370,39 @@ def STriw_shl_V4 : STInst<(outs),
// Store word conditionally.
-// if ([!]Pv[.new]) memw(#u6)=Rt
-// TODO: Needs to be implemented.
// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
// if (Pv) memw(Rs+#u6:2)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_imm_cPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
"if ($src1) memw($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (Pv.new) memw(Rs+#u6:2)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_imm_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
"if ($src1.new) memw($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv) memw(Rs+#u6:2)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_imm_cNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
"if (!$src1) memw($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+#u6:2)=#S6
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
"if (!$src1.new) memw($src2+#$src3) = #$src4",
[]>,
@@ -1541,8 +2411,9 @@ def STriw_imm_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
// if (Pv) memw(Rs+#u6:2)=Rt
// if (Pv.new) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memw($addr) = $src2",
[]>,
@@ -1550,8 +2421,9 @@ def STriw_cdnPt_V4 : STInst<(outs),
// if (!Pv) memw(Rs+#u6:2)=Rt
// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memw($addr) = $src2",
[]>,
@@ -1560,16 +2432,18 @@ def STriw_cdnNotPt_V4 : STInst<(outs),
// if (Pv) memw(Rs+#u6:2)=Rt
// if (!Pv) memw(Rs+#u6:2)=Rt
// if (Pv.new) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_indexed_cdnPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if ($src1.new) memw($src2+#$src3) = $src4",
[]>,
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+let neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memw($src2+#$src3) = $src4",
[]>,
@@ -1577,8 +2451,9 @@ def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
// if (Pv) memw(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1) memw($src2+$src3<<#$src4) = $src5",
@@ -1586,8 +2461,9 @@ def STriw_indexed_shl_cPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
@@ -1595,8 +2471,9 @@ def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1) memw($src2+$src3<<#$src4) = $src5",
@@ -1604,8 +2481,9 @@ def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
-let mayStore = 1, AddedComplexity = 10 in
-def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+let AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
"if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
@@ -1615,8 +2493,9 @@ def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
// if (Pv) memw(Rx++#s4:2)=Rt
// if (Pv.new) memw(Rx++#s4:2)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if ($src1.new) memw($src3++#$offset) = $src2",
[],"$src3 = $dst">,
@@ -1624,14 +2503,456 @@ def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
// if (!Pv) memw(Rx++#s4:2)=Rt
// if (!Pv.new) memw(Rx++#s4:2)=Rt
-let mayStore = 1, hasCtrlDep = 1 in
-def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+let hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if (!$src1.new) memw($src3++#$offset) = $src2",
[],"$src3 = $dst">,
Requires<[HasV4T]>;
+/// store to global address
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STrid_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STrib_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STrih_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STriw_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(#global)=Rtt
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STd_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, DoubleRegs:$src),
+ "memd(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(##global) = Rtt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(##global) = Rtt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(##global) = Rtt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memd(##global) = Rtt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(#global)=Rt
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STb_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(#global)=Rt
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STh_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(#global)=Rt
+let isPredicable = 1, neverHasSideEffects = 1 in
+def STw_GP_V4 : STInst2<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Rt
+let neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+
+
//===----------------------------------------------------------------------===
// ST -
//===----------------------------------------------------------------------===
@@ -1696,11 +3017,19 @@ def STrib_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
+// memb(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
// Store new-value byte conditionally.
// if ([!]Pv[.new]) memb(#u6)=Nt.new
// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memb($addr) = $src2.new",
@@ -1708,7 +3037,8 @@ def STrib_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memb($addr) = $src2.new",
@@ -1716,7 +3046,8 @@ def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memb($addr) = $src2.new",
@@ -1724,7 +3055,8 @@ def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memb($addr) = $src2.new",
@@ -1732,7 +3064,8 @@ def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if ($src1) memb($src2+#$src3) = $src4.new",
@@ -1740,7 +3073,8 @@ def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if ($src1.new) memb($src2+#$src3) = $src4.new",
@@ -1748,7 +3082,8 @@ def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if (!$src1) memb($src2+#$src3) = $src4.new",
@@ -1756,7 +3091,8 @@ def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memb($src2+#$src3) = $src4.new",
@@ -1766,7 +3102,8 @@ def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1775,7 +3112,8 @@ def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1784,7 +3122,8 @@ def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1793,7 +3132,8 @@ def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1803,7 +3143,8 @@ def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
// if (Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if ($src1) memb($src3++#$offset) = $src2.new",
@@ -1811,7 +3152,8 @@ def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if ($src1.new) memb($src3++#$offset) = $src2.new",
@@ -1819,7 +3161,8 @@ def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if (!$src1) memb($src3++#$offset) = $src2.new",
@@ -1827,7 +3170,8 @@ def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
"if (!$src1.new) memb($src3++#$offset) = $src2.new",
@@ -1889,6 +3233,14 @@ def STrih_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
+// memh(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
// Store new-value halfword conditionally.
@@ -1896,7 +3248,8 @@ def STrih_GP_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memh($addr) = $src2.new",
@@ -1904,7 +3257,8 @@ def STrih_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memh($addr) = $src2.new",
@@ -1912,7 +3266,8 @@ def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memh($addr) = $src2.new",
@@ -1920,7 +3275,8 @@ def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memh($addr) = $src2.new",
@@ -1928,7 +3284,8 @@ def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if ($src1) memh($src2+#$src3) = $src4.new",
@@ -1936,7 +3293,8 @@ def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if ($src1.new) memh($src2+#$src3) = $src4.new",
@@ -1944,7 +3302,8 @@ def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if (!$src1) memh($src2+#$src3) = $src4.new",
@@ -1952,7 +3311,8 @@ def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memh($src2+#$src3) = $src4.new",
@@ -1961,7 +3321,8 @@ def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1970,7 +3331,8 @@ def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1979,7 +3341,8 @@ def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1988,7 +3351,8 @@ def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -1998,7 +3362,8 @@ def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
// if (Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if ($src1) memh($src3++#$offset) = $src2.new",
@@ -2006,7 +3371,8 @@ def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if ($src1.new) memh($src3++#$offset) = $src2.new",
@@ -2014,7 +3380,8 @@ def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if (!$src1) memh($src3++#$offset) = $src2.new",
@@ -2022,7 +3389,8 @@ def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
"if (!$src1.new) memh($src3++#$offset) = $src2.new",
@@ -2085,6 +3453,12 @@ def STriw_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
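+// memw(#global)=Nt.new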
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
// Store new-value word conditionally.
@@ -2092,7 +3466,8 @@ def STriw_GP_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1) memw($addr) = $src2.new",
@@ -2100,7 +3475,8 @@ def STriw_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if ($src1.new) memw($addr) = $src2.new",
@@ -2108,7 +3484,8 @@ def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1) memw($addr) = $src2.new",
@@ -2116,7 +3493,8 @@ def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
"if (!$src1.new) memw($addr) = $src2.new",
@@ -2124,7 +3502,8 @@ def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if ($src1) memw($src2+#$src3) = $src4.new",
@@ -2132,7 +3511,8 @@ def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if ($src1.new) memw($src2+#$src3) = $src4.new",
@@ -2140,7 +3520,8 @@ def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if (!$src1) memw($src2+#$src3) = $src4.new",
@@ -2148,7 +3529,8 @@ def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
"if (!$src1.new) memw($src2+#$src3) = $src4.new",
@@ -2158,7 +3540,8 @@ def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -2167,7 +3550,8 @@ def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -2176,7 +3560,8 @@ def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -2185,7 +3570,8 @@ def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
IntRegs:$src5),
@@ -2195,7 +3581,8 @@ def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
// if (Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if ($src1) memw($src3++#$offset) = $src2.new",
@@ -2203,7 +3590,8 @@ def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if ($src1.new) memw($src3++#$offset) = $src2.new",
@@ -2211,7 +3599,8 @@ def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if (!$src1) memw($src3++#$offset) = $src2.new",
@@ -2219,7 +3608,8 @@ def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1 in
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
"if (!$src1.new) memw($src3++#$offset) = $src2.new",
@@ -2227,6 +3617,199 @@ def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
Requires<[HasV4T]>;
+
+// if (Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
//===----------------------------------------------------------------------===//
// NV/ST -
//===----------------------------------------------------------------------===//
@@ -2253,7 +3836,8 @@ multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
Requires<[HasV4T]>;
}
-multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, string TakenStr> {
+multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr,
+ string TakenStr> {
def _ie_nv_V4 : NVInst_V4<(outs),
(ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
!strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
@@ -2307,7 +3891,8 @@ multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
Requires<[HasV4T]>;
}
-multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, string TakenStr> {
+multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr,
+ string TakenStr> {
def _ie_nv_V4 : NVInst_V4<(outs),
(ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
!strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
@@ -2416,16 +4001,18 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
"$dst = add($src1, add($src2, #$src3))",
- [(set IntRegs:$dst,
- (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ s6ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// Rd=add(Rs,sub(#s6,Ru))
def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
- [(set IntRegs:$dst,
- (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>,
Requires<[HasV4T]>;
// Generates the same instruction as ADDr_SUBri_V4 but matches different
@@ -2434,8 +4021,9 @@ def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
- [(set IntRegs:$dst,
- (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>,
+ [(set (i32 IntRegs:$dst),
+ (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (i32 IntRegs:$src3)))]>,
Requires<[HasV4T]>;
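+// Note: Rs + (#s6 - Rt) and (Rs + #s6) - Rt compute the same value, so both
+// DAG shapes map to the same "add($src1, sub(#$src2, $src3))" instruction.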
@@ -2451,16 +4039,16 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = and($src1, ~$src2)",
- [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
- (not DoubleRegs:$src2)))]>,
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (not (i64 DoubleRegs:$src2))))]>,
Requires<[HasV4T]>;
// Rdd=or(Rtt,~Rss)
def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = or($src1, ~$src2)",
- [(set DoubleRegs:$dst,
- (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+ [(set (i64 DoubleRegs:$dst),
+ (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>,
Requires<[HasV4T]>;
@@ -2469,8 +4057,9 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
"$dst ^= xor($src2, $src3)",
- [(set DoubleRegs:$dst,
- (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))],
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2480,8 +4069,9 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
"$dst = or($src1, and($src2, #$src3))",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2490,8 +4080,9 @@ def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, $src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2499,8 +4090,9 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, $src3)",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2508,8 +4100,9 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, $src3)",
- [(set IntRegs:$dst,
- (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2518,8 +4111,9 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, ~$src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2527,8 +4121,9 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, ~$src3)",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2536,8 +4131,9 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, ~$src3)",
- [(set IntRegs:$dst,
- (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2546,8 +4142,9 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= or($src2, $src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2555,8 +4152,9 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= or($src2, $src3)",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2564,8 +4162,9 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= or($src2, $src3)",
- [(set IntRegs:$dst,
- (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2574,8 +4173,9 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= xor($src2, $src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2583,8 +4183,9 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= xor($src2, $src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+          (or (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2592,8 +4193,9 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= xor($src2, $src3)",
- [(set IntRegs:$dst,
- (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+          (xor (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2601,8 +4203,9 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
"$dst |= and($src2, #$src3)",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2610,8 +4213,9 @@ def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
"$dst |= or($src2, #$src3)",
- [(set IntRegs:$dst,
- (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ [(set (i32 IntRegs:$dst),
+          (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2663,8 +4267,9 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
(ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
"$dst = add(#$src1, mpyi($src2, #$src3))",
- [(set IntRegs:$dst,
- (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ u6ImmPred:$src1))]>,
Requires<[HasV4T]>;
// Rd=add(#u6,mpyi(Rs,Rt))
@@ -2672,32 +4277,36 @@ def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
(ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add(#$src1, mpyi($src2, $src3))",
- [(set IntRegs:$dst,
- (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ u6ImmPred:$src1))]>,
Requires<[HasV4T]>;
// Rd=add(Ru,mpyi(#u6:2,Rs))
def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi(#$src2, $src3))",
- [(set IntRegs:$dst,
- (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
+ u6_2ImmPred:$src2)))]>,
Requires<[HasV4T]>;
// Rd=add(Ru,mpyi(Rs,#u6))
def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
"$dst = add($src1, mpyi($src2, #$src3))",
- [(set IntRegs:$dst,
- (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>,
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u6ImmPred:$src3)))]>,
Requires<[HasV4T]>;
// Rx=add(Ru,mpyi(Rx,Rs))
def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi($src2, $src3))",
- [(set IntRegs:$dst,
- (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))],
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2745,8 +4354,9 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, asl($src2, #$src3))",
- [(set IntRegs:$dst,
- (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2754,8 +4364,9 @@ def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, lsr($src2, #$src3))",
- [(set IntRegs:$dst,
- (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2763,8 +4374,9 @@ def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, asl($src2, #$src3))",
- [(set IntRegs:$dst,
- (sub (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2772,8 +4384,9 @@ def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, lsr($src2, #$src3))",
- [(set IntRegs:$dst,
- (sub (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2783,8 +4396,9 @@ def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, asl($src2, #$src3))",
- [(set IntRegs:$dst,
- (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2792,26 +4406,31 @@ def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, lsr($src2, #$src3))",
- [(set IntRegs:$dst,
- (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
//Rx=or(#u8,asl(Rx,#U5))
+let AddedComplexity = 30 in
def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, asl($src2, #$src3))",
- [(set IntRegs:$dst,
- (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
//Rx=or(#u8,lsr(Rx,#U5))
+let AddedComplexity = 30 in
def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
(ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, lsr($src2, #$src3))",
- [(set IntRegs:$dst,
- (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ [(set (i32 IntRegs:$dst),
+ (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
@@ -2820,7 +4439,8 @@ def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
//Rd=lsl(#s6,Rt)
def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
"$dst = lsl(#$src1, $src2)",
- [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>,
+ [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
+ (i32 IntRegs:$src2)))]>,
Requires<[HasV4T]>;
@@ -2829,8 +4449,9 @@ def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
"$dst ^= asl($src2, $src3)",
- [(set DoubleRegs:$dst,
- (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2838,8 +4459,9 @@ def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
"$dst ^= asr($src2, $src3)",
- [(set DoubleRegs:$dst,
- (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))],
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2847,8 +4469,9 @@ def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
"$dst ^= lsl($src2, $src3)",
- [(set DoubleRegs:$dst,
- (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2856,8 +4479,9 @@ def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
"$dst ^= lsr($src2, $src3)",
- [(set DoubleRegs:$dst,
- (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))],
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
@@ -2903,16 +4527,16 @@ let AddedComplexity = 30 in
def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
"Error; should not emit",
- [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
-m6ImmPred:$addend),
- (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memw(Rs+#u6:2) += #U5
let AddedComplexity = 30 in
def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
- "memw($base+#$offset) += $addend",
+ "memw($base+#$offset) += #$addend",
[]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -2920,7 +4544,7 @@ def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
let AddedComplexity = 30 in
def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
- "memw($base+#$offset) -= $subend",
+ "memw($base+#$offset) -= #$subend",
[]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -2929,9 +4553,9 @@ let AddedComplexity = 30 in
def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
"memw($base+#$offset) += $addend",
- [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
-IntRegs:$addend),
- (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memw(Rs+#u6:2) -= Rt
@@ -2939,19 +4563,19 @@ let AddedComplexity = 30 in
def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
"memw($base+#$offset) -= $subend",
- [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
-IntRegs:$subend),
- (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memw(Rs+#u6:2) &= Rt
let AddedComplexity = 30 in
def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
- "memw($base+#$offset) += $andend",
- [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
-IntRegs:$andend),
- (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ "memw($base+#$offset) &= $andend",
+ [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memw(Rs+#u6:2) |= Rt
@@ -2959,9 +4583,9 @@ let AddedComplexity = 30 in
def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
"memw($base+#$offset) |= $orend",
- [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
- IntRegs:$orend),
- (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// MEMw_ADDSUBi_V4:
@@ -2996,7 +4620,7 @@ let AddedComplexity = 30 in
def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$addend),
"memw($addr) += $addend",
- [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend),
+ [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
ADDRriU6_2:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -3005,7 +4629,7 @@ let AddedComplexity = 30 in
def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$subend),
"memw($addr) -= $subend",
- [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend),
+ [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
ADDRriU6_2:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -3014,7 +4638,7 @@ let AddedComplexity = 30 in
def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$andend),
"memw($addr) &= $andend",
- [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend),
+ [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
ADDRriU6_2:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -3023,8 +4647,8 @@ let AddedComplexity = 30 in
def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$orend),
"memw($addr) |= $orend",
- [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend),
-ADDRriU6_2:$addr)]>,
+ [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
+ ADDRriU6_2:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
//===----------------------------------------------------------------------===//
@@ -3060,10 +4684,10 @@ let AddedComplexity = 30 in
def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
"Error; should not emit",
- [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
u6_1ImmPred:$offset)),
m6ImmPred:$addend),
- (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) += #U5
@@ -3087,10 +4711,10 @@ let AddedComplexity = 30 in
def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
"memh($base+#$offset) += $addend",
- [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
u6_1ImmPred:$offset)),
- IntRegs:$addend),
- (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) -= Rt
@@ -3098,10 +4722,10 @@ let AddedComplexity = 30 in
def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
"memh($base+#$offset) -= $subend",
- [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+ [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
u6_1ImmPred:$offset)),
- IntRegs:$subend),
- (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) &= Rt
@@ -3109,10 +4733,10 @@ let AddedComplexity = 30 in
def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
"memh($base+#$offset) += $andend",
- [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+ [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
u6_1ImmPred:$offset)),
- IntRegs:$andend),
- (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) |= Rt
@@ -3120,10 +4744,10 @@ let AddedComplexity = 30 in
def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
"memh($base+#$offset) |= $orend",
- [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+ [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
u6_1ImmPred:$offset)),
- IntRegs:$orend),
- (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// MEMh_ADDSUBi_V4:
@@ -3159,7 +4783,7 @@ def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$addend),
"memh($addr) += $addend",
[(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- IntRegs:$addend), ADDRriU6_1:$addr)]>,
+ (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) -= Rt
@@ -3168,7 +4792,7 @@ def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$subend),
"memh($addr) -= $subend",
[(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
- IntRegs:$subend), ADDRriU6_1:$addr)]>,
+ (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) &= Rt
@@ -3177,7 +4801,7 @@ def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$andend),
"memh($addr) &= $andend",
[(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
- IntRegs:$andend), ADDRriU6_1:$addr)]>,
+ (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memh(Rs+#u6:1) |= Rt
@@ -3186,7 +4810,7 @@ def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$orend),
"memh($addr) |= $orend",
[(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
- IntRegs:$orend), ADDRriU6_1:$addr)]>,
+ (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -3223,10 +4847,10 @@ let AddedComplexity = 30 in
def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
"Error; should not emit",
- [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
u6_0ImmPred:$offset)),
m6ImmPred:$addend),
- (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) += #U5
@@ -3250,10 +4874,10 @@ let AddedComplexity = 30 in
def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
"memb($base+#$offset) += $addend",
- [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
u6_0ImmPred:$offset)),
- IntRegs:$addend),
- (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) -= Rt
@@ -3261,10 +4885,10 @@ let AddedComplexity = 30 in
def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
"memb($base+#$offset) -= $subend",
- [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base,
+ [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
u6_0ImmPred:$offset)),
- IntRegs:$subend),
- (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) &= Rt
@@ -3272,10 +4896,10 @@ let AddedComplexity = 30 in
def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
"memb($base+#$offset) += $andend",
- [(truncstorei8 (and (sextloadi8 (add IntRegs:$base,
+ [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
u6_0ImmPred:$offset)),
- IntRegs:$andend),
- (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) |= Rt
@@ -3283,10 +4907,10 @@ let AddedComplexity = 30 in
def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
"memb($base+#$offset) |= $orend",
- [(truncstorei8 (or (sextloadi8 (add IntRegs:$base,
+ [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
u6_0ImmPred:$offset)),
- IntRegs:$orend),
- (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
// MEMb_ADDSUBi_V4:
@@ -3322,7 +4946,7 @@ def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$addend),
"memb($addr) += $addend",
[(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- IntRegs:$addend), ADDRriU6_0:$addr)]>,
+ (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) -= Rt
@@ -3331,7 +4955,7 @@ def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$subend),
"memb($addr) -= $subend",
[(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
- IntRegs:$subend), ADDRriU6_0:$addr)]>,
+ (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) &= Rt
@@ -3340,7 +4964,7 @@ def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$andend),
"memb($addr) &= $andend",
[(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
- IntRegs:$andend), ADDRriU6_0:$addr)]>,
+ (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
// memb(Rs+#u6:0) |= Rt
@@ -3349,7 +4973,7 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
(ins MEMri:$addr, IntRegs:$orend),
"memb($addr) |= $orend",
[(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
- IntRegs:$orend), ADDRriU6_0:$addr)]>,
+ (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
Requires<[HasV4T, UseMEMOP]>;
@@ -3364,13 +4988,16 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
// The implemented patterns are: EQ/GT/GTU.
// Missing patterns are: GE/GEU/LT/LTU/LE/LEU.
+// The following instruction is not constant-extended, since extending it
+// produces incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
+
let isCompare = 1 in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
"$dst = cmpb.eq($src1, #$src2)",
- [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255),
- u8ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
Requires<[HasV4T]>;
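+// Note: (and Rs, 255) is always in the range 0..255 at the IR level, while a
+// constant-extended immediate would be sign-extended.  For a negative
+// constant such as -1, the IR comparison and the byte comparison done by
+// cmpb.eq would disagree (cmpb.eq sees only the low byte 0xff), which is
+// presumably why the operand is kept as a plain u8Imm.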
// Pd=cmpb.eq(Rs,Rt)
@@ -3378,10 +5005,9 @@ let isCompare = 1 in
def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
- [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
- IntRegs:$src2),
- 255),
- 0))]>,
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)), 255), 0))]>,
Requires<[HasV4T]>;
// Pd=cmpb.eq(Rs,Rt)
@@ -3389,17 +5015,9 @@ let isCompare = 1 in
def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
- [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)),
- (shl IntRegs:$src2, (i32 24))))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gt(Rs,#s8)
-let isCompare = 1 in
-def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s32Imm:$src2),
- "$dst = cmpb.gt($src1, #$src2)",
- [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
- s32_24ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
Requires<[HasV4T]>;
// Pd=cmpb.gt(Rs,Rt)
@@ -3407,8 +5025,9 @@ let isCompare = 1 in
def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gt($src1, $src2)",
- [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
- (shl IntRegs:$src2, (i32 24))))]>,
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
Requires<[HasV4T]>;
// Pd=cmpb.gtu(Rs,#u7)
@@ -3416,8 +5035,8 @@ let isCompare = 1 in
def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u7Imm:$src2),
"$dst = cmpb.gtu($src1, #$src2)",
- [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
- u7ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ u7ImmPred:$src2))]>,
Requires<[HasV4T]>;
// Pd=cmpb.gtu(Rs,Rt)
@@ -3425,18 +5044,21 @@ let isCompare = 1 in
def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gtu($src1, $src2)",
- [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
- (and IntRegs:$src2, 255)))]>,
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ (and (i32 IntRegs:$src2), 255)))]>,
Requires<[HasV4T]>;
+// The following instruction is not constant-extended, since extending it
+// produces incorrect code for negative numbers.
+
// Signed half compare(.eq) ri.
// Pd=cmph.eq(Rs,#s8)
let isCompare = 1 in
def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u16Imm:$src2),
+ (ins IntRegs:$src1, s8Imm:$src2),
"$dst = cmph.eq($src1, #$src2)",
- [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535),
- u16_s8ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
+ s8ImmPred:$src2))]>,
Requires<[HasV4T]>;
// Signed half compare(.eq) rr.
@@ -3449,10 +5071,9 @@ let isCompare = 1 in
def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
- [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
- IntRegs:$src2),
- 65535),
- 0))]>,
+ [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)),
+ 65535), 0))]>,
Requires<[HasV4T]>;
// Signed half compare(.eq) rr.
@@ -3465,19 +5086,25 @@ let isCompare = 1 in
def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
- [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)),
- (shl IntRegs:$src2, (i32 16))))]>,
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
Requires<[HasV4T]>;
+/* Incorrect pattern -- the immediate would have to be right-shifted before
+   being used in the cmph.gt instruction.
// Signed half compare(.gt) ri.
// Pd=cmph.gt(Rs,#s8)
+
let isCompare = 1 in
def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s32Imm:$src2),
+ (ins IntRegs:$src1, s8Imm:$src2),
"$dst = cmph.gt($src1, #$src2)",
- [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
- s32_16s8ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ s8ImmPred:$src2))]>,
Requires<[HasV4T]>;
+*/
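+// Note: in the matched IR the register is compared as (Rs << 16), so the
+// corresponding constant is the halfword value shifted into the upper 16
+// bits (e.g. 5 << 16 = 0x50000 for cmph.gt(Rs, #5)).  Printing that constant
+// directly as #$src2 would emit #0x50000; it would first have to be shifted
+// right by 16, hence the pattern above is left disabled.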
// Signed half compare(.gt) rr.
// Pd=cmph.gt(Rs,Rt)
@@ -3485,8 +5112,9 @@ let isCompare = 1 in
def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gt($src1, $src2)",
- [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
- (shl IntRegs:$src2, (i32 16))))]>,
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
Requires<[HasV4T]>;
// Unsigned half compare rr (.gtu).
@@ -3495,8 +5123,9 @@ let isCompare = 1 in
def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gtu($src1, $src2)",
- [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
- (and IntRegs:$src2, 65535)))]>,
+ [(set (i1 PredRegs:$dst),
+ (setugt (and (i32 IntRegs:$src1), 65535),
+ (and (i32 IntRegs:$src2), 65535)))]>,
Requires<[HasV4T]>;
// Unsigned half compare ri (.gtu).
@@ -3505,8 +5134,8 @@ let isCompare = 1 in
def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u7Imm:$src2),
"$dst = cmph.gtu($src1, #$src2)",
- [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
- u7ImmPred:$src2))]>,
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
+ u7ImmPred:$src2))]>,
Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
@@ -3523,10 +5152,42 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
Requires<[HasV4T]>;
}
+// Restore registers and dealloc-return via a function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "jump $dst // Restore_and_dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, isBarrier = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst // Restore_and_dealloc_before_tailcall",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Function call that saves the callee-saved registers.
+let isCall = 1, isBarrier = 1,
+ Uses = [R29, R31] in {
+ def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
+ (ins calltarget:$dst),
+ "call $dst // Save_calle_saved_registers",
+ []>,
+ Requires<[HasV4T]>;
+}
+
// if (Ps) dealloc_return
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
- def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, i32imm:$amt1),
"if ($src1) dealloc_return",
[]>,
Requires<[HasV4T]>;
@@ -3534,7 +5195,8 @@ let isReturn = 1, isTerminator = 1,
// if (!Ps) dealloc_return
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
i32imm:$amt1),
"if (!$src1) dealloc_return",
@@ -3544,7 +5206,8 @@ let isReturn = 1, isTerminator = 1,
// if (Ps.new) dealloc_return:nt
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
i32imm:$amt1),
"if ($src1.new) dealloc_return:nt",
@@ -3554,7 +5217,8 @@ let isReturn = 1, isTerminator = 1,
// if (!Ps.new) dealloc_return:nt
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
i32imm:$amt1),
"if (!$src1.new) dealloc_return:nt",
@@ -3564,7 +5228,8 @@ let isReturn = 1, isTerminator = 1,
// if (Ps.new) dealloc_return:t
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
i32imm:$amt1),
"if ($src1.new) dealloc_return:t",
@@ -3574,10 +5239,539 @@ let isReturn = 1, isTerminator = 1,
// if (!Ps.new) dealloc_return:t
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
i32imm:$amt1),
"if (!$src1.new) dealloc_return:t",
[]>,
Requires<[HasV4T]>;
}
+
+
+// Load/Store with absolute addressing mode
+// memw(##global)=Rt
+
+multiclass ST_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst2<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
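+  // New-value store forms: the stored register is produced by another
+  // instruction in the same packet, hence the ".new" suffix in the syntax.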
+ def _abs_nv_V4 : STInst2<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
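+// The 64-bit (memd) absolute-address stores use DoubleRegs, so they are
+// defined separately rather than through the ST_abs multiclass.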
+let AddedComplexity = 30, isPredicable = 1 in
+def STrid_abs_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, DoubleRegs:$src),
+ "memd(##$absaddr) = $src",
+ [(store (i64 DoubleRegs:$src),
+ (HexagonCONST32 tglobaladdr:$absaddr))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+defm STrib : ST_abs<"memb">;
+defm STrih : ST_abs<"memh">;
+defm STriw : ST_abs<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+
+
+multiclass LD_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins globaladdress:$absaddr),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1) $dst = ",
+ !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1) $dst = ",
+ !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1.new) $dst = ",
+ !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1.new) $dst = ",
+ !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let AddedComplexity = 30 in
+def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$absaddr),
+ "$dst = memd(##$absaddr)",
+ [(set (i64 DoubleRegs:$dst),
+ (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+defm LDrib : LD_abs<"memb">;
+defm LDriub : LD_abs<"memub">;
+defm LDrih : LD_abs<"memh">;
+defm LDriuh : LD_abs<"memuh">;
+defm LDriw : LD_abs<"memw">;
+
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriw_abs_V4 tglobaladdr: $absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrib_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriub_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrih_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+
+// Transfer global address into a register
+let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if($src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if(!$src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, Predicates = [HasV4T] in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
+
+
+// Load - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ "$dst=memd($src1<<#$src2+##$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ !strconcat("$dst = ",
+ !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
+ [(set IntRegs:$dst,
+ (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset)))))]>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
+defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
+defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriw_ind : LD_indirect_lo<"memw", load>;
+
+// Store - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def STrid_ind_lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ IntRegs:$src4),
+ !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
+ [(OpNode (i32 IntRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
+defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
+defm STriw_ind : ST_indirect_lo<"memw", store>;
+
+// Store - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
+multiclass ST_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst2<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_nv_V4 : STInst2<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)",
+ !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)",
+ !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)",
+ !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)",
+ !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_imm : ST_absimm<"memb">;
+defm STrih_imm : ST_absimm<"memh">;
+defm STriw_imm : ST_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+
+
+// Load - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
+
+multiclass LD_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins u6Imm:$src),
+ !strconcat("$dst = ",
+ !strconcat(OpcStr, "(#$src)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1) $dst = ",
+ !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1) $dst = ",
+ !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1.new) $dst = ",
+ !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1.new) $dst = ",
+ !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_imm : LD_absimm<"memb">;
+defm LDriub_imm : LD_absimm<"memub">;
+defm LDrih_imm : LD_absimm<"memh">;
+defm LDriuh_imm : LD_absimm<"memuh">;
+defm LDriw_imm : LD_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load u6ImmPred:$src)),
+ (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
+ (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
+ (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
+ (LDrih_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity=30 in
+def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
+ (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+
+
+// Indexed store word - stored value is a global address.
+// memw(Rs+#u6:2)=##global
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Indexed store halfword - stored value is a global address.
+// memh(Rs+#u6:1)=##global
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
new file mode 100644
index 000000000000..92d098cc0446
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -0,0 +1,626 @@
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (f32 IntRegs:$dst),
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
+ "$dst = CONST64(#$src1)",
+ [(set DoubleRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = CONST32(#$src1)",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+// Transfer immediate float.
+// Only works with a single-precision fp value.
+// For double precision, use CONST64_Float_Real, as a 64-bit transfer can
+// only hold 40-bit values: 32 from the constant extender + an 8-bit immediate.
+let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
+def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if ($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+let isPredicated = 1 in
+def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if (!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+// Convert single precision to double precision and vice-versa.
+def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2df($src)",
+ [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2sf($src)",
+ [(set IntRegs:$dst, (fround DoubleRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+
+// Load.
+def LDrid_f : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst = memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+def LDriw_f : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDriw_indexed_f : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst = memw($src1+#$offset)",
+ [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+// Store.
+def STriw_f : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 10 in
+def STriw_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store (f32 IntRegs:$src3),
+ (add IntRegs:$src1, s11_2ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+def STrid_f : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+// Indexed store double word.
+let AddedComplexity = 10 in
+def STrid_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store (f64 DoubleRegs:$src3),
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+
+// Add
+let isCommutable = 1 in
+def fADD_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfadd($src1, $src2)",
+ [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfadd($src1, $src2)",
+ [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfsub($src1, $src2)",
+ [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfsub($src1, $src2)",
+ [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmpy($src1, $src2)",
+ [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfmpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+// Floating-point compare.
+let isCompare = 1 in {
+multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+
+multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+}
+
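+// Ordered and unordered variants expand to the same compare mnemonic; they
+// differ only in the SelectionDAG condition code they match.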
+defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>;
+defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>;
+defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>;
+defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>;
+defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>;
+defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>;
+
+defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>;
+defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>;
+defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>;
+defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>;
+defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>;
+defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>;
+
+// olt.
+def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ (f64 DoubleRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+// ugt.
+def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1),
+ (f64 (CONST64_Float_Real fpimm:$src2))))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>,
+ Requires<[HasV5T]>;
+
+// ult.
+def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ (f64 DoubleRegs:$src1)))>,
+ Requires<[HasV5T]>;
+
+// le.
+// rs <= rt -> rt >= rs.
+def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+
+// Rss <= Rtt -> Rtt >= Rss.
+def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// rs <= rt -> rt >= rs.
+def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// Rss <= Rtt -> Rtt >= Rss.
+def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
+ DoubleRegs:$src1))>,
+ Requires<[HasV5T]>;
+
+// ne.
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1,
+ (f64 (CONST64_Float_Real fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
+ (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1,
+ (f64 (CONST64_Float_Real fpimm:$src2)))))>,
+ Requires<[HasV5T]>;
+
+// Convert Integer to Floating Point.
+def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_d2sf($src)",
+ [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_ud2sf($src)",
+ [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_uw2sf($src)",
+ [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_w2sf($src)",
+ [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_d2df($src)",
+ [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_ud2df($src)",
+ [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_uw2df($src)",
+ [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_w2df($src)",
+ [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+// Convert Floating Point to Integer - default.
+def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2uw($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2w($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2uw($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2w($src):chop",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2d($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2ud($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2d($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2ud($src):chop",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T]>;
+
+// Convert Floating Point to Integer: non-chopped.
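+// Selected only when the IEEERndNearV5T subtarget feature is available;
+// otherwise the :chop forms above are used.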
+let AddedComplexity = 20 in
+def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2uw($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2w($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2uw($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2w($src)",
+ [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2d($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2ud($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2d($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+let AddedComplexity = 20 in
+def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2ud($src)",
+ [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+
+
+// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
+def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
+ (i32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
+ (f32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
+ (i64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
+ (f64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+// Floating point fused multiply-add.
+def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst += dfmpy($src2, $src3)",
+ [(set (f64 DoubleRegs:$dst),
+ (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += sfmpy($src2, $src3)",
+ [(set (f32 IntRegs:$dst),
+ (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+
+// Floating point max/min.
+let AddedComplexity = 100 in
+def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmax($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmax($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmin($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmin($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "Error; should not emit",
+ [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100, isPredicated = 1 in
+def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
+ fpimm:$src2,
+ fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+
+def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
+ (f32 IntRegs:$src3),
+ (f32 IntRegs:$src4)),
+ (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV5T]>;
+
+def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
+ (f64 DoubleRegs:$src3),
+ (f64 DoubleRegs:$src4)),
+ (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1),
+ DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
+ (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri_f(p0, r1, #i)
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
+ (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir_f(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
+ (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
+
+def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
+ (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f32 IntRegs:$src1)),
+ (CLRBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fneg (f32 IntRegs:$src1)),
+ (TOGBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+/*
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+ */
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index b15e293fdfb4..99f59d5ea669 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -551,13 +551,6 @@ class di_SInst_diu6u6<string opc, Intrinsic IntID>
[(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
imm:$src3))]>;
-class di_SInst_didisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3))]>;
-
class di_SInst_didiqi<string opc, Intrinsic IntID>
: SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
IntRegs:$src3),
@@ -818,6 +811,11 @@ class di_MInst_s8s8<string opc, Intrinsic IntID>
!strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
[(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+class si_MInst_sis9<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
class si_MInst_sisi<string opc, Intrinsic IntID>
: MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
@@ -952,6 +950,17 @@ class si_SInst_sisi_sat<string opc, Intrinsic IntID>
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
[(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+class si_SInst_didi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
: MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
!strconcat("$dst = ", !strconcat(opc ,
@@ -1612,6 +1621,18 @@ class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
DoubleRegs:$src2))],
"$dst2 = $dst">;
+class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+
class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
: MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
DoubleRegs:$src1,
@@ -1822,53 +1843,63 @@ class si_MInst_didi<string opc, Intrinsic IntID>
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
[(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+//
+// LDInst classes.
+//
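+// Post-increment doubleword load with circular addressing; the base register
+// is updated in place ($src1 = $dst).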
+let mayLoad = 1, neverHasSideEffects = 1 in
+class di_LDInstPI_diu4<string opc, Intrinsic IntID>
+ : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
+ (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset),
+ "$dst2 = memd($src1++#$offset:circ($src3))",
+ [],
+ "$src1 = $dst">;
/********************************************************************
* ALU32/ALU *
*********************************************************************/
// ALU32 / ALU / Add.
-def Hexagon_A2_add:
+def HEXAGON_A2_add:
si_ALU32_sisi <"add", int_hexagon_A2_add>;
-def Hexagon_A2_addi:
+def HEXAGON_A2_addi:
si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
// ALU32 / ALU / Logical operations.
-def Hexagon_A2_and:
+def HEXAGON_A2_and:
si_ALU32_sisi <"and", int_hexagon_A2_and>;
-def Hexagon_A2_andir:
+def HEXAGON_A2_andir:
si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
-def Hexagon_A2_not:
+def HEXAGON_A2_not:
si_ALU32_si <"not", int_hexagon_A2_not>;
-def Hexagon_A2_or:
+def HEXAGON_A2_or:
si_ALU32_sisi <"or", int_hexagon_A2_or>;
-def Hexagon_A2_orir:
+def HEXAGON_A2_orir:
si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
-def Hexagon_A2_xor:
+def HEXAGON_A2_xor:
si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
// ALU32 / ALU / Negate.
-def Hexagon_A2_neg:
+def HEXAGON_A2_neg:
si_ALU32_si <"neg", int_hexagon_A2_neg>;
// ALU32 / ALU / Subtract.
-def Hexagon_A2_sub:
+def HEXAGON_A2_sub:
si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
-def Hexagon_A2_subri:
+def HEXAGON_A2_subri:
si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
// ALU32 / ALU / Transfer Immediate.
-def Hexagon_A2_tfril:
+def HEXAGON_A2_tfril:
si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
-def Hexagon_A2_tfrih:
+def HEXAGON_A2_tfrih:
si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
-def Hexagon_A2_tfrsi:
+def HEXAGON_A2_tfrsi:
si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
-def Hexagon_A2_tfrpi:
+def HEXAGON_A2_tfrpi:
di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
// ALU32 / ALU / Transfer Register.
-def Hexagon_A2_tfr:
+def HEXAGON_A2_tfr:
si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
/********************************************************************
@@ -1876,45 +1907,45 @@ def Hexagon_A2_tfr:
*********************************************************************/
// ALU32 / PERM / Combine.
-def Hexagon_A2_combinew:
+def HEXAGON_A2_combinew:
di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
-def Hexagon_A2_combine_hh:
+def HEXAGON_A2_combine_hh:
si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
-def Hexagon_A2_combine_lh:
+def HEXAGON_A2_combine_lh:
si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
-def Hexagon_A2_combine_hl:
+def HEXAGON_A2_combine_hl:
si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
-def Hexagon_A2_combine_ll:
+def HEXAGON_A2_combine_ll:
si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
-def Hexagon_A2_combineii:
+def HEXAGON_A2_combineii:
di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
// ALU32 / PERM / Mux.
-def Hexagon_C2_mux:
+def HEXAGON_C2_mux:
si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
-def Hexagon_C2_muxri:
+def HEXAGON_C2_muxri:
si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
-def Hexagon_C2_muxir:
+def HEXAGON_C2_muxir:
si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
-def Hexagon_C2_muxii:
+def HEXAGON_C2_muxii:
si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
// ALU32 / PERM / Shift halfword.
-def Hexagon_A2_aslh:
+def HEXAGON_A2_aslh:
si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
-def Hexagon_A2_asrh:
+def HEXAGON_A2_asrh:
si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
def SI_to_SXTHI_asrh:
si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
// ALU32 / PERM / Sign/zero extend.
-def Hexagon_A2_sxth:
+def HEXAGON_A2_sxth:
si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
-def Hexagon_A2_sxtb:
+def HEXAGON_A2_sxtb:
si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
-def Hexagon_A2_zxth:
+def HEXAGON_A2_zxth:
si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
-def Hexagon_A2_zxtb:
+def HEXAGON_A2_zxtb:
si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
/********************************************************************
@@ -1922,25 +1953,25 @@ def Hexagon_A2_zxtb:
*********************************************************************/
// ALU32 / PRED / Compare.
-def Hexagon_C2_cmpeq:
+def HEXAGON_C2_cmpeq:
qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
-def Hexagon_C2_cmpeqi:
+def HEXAGON_C2_cmpeqi:
qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
-def Hexagon_C2_cmpgei:
+def HEXAGON_C2_cmpgei:
qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
-def Hexagon_C2_cmpgeui:
+def HEXAGON_C2_cmpgeui:
qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
-def Hexagon_C2_cmpgt:
+def HEXAGON_C2_cmpgt:
qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
-def Hexagon_C2_cmpgti:
+def HEXAGON_C2_cmpgti:
qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
-def Hexagon_C2_cmpgtu:
+def HEXAGON_C2_cmpgtu:
qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
-def Hexagon_C2_cmpgtui:
+def HEXAGON_C2_cmpgtui:
qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
-def Hexagon_C2_cmplt:
+def HEXAGON_C2_cmplt:
qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
-def Hexagon_C2_cmpltu:
+def HEXAGON_C2_cmpltu:
qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
/********************************************************************
@@ -1949,27 +1980,27 @@ def Hexagon_C2_cmpltu:
// ALU32 / VH / Vector add halfwords.
// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
-def Hexagon_A2_svaddh:
+def HEXAGON_A2_svaddh:
si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
-def Hexagon_A2_svaddhs:
+def HEXAGON_A2_svaddhs:
si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
-def Hexagon_A2_svadduhs:
+def HEXAGON_A2_svadduhs:
si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
// ALU32 / VH / Vector average halfwords.
-def Hexagon_A2_svavgh:
+def HEXAGON_A2_svavgh:
si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
-def Hexagon_A2_svavghs:
+def HEXAGON_A2_svavghs:
si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
-def Hexagon_A2_svnavgh:
+def HEXAGON_A2_svnavgh:
si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
// ALU32 / VH / Vector subtract halfwords.
-def Hexagon_A2_svsubh:
+def HEXAGON_A2_svsubh:
si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
-def Hexagon_A2_svsubhs:
+def HEXAGON_A2_svsubhs:
si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
-def Hexagon_A2_svsubuhs:
+def HEXAGON_A2_svsubuhs:
si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
/********************************************************************
@@ -1977,109 +2008,109 @@ def Hexagon_A2_svsubuhs:
*********************************************************************/
// ALU64 / ALU / Add.
-def Hexagon_A2_addp:
+def HEXAGON_A2_addp:
di_ALU64_didi <"add", int_hexagon_A2_addp>;
-def Hexagon_A2_addsat:
+def HEXAGON_A2_addsat:
si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
// ALU64 / ALU / Add halfword.
// Even though the definition says hl, it should be lh, so DON'T change
// the class "si_ALU64_sisi_l16_lh" that it inherits.
-def Hexagon_A2_addh_l16_hl:
+def HEXAGON_A2_addh_l16_hl:
si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
-def Hexagon_A2_addh_l16_ll:
+def HEXAGON_A2_addh_l16_ll:
si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
-def Hexagon_A2_addh_l16_sat_hl:
+def HEXAGON_A2_addh_l16_sat_hl:
si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
-def Hexagon_A2_addh_l16_sat_ll:
+def HEXAGON_A2_addh_l16_sat_ll:
si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
-def Hexagon_A2_addh_h16_hh:
+def HEXAGON_A2_addh_h16_hh:
si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
-def Hexagon_A2_addh_h16_hl:
+def HEXAGON_A2_addh_h16_hl:
si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
-def Hexagon_A2_addh_h16_lh:
+def HEXAGON_A2_addh_h16_lh:
si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
-def Hexagon_A2_addh_h16_ll:
+def HEXAGON_A2_addh_h16_ll:
si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
-def Hexagon_A2_addh_h16_sat_hh:
+def HEXAGON_A2_addh_h16_sat_hh:
si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
-def Hexagon_A2_addh_h16_sat_hl:
+def HEXAGON_A2_addh_h16_sat_hl:
si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
-def Hexagon_A2_addh_h16_sat_lh:
+def HEXAGON_A2_addh_h16_sat_lh:
si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
-def Hexagon_A2_addh_h16_sat_ll:
+def HEXAGON_A2_addh_h16_sat_ll:
si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
// ALU64 / ALU / Compare.
-def Hexagon_C2_cmpeqp:
+def HEXAGON_C2_cmpeqp:
qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
-def Hexagon_C2_cmpgtp:
+def HEXAGON_C2_cmpgtp:
qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
-def Hexagon_C2_cmpgtup:
+def HEXAGON_C2_cmpgtup:
qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
// ALU64 / ALU / Logical operations.
-def Hexagon_A2_andp:
+def HEXAGON_A2_andp:
di_ALU64_didi <"and", int_hexagon_A2_andp>;
-def Hexagon_A2_orp:
+def HEXAGON_A2_orp:
di_ALU64_didi <"or", int_hexagon_A2_orp>;
-def Hexagon_A2_xorp:
+def HEXAGON_A2_xorp:
di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
// ALU64 / ALU / Maximum.
-def Hexagon_A2_max:
+def HEXAGON_A2_max:
si_ALU64_sisi <"max", int_hexagon_A2_max>;
-def Hexagon_A2_maxu:
+def HEXAGON_A2_maxu:
si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
// ALU64 / ALU / Minimum.
-def Hexagon_A2_min:
+def HEXAGON_A2_min:
si_ALU64_sisi <"min", int_hexagon_A2_min>;
-def Hexagon_A2_minu:
+def HEXAGON_A2_minu:
si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
// ALU64 / ALU / Subtract.
-def Hexagon_A2_subp:
+def HEXAGON_A2_subp:
di_ALU64_didi <"sub", int_hexagon_A2_subp>;
-def Hexagon_A2_subsat:
+def HEXAGON_A2_subsat:
si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
// ALU64 / ALU / Subtract halfword.
// Even though the definition says hl, it should be lh, so DON'T change
// the class "si_ALU64_sisi_l16_lh" that it inherits.
-def Hexagon_A2_subh_l16_hl:
+def HEXAGON_A2_subh_l16_hl:
si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
-def Hexagon_A2_subh_l16_ll:
+def HEXAGON_A2_subh_l16_ll:
si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
-def Hexagon_A2_subh_l16_sat_hl:
+def HEXAGON_A2_subh_l16_sat_hl:
si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
-def Hexagon_A2_subh_l16_sat_ll:
+def HEXAGON_A2_subh_l16_sat_ll:
si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
-def Hexagon_A2_subh_h16_hh:
+def HEXAGON_A2_subh_h16_hh:
si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
-def Hexagon_A2_subh_h16_hl:
+def HEXAGON_A2_subh_h16_hl:
si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
-def Hexagon_A2_subh_h16_lh:
+def HEXAGON_A2_subh_h16_lh:
si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
-def Hexagon_A2_subh_h16_ll:
+def HEXAGON_A2_subh_h16_ll:
si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
-def Hexagon_A2_subh_h16_sat_hh:
+def HEXAGON_A2_subh_h16_sat_hh:
si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
-def Hexagon_A2_subh_h16_sat_hl:
+def HEXAGON_A2_subh_h16_sat_hl:
si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
-def Hexagon_A2_subh_h16_sat_lh:
+def HEXAGON_A2_subh_h16_sat_lh:
si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
-def Hexagon_A2_subh_h16_sat_ll:
+def HEXAGON_A2_subh_h16_sat_ll:
si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
// ALU64 / ALU / Transfer register.
-def Hexagon_A2_tfrp:
+def HEXAGON_A2_tfrp:
di_ALU64_di <"", int_hexagon_A2_tfrp>;
/********************************************************************
@@ -2087,7 +2118,7 @@ def Hexagon_A2_tfrp:
*********************************************************************/
// ALU64 / BIT / Masked parity.
-def Hexagon_S2_parityp:
+def HEXAGON_S2_parityp:
si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
/********************************************************************
@@ -2095,7 +2126,7 @@ def Hexagon_S2_parityp:
*********************************************************************/
// ALU64 / PERM / Vector pack high and low halfwords.
-def Hexagon_S2_packhl:
+def HEXAGON_S2_packhl:
di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
/********************************************************************
@@ -2103,37 +2134,37 @@ def Hexagon_S2_packhl:
*********************************************************************/
// ALU64 / VB / Vector add unsigned bytes.
-def Hexagon_A2_vaddub:
+def HEXAGON_A2_vaddub:
di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
-def Hexagon_A2_vaddubs:
+def HEXAGON_A2_vaddubs:
di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
// ALU64 / VB / Vector average unsigned bytes.
-def Hexagon_A2_vavgub:
+def HEXAGON_A2_vavgub:
di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
-def Hexagon_A2_vavgubr:
+def HEXAGON_A2_vavgubr:
di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
// ALU64 / VB / Vector compare unsigned bytes.
-def Hexagon_A2_vcmpbeq:
+def HEXAGON_A2_vcmpbeq:
qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
-def Hexagon_A2_vcmpbgtu:
+def HEXAGON_A2_vcmpbgtu:
qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
// ALU64 / VB / Vector maximum/minimum unsigned bytes.
-def Hexagon_A2_vmaxub:
+def HEXAGON_A2_vmaxub:
di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
-def Hexagon_A2_vminub:
+def HEXAGON_A2_vminub:
di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
// ALU64 / VB / Vector subtract unsigned bytes.
-def Hexagon_A2_vsubub:
+def HEXAGON_A2_vsubub:
di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
-def Hexagon_A2_vsububs:
+def HEXAGON_A2_vsububs:
di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
// ALU64 / VB / Vector mux.
-def Hexagon_C2_vmux:
+def HEXAGON_C2_vmux:
di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
@@ -2143,58 +2174,58 @@ def Hexagon_C2_vmux:
// ALU64 / VH / Vector add halfwords.
// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
-def Hexagon_A2_vaddh:
+def HEXAGON_A2_vaddh:
di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
-def Hexagon_A2_vaddhs:
+def HEXAGON_A2_vaddhs:
di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
-def Hexagon_A2_vadduhs:
+def HEXAGON_A2_vadduhs:
di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
// ALU64 / VH / Vector average halfwords.
// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd/:crnd][:sat]
-def Hexagon_A2_vavgh:
+def HEXAGON_A2_vavgh:
di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
-def Hexagon_A2_vavghcr:
+def HEXAGON_A2_vavghcr:
di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
-def Hexagon_A2_vavghr:
+def HEXAGON_A2_vavghr:
di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
-def Hexagon_A2_vavguh:
+def HEXAGON_A2_vavguh:
di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
-def Hexagon_A2_vavguhr:
+def HEXAGON_A2_vavguhr:
di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
-def Hexagon_A2_vnavgh:
+def HEXAGON_A2_vnavgh:
di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
-def Hexagon_A2_vnavghcr:
+def HEXAGON_A2_vnavghcr:
di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
-def Hexagon_A2_vnavghr:
+def HEXAGON_A2_vnavghr:
di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
// ALU64 / VH / Vector compare halfwords.
-def Hexagon_A2_vcmpheq:
+def HEXAGON_A2_vcmpheq:
qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
-def Hexagon_A2_vcmphgt:
+def HEXAGON_A2_vcmphgt:
qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
-def Hexagon_A2_vcmphgtu:
+def HEXAGON_A2_vcmphgtu:
qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
// ALU64 / VH / Vector maximum halfwords.
-def Hexagon_A2_vmaxh:
+def HEXAGON_A2_vmaxh:
di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
-def Hexagon_A2_vmaxuh:
+def HEXAGON_A2_vmaxuh:
di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
// ALU64 / VH / Vector minimum halfwords.
-def Hexagon_A2_vminh:
+def HEXAGON_A2_vminh:
di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
-def Hexagon_A2_vminuh:
+def HEXAGON_A2_vminuh:
di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
// ALU64 / VH / Vector subtract halfwords.
-def Hexagon_A2_vsubh:
+def HEXAGON_A2_vsubh:
di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
-def Hexagon_A2_vsubhs:
+def HEXAGON_A2_vsubhs:
di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
-def Hexagon_A2_vsubuhs:
+def HEXAGON_A2_vsubuhs:
di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
@@ -2204,53 +2235,53 @@ def Hexagon_A2_vsubuhs:
// ALU64 / VW / Vector add words.
// Rdd32=vaddw(Rss32,Rtt32)[:sat]
-def Hexagon_A2_vaddw:
+def HEXAGON_A2_vaddw:
di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
-def Hexagon_A2_vaddws:
+def HEXAGON_A2_vaddws:
di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
// ALU64 / VW / Vector average words.
-def Hexagon_A2_vavguw:
+def HEXAGON_A2_vavguw:
di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
-def Hexagon_A2_vavguwr:
+def HEXAGON_A2_vavguwr:
di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
-def Hexagon_A2_vavgw:
+def HEXAGON_A2_vavgw:
di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
-def Hexagon_A2_vavgwcr:
+def HEXAGON_A2_vavgwcr:
di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
-def Hexagon_A2_vavgwr:
+def HEXAGON_A2_vavgwr:
di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
-def Hexagon_A2_vnavgw:
+def HEXAGON_A2_vnavgw:
di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
-def Hexagon_A2_vnavgwcr:
+def HEXAGON_A2_vnavgwcr:
di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
-def Hexagon_A2_vnavgwr:
+def HEXAGON_A2_vnavgwr:
di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
// ALU64 / VW / Vector compare words.
-def Hexagon_A2_vcmpweq:
+def HEXAGON_A2_vcmpweq:
qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
-def Hexagon_A2_vcmpwgt:
+def HEXAGON_A2_vcmpwgt:
qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
-def Hexagon_A2_vcmpwgtu:
+def HEXAGON_A2_vcmpwgtu:
qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
// ALU64 / VW / Vector maximum words.
-def Hexagon_A2_vmaxw:
+def HEXAGON_A2_vmaxw:
di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
-def Hexagon_A2_vmaxuw:
+def HEXAGON_A2_vmaxuw:
di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
// ALU64 / VW / Vector minimum words.
-def Hexagon_A2_vminw:
+def HEXAGON_A2_vminw:
di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
-def Hexagon_A2_vminuw:
+def HEXAGON_A2_vminuw:
di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
// ALU64 / VW / Vector subtract words.
-def Hexagon_A2_vsubw:
+def HEXAGON_A2_vsubw:
di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
-def Hexagon_A2_vsubws:
+def HEXAGON_A2_vsubws:
di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
@@ -2259,25 +2290,25 @@ def Hexagon_A2_vsubws:
*********************************************************************/
// CR / Logical reductions on predicates.
-def Hexagon_C2_all8:
+def HEXAGON_C2_all8:
qi_SInst_qi <"all8", int_hexagon_C2_all8>;
-def Hexagon_C2_any8:
+def HEXAGON_C2_any8:
qi_SInst_qi <"any8", int_hexagon_C2_any8>;
// CR / Logical operations on predicates.
-def Hexagon_C2_pxfer_map:
+def HEXAGON_C2_pxfer_map:
qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
-def Hexagon_C2_and:
+def HEXAGON_C2_and:
qi_SInst_qiqi <"and", int_hexagon_C2_and>;
-def Hexagon_C2_andn:
+def HEXAGON_C2_andn:
qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
-def Hexagon_C2_not:
+def HEXAGON_C2_not:
qi_SInst_qi <"not", int_hexagon_C2_not>;
-def Hexagon_C2_or:
+def HEXAGON_C2_or:
qi_SInst_qiqi <"or", int_hexagon_C2_or>;
-def Hexagon_C2_orn:
+def HEXAGON_C2_orn:
qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
-def Hexagon_C2_xor:
+def HEXAGON_C2_xor:
qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
@@ -2286,27 +2317,27 @@ def Hexagon_C2_xor:
*********************************************************************/
// MTYPE / ALU / Add and accumulate.
-def Hexagon_M2_acci:
+def HEXAGON_M2_acci:
si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
-def Hexagon_M2_accii:
+def HEXAGON_M2_accii:
si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
-def Hexagon_M2_nacci:
+def HEXAGON_M2_nacci:
si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
-def Hexagon_M2_naccii:
+def HEXAGON_M2_naccii:
si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
// MTYPE / ALU / Subtract and accumulate.
-def Hexagon_M2_subacc:
+def HEXAGON_M2_subacc:
si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
// MTYPE / ALU / Vector absolute difference.
-def Hexagon_M2_vabsdiffh:
+def HEXAGON_M2_vabsdiffh:
di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
-def Hexagon_M2_vabsdiffw:
+def HEXAGON_M2_vabsdiffw:
di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
// MTYPE / ALU / XOR and xor with destination.
-def Hexagon_M2_xor_xacc:
+def HEXAGON_M2_xor_xacc:
si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
@@ -2316,91 +2347,91 @@ def Hexagon_M2_xor_xacc:
// MTYPE / COMPLEX / Complex multiply.
// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
-def Hexagon_M2_cmpys_s1:
+def HEXAGON_M2_cmpys_s1:
di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
-def Hexagon_M2_cmpys_s0:
+def HEXAGON_M2_cmpys_s0:
di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
-def Hexagon_M2_cmpysc_s1:
+def HEXAGON_M2_cmpysc_s1:
di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
-def Hexagon_M2_cmpysc_s0:
+def HEXAGON_M2_cmpysc_s0:
di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
-def Hexagon_M2_cmacs_s1:
+def HEXAGON_M2_cmacs_s1:
di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
-def Hexagon_M2_cmacs_s0:
+def HEXAGON_M2_cmacs_s0:
di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
-def Hexagon_M2_cmacsc_s1:
+def HEXAGON_M2_cmacsc_s1:
di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
-def Hexagon_M2_cmacsc_s0:
+def HEXAGON_M2_cmacsc_s0:
di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
-def Hexagon_M2_cnacs_s1:
+def HEXAGON_M2_cnacs_s1:
di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
-def Hexagon_M2_cnacs_s0:
+def HEXAGON_M2_cnacs_s0:
di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
-def Hexagon_M2_cnacsc_s1:
+def HEXAGON_M2_cnacsc_s1:
di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
-def Hexagon_M2_cnacsc_s0:
+def HEXAGON_M2_cnacsc_s0:
di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
// MTYPE / COMPLEX / Complex multiply real or imaginary.
-def Hexagon_M2_cmpyr_s0:
+def HEXAGON_M2_cmpyr_s0:
di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
-def Hexagon_M2_cmacr_s0:
+def HEXAGON_M2_cmacr_s0:
di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
-def Hexagon_M2_cmpyi_s0:
+def HEXAGON_M2_cmpyi_s0:
di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
-def Hexagon_M2_cmaci_s0:
+def HEXAGON_M2_cmaci_s0:
di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
// MTYPE / COMPLEX / Complex multiply with round and pack.
// Rxx32+=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
-def Hexagon_M2_cmpyrs_s0:
+def HEXAGON_M2_cmpyrs_s0:
si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
-def Hexagon_M2_cmpyrs_s1:
+def HEXAGON_M2_cmpyrs_s1:
si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
-def Hexagon_M2_cmpyrsc_s0:
+def HEXAGON_M2_cmpyrsc_s0:
si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
-def Hexagon_M2_cmpyrsc_s1:
+def HEXAGON_M2_cmpyrsc_s1:
si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
-def Hexagon_M2_vcmpy_s0_sat_i:
+def HEXAGON_M2_vcmpy_s0_sat_i:
di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
-def Hexagon_M2_vcmpy_s1_sat_i:
+def HEXAGON_M2_vcmpy_s1_sat_i:
di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
-def Hexagon_M2_vcmpy_s0_sat_r:
+def HEXAGON_M2_vcmpy_s0_sat_r:
di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
-def Hexagon_M2_vcmpy_s1_sat_r:
+def HEXAGON_M2_vcmpy_s1_sat_r:
di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
-def Hexagon_M2_vcmac_s0_sat_i:
+def HEXAGON_M2_vcmac_s0_sat_i:
di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
-def Hexagon_M2_vcmac_s0_sat_r:
+def HEXAGON_M2_vcmac_s0_sat_r:
di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def Hexagon_M2_vrcmpyi_s0:
+def HEXAGON_M2_vrcmpyi_s0:
di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
-def Hexagon_M2_vrcmpyr_s0:
+def HEXAGON_M2_vrcmpyr_s0:
di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
-def Hexagon_M2_vrcmpyi_s0c:
+def HEXAGON_M2_vrcmpyi_s0c:
di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
-def Hexagon_M2_vrcmpyr_s0c:
+def HEXAGON_M2_vrcmpyr_s0c:
di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
-def Hexagon_M2_vrcmaci_s0:
+def HEXAGON_M2_vrcmaci_s0:
di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
-def Hexagon_M2_vrcmacr_s0:
+def HEXAGON_M2_vrcmacr_s0:
di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
-def Hexagon_M2_vrcmaci_s0c:
+def HEXAGON_M2_vrcmaci_s0c:
di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
-def Hexagon_M2_vrcmacr_s0c:
+def HEXAGON_M2_vrcmacr_s0c:
di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
@@ -2409,115 +2440,120 @@ def Hexagon_M2_vrcmacr_s0c:
*********************************************************************/
// MTYPE / MPYH / Multiply and use lower result.
-//def Hexagon_M2_mpysmi:
+//def HEXAGON_M2_mpysmi:
+//FIXME: Hexagon_M2_mpysmi should really be of the type si_MInst_sim9,
+// not si_MInst_sis9 - but for now, we will use s9.
+// def Hexagon_M2_mpysmi:
// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
-def Hexagon_M2_mpyi:
+def Hexagon_M2_mpysmi:
+ si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>;
+def HEXAGON_M2_mpyi:
si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
-def Hexagon_M2_mpyui:
+def HEXAGON_M2_mpyui:
si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
-def Hexagon_M2_macsip:
+def HEXAGON_M2_macsip:
si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
-def Hexagon_M2_maci:
+def HEXAGON_M2_maci:
si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
-def Hexagon_M2_macsin:
+def HEXAGON_M2_macsin:
si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
// MTYPE / MPYH / Multiply word by half (32x16).
//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
-def Hexagon_M2_mmpyl_rs1:
+def HEXAGON_M2_mmpyl_rs1:
di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
-def Hexagon_M2_mmpyl_s1:
+def HEXAGON_M2_mmpyl_s1:
di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
-def Hexagon_M2_mmpyl_rs0:
+def HEXAGON_M2_mmpyl_rs0:
di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
-def Hexagon_M2_mmpyl_s0:
+def HEXAGON_M2_mmpyl_s0:
di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
-def Hexagon_M2_mmpyh_rs1:
+def HEXAGON_M2_mmpyh_rs1:
di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
-def Hexagon_M2_mmpyh_s1:
+def HEXAGON_M2_mmpyh_s1:
di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
-def Hexagon_M2_mmpyh_rs0:
+def HEXAGON_M2_mmpyh_rs0:
di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
-def Hexagon_M2_mmpyh_s0:
+def HEXAGON_M2_mmpyh_s0:
di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
-def Hexagon_M2_mmacls_rs1:
+def HEXAGON_M2_mmacls_rs1:
di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
-def Hexagon_M2_mmacls_s1:
+def HEXAGON_M2_mmacls_s1:
di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
-def Hexagon_M2_mmacls_rs0:
+def HEXAGON_M2_mmacls_rs0:
di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
-def Hexagon_M2_mmacls_s0:
+def HEXAGON_M2_mmacls_s0:
di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
-def Hexagon_M2_mmachs_rs1:
+def HEXAGON_M2_mmachs_rs1:
di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
-def Hexagon_M2_mmachs_s1:
+def HEXAGON_M2_mmachs_s1:
di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
-def Hexagon_M2_mmachs_rs0:
+def HEXAGON_M2_mmachs_rs0:
di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
-def Hexagon_M2_mmachs_s0:
+def HEXAGON_M2_mmachs_s0:
di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
// MTYPE / MPYH / Multiply word by unsigned half (32x16).
//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
-def Hexagon_M2_mmpyul_rs1:
+def HEXAGON_M2_mmpyul_rs1:
di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
-def Hexagon_M2_mmpyul_s1:
+def HEXAGON_M2_mmpyul_s1:
di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
-def Hexagon_M2_mmpyul_rs0:
+def HEXAGON_M2_mmpyul_rs0:
di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
-def Hexagon_M2_mmpyul_s0:
+def HEXAGON_M2_mmpyul_s0:
di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
-def Hexagon_M2_mmpyuh_rs1:
+def HEXAGON_M2_mmpyuh_rs1:
di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
-def Hexagon_M2_mmpyuh_s1:
+def HEXAGON_M2_mmpyuh_s1:
di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
-def Hexagon_M2_mmpyuh_rs0:
+def HEXAGON_M2_mmpyuh_rs0:
di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
-def Hexagon_M2_mmpyuh_s0:
+def HEXAGON_M2_mmpyuh_s0:
di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
-def Hexagon_M2_mmaculs_rs1:
+def HEXAGON_M2_mmaculs_rs1:
di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
-def Hexagon_M2_mmaculs_s1:
+def HEXAGON_M2_mmaculs_s1:
di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
-def Hexagon_M2_mmaculs_rs0:
+def HEXAGON_M2_mmaculs_rs0:
di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
-def Hexagon_M2_mmaculs_s0:
+def HEXAGON_M2_mmaculs_s0:
di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
-def Hexagon_M2_mmacuhs_rs1:
+def HEXAGON_M2_mmacuhs_rs1:
di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
-def Hexagon_M2_mmacuhs_s1:
+def HEXAGON_M2_mmacuhs_s1:
di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
-def Hexagon_M2_mmacuhs_rs0:
+def HEXAGON_M2_mmacuhs_rs0:
di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
-def Hexagon_M2_mmacuhs_s0:
+def HEXAGON_M2_mmacuhs_s0:
di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
// MTYPE / MPYH / Multiply and use upper result.
-def Hexagon_M2_hmmpyh_rs1:
+def HEXAGON_M2_hmmpyh_rs1:
si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
-def Hexagon_M2_hmmpyl_rs1:
+def HEXAGON_M2_hmmpyl_rs1:
si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
-def Hexagon_M2_mpy_up:
+def HEXAGON_M2_mpy_up:
si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
-def Hexagon_M2_dpmpyss_rnd_s0:
+def HEXAGON_M2_dpmpyss_rnd_s0:
si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
-def Hexagon_M2_mpyu_up:
+def HEXAGON_M2_mpyu_up:
si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
// MTYPE / MPYH / Multiply and use full result.
-def Hexagon_M2_dpmpyuu_s0:
+def HEXAGON_M2_dpmpyuu_s0:
di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
-def Hexagon_M2_dpmpyuu_acc_s0:
+def HEXAGON_M2_dpmpyuu_acc_s0:
di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
-def Hexagon_M2_dpmpyuu_nac_s0:
+def HEXAGON_M2_dpmpyuu_nac_s0:
di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
-def Hexagon_M2_dpmpyss_s0:
+def HEXAGON_M2_dpmpyss_s0:
di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
-def Hexagon_M2_dpmpyss_acc_s0:
+def HEXAGON_M2_dpmpyss_acc_s0:
di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
-def Hexagon_M2_dpmpyss_nac_s0:
+def HEXAGON_M2_dpmpyss_nac_s0:
di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
@@ -2528,334 +2564,334 @@ def Hexagon_M2_dpmpyss_nac_s0:
// MTYPE / MPYS / Scalar 16x16 multiply signed.
//Rd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|
//         [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]]
-def Hexagon_M2_mpy_hh_s0:
+def HEXAGON_M2_mpy_hh_s0:
si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
-def Hexagon_M2_mpy_hh_s1:
+def HEXAGON_M2_mpy_hh_s1:
si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
-def Hexagon_M2_mpy_rnd_hh_s1:
+def HEXAGON_M2_mpy_rnd_hh_s1:
si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
-def Hexagon_M2_mpy_sat_rnd_hh_s1:
+def HEXAGON_M2_mpy_sat_rnd_hh_s1:
si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def Hexagon_M2_mpy_sat_hh_s1:
+def HEXAGON_M2_mpy_sat_hh_s1:
si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
-def Hexagon_M2_mpy_rnd_hh_s0:
+def HEXAGON_M2_mpy_rnd_hh_s0:
si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
-def Hexagon_M2_mpy_sat_rnd_hh_s0:
+def HEXAGON_M2_mpy_sat_rnd_hh_s0:
si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-def Hexagon_M2_mpy_sat_hh_s0:
+def HEXAGON_M2_mpy_sat_hh_s0:
si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
-def Hexagon_M2_mpy_hl_s0:
+def HEXAGON_M2_mpy_hl_s0:
si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
-def Hexagon_M2_mpy_hl_s1:
+def HEXAGON_M2_mpy_hl_s1:
si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
-def Hexagon_M2_mpy_rnd_hl_s1:
+def HEXAGON_M2_mpy_rnd_hl_s1:
si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
-def Hexagon_M2_mpy_sat_rnd_hl_s1:
+def HEXAGON_M2_mpy_sat_rnd_hl_s1:
si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def Hexagon_M2_mpy_sat_hl_s1:
+def HEXAGON_M2_mpy_sat_hl_s1:
si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
-def Hexagon_M2_mpy_rnd_hl_s0:
+def HEXAGON_M2_mpy_rnd_hl_s0:
si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
-def Hexagon_M2_mpy_sat_rnd_hl_s0:
+def HEXAGON_M2_mpy_sat_rnd_hl_s0:
si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def Hexagon_M2_mpy_sat_hl_s0:
+def HEXAGON_M2_mpy_sat_hl_s0:
si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
-def Hexagon_M2_mpy_lh_s0:
+def HEXAGON_M2_mpy_lh_s0:
si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
-def Hexagon_M2_mpy_lh_s1:
+def HEXAGON_M2_mpy_lh_s1:
si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
-def Hexagon_M2_mpy_rnd_lh_s1:
+def HEXAGON_M2_mpy_rnd_lh_s1:
si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
-def Hexagon_M2_mpy_sat_rnd_lh_s1:
+def HEXAGON_M2_mpy_sat_rnd_lh_s1:
si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def Hexagon_M2_mpy_sat_lh_s1:
+def HEXAGON_M2_mpy_sat_lh_s1:
si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
-def Hexagon_M2_mpy_rnd_lh_s0:
+def HEXAGON_M2_mpy_rnd_lh_s0:
si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
-def Hexagon_M2_mpy_sat_rnd_lh_s0:
+def HEXAGON_M2_mpy_sat_rnd_lh_s0:
si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def Hexagon_M2_mpy_sat_lh_s0:
+def HEXAGON_M2_mpy_sat_lh_s0:
si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
-def Hexagon_M2_mpy_ll_s0:
+def HEXAGON_M2_mpy_ll_s0:
si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
-def Hexagon_M2_mpy_ll_s1:
+def HEXAGON_M2_mpy_ll_s1:
si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
-def Hexagon_M2_mpy_rnd_ll_s1:
+def HEXAGON_M2_mpy_rnd_ll_s1:
si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
-def Hexagon_M2_mpy_sat_rnd_ll_s1:
+def HEXAGON_M2_mpy_sat_rnd_ll_s1:
si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def Hexagon_M2_mpy_sat_ll_s1:
+def HEXAGON_M2_mpy_sat_ll_s1:
si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
-def Hexagon_M2_mpy_rnd_ll_s0:
+def HEXAGON_M2_mpy_rnd_ll_s0:
si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
-def Hexagon_M2_mpy_sat_rnd_ll_s0:
+def HEXAGON_M2_mpy_sat_rnd_ll_s0:
si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def Hexagon_M2_mpy_sat_ll_s0:
+def HEXAGON_M2_mpy_sat_ll_s0:
si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
-def Hexagon_M2_mpyd_hh_s0:
+def HEXAGON_M2_mpyd_hh_s0:
di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
-def Hexagon_M2_mpyd_hh_s1:
+def HEXAGON_M2_mpyd_hh_s1:
di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
-def Hexagon_M2_mpyd_rnd_hh_s1:
+def HEXAGON_M2_mpyd_rnd_hh_s1:
di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
-def Hexagon_M2_mpyd_rnd_hh_s0:
+def HEXAGON_M2_mpyd_rnd_hh_s0:
di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
-def Hexagon_M2_mpyd_hl_s0:
+def HEXAGON_M2_mpyd_hl_s0:
di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
-def Hexagon_M2_mpyd_hl_s1:
+def HEXAGON_M2_mpyd_hl_s1:
di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
-def Hexagon_M2_mpyd_rnd_hl_s1:
+def HEXAGON_M2_mpyd_rnd_hl_s1:
di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
-def Hexagon_M2_mpyd_rnd_hl_s0:
+def HEXAGON_M2_mpyd_rnd_hl_s0:
di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
-def Hexagon_M2_mpyd_lh_s0:
+def HEXAGON_M2_mpyd_lh_s0:
di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
-def Hexagon_M2_mpyd_lh_s1:
+def HEXAGON_M2_mpyd_lh_s1:
di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
-def Hexagon_M2_mpyd_rnd_lh_s1:
+def HEXAGON_M2_mpyd_rnd_lh_s1:
di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
-def Hexagon_M2_mpyd_rnd_lh_s0:
+def HEXAGON_M2_mpyd_rnd_lh_s0:
di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
-def Hexagon_M2_mpyd_ll_s0:
+def HEXAGON_M2_mpyd_ll_s0:
di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
-def Hexagon_M2_mpyd_ll_s1:
+def HEXAGON_M2_mpyd_ll_s1:
di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
-def Hexagon_M2_mpyd_rnd_ll_s1:
+def HEXAGON_M2_mpyd_rnd_ll_s1:
di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
-def Hexagon_M2_mpyd_rnd_ll_s0:
+def HEXAGON_M2_mpyd_rnd_ll_s0:
di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def Hexagon_M2_mpy_acc_hh_s0:
+def HEXAGON_M2_mpy_acc_hh_s0:
si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
-def Hexagon_M2_mpy_acc_hh_s1:
+def HEXAGON_M2_mpy_acc_hh_s1:
si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
-def Hexagon_M2_mpy_acc_sat_hh_s1:
+def HEXAGON_M2_mpy_acc_sat_hh_s1:
si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def Hexagon_M2_mpy_acc_sat_hh_s0:
+def HEXAGON_M2_mpy_acc_sat_hh_s0:
si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
-def Hexagon_M2_mpy_acc_hl_s0:
+def HEXAGON_M2_mpy_acc_hl_s0:
si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
-def Hexagon_M2_mpy_acc_hl_s1:
+def HEXAGON_M2_mpy_acc_hl_s1:
si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
-def Hexagon_M2_mpy_acc_sat_hl_s1:
+def HEXAGON_M2_mpy_acc_sat_hl_s1:
si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def Hexagon_M2_mpy_acc_sat_hl_s0:
+def HEXAGON_M2_mpy_acc_sat_hl_s0:
si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
-def Hexagon_M2_mpy_acc_lh_s0:
+def HEXAGON_M2_mpy_acc_lh_s0:
si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
-def Hexagon_M2_mpy_acc_lh_s1:
+def HEXAGON_M2_mpy_acc_lh_s1:
si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
-def Hexagon_M2_mpy_acc_sat_lh_s1:
+def HEXAGON_M2_mpy_acc_sat_lh_s1:
si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def Hexagon_M2_mpy_acc_sat_lh_s0:
+def HEXAGON_M2_mpy_acc_sat_lh_s0:
si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
-def Hexagon_M2_mpy_acc_ll_s0:
+def HEXAGON_M2_mpy_acc_ll_s0:
si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
-def Hexagon_M2_mpy_acc_ll_s1:
+def HEXAGON_M2_mpy_acc_ll_s1:
si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
-def Hexagon_M2_mpy_acc_sat_ll_s1:
+def HEXAGON_M2_mpy_acc_sat_ll_s1:
si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def Hexagon_M2_mpy_acc_sat_ll_s0:
+def HEXAGON_M2_mpy_acc_sat_ll_s0:
si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def Hexagon_M2_mpy_nac_hh_s0:
+def HEXAGON_M2_mpy_nac_hh_s0:
si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
-def Hexagon_M2_mpy_nac_hh_s1:
+def HEXAGON_M2_mpy_nac_hh_s1:
si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
-def Hexagon_M2_mpy_nac_sat_hh_s1:
+def HEXAGON_M2_mpy_nac_sat_hh_s1:
si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def Hexagon_M2_mpy_nac_sat_hh_s0:
+def HEXAGON_M2_mpy_nac_sat_hh_s0:
si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
-def Hexagon_M2_mpy_nac_hl_s0:
+def HEXAGON_M2_mpy_nac_hl_s0:
si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
-def Hexagon_M2_mpy_nac_hl_s1:
+def HEXAGON_M2_mpy_nac_hl_s1:
si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
-def Hexagon_M2_mpy_nac_sat_hl_s1:
+def HEXAGON_M2_mpy_nac_sat_hl_s1:
si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def Hexagon_M2_mpy_nac_sat_hl_s0:
+def HEXAGON_M2_mpy_nac_sat_hl_s0:
si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
-def Hexagon_M2_mpy_nac_lh_s0:
+def HEXAGON_M2_mpy_nac_lh_s0:
si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
-def Hexagon_M2_mpy_nac_lh_s1:
+def HEXAGON_M2_mpy_nac_lh_s1:
si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
-def Hexagon_M2_mpy_nac_sat_lh_s1:
+def HEXAGON_M2_mpy_nac_sat_lh_s1:
si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def Hexagon_M2_mpy_nac_sat_lh_s0:
+def HEXAGON_M2_mpy_nac_sat_lh_s0:
si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
-def Hexagon_M2_mpy_nac_ll_s0:
+def HEXAGON_M2_mpy_nac_ll_s0:
si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
-def Hexagon_M2_mpy_nac_ll_s1:
+def HEXAGON_M2_mpy_nac_ll_s1:
si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
-def Hexagon_M2_mpy_nac_sat_ll_s1:
+def HEXAGON_M2_mpy_nac_sat_ll_s1:
si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def Hexagon_M2_mpy_nac_sat_ll_s0:
+def HEXAGON_M2_mpy_nac_sat_ll_s0:
si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyd_acc_hh_s0:
+def HEXAGON_M2_mpyd_acc_hh_s0:
di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
-def Hexagon_M2_mpyd_acc_hh_s1:
+def HEXAGON_M2_mpyd_acc_hh_s1:
di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
-def Hexagon_M2_mpyd_acc_hl_s0:
+def HEXAGON_M2_mpyd_acc_hl_s0:
di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
-def Hexagon_M2_mpyd_acc_hl_s1:
+def HEXAGON_M2_mpyd_acc_hl_s1:
di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
-def Hexagon_M2_mpyd_acc_lh_s0:
+def HEXAGON_M2_mpyd_acc_lh_s0:
di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
-def Hexagon_M2_mpyd_acc_lh_s1:
+def HEXAGON_M2_mpyd_acc_lh_s1:
di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
-def Hexagon_M2_mpyd_acc_ll_s0:
+def HEXAGON_M2_mpyd_acc_ll_s0:
di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
-def Hexagon_M2_mpyd_acc_ll_s1:
+def HEXAGON_M2_mpyd_acc_ll_s1:
di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyd_nac_hh_s0:
+def HEXAGON_M2_mpyd_nac_hh_s0:
di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
-def Hexagon_M2_mpyd_nac_hh_s1:
+def HEXAGON_M2_mpyd_nac_hh_s1:
di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
-def Hexagon_M2_mpyd_nac_hl_s0:
+def HEXAGON_M2_mpyd_nac_hl_s0:
di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
-def Hexagon_M2_mpyd_nac_hl_s1:
+def HEXAGON_M2_mpyd_nac_hl_s1:
di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
-def Hexagon_M2_mpyd_nac_lh_s0:
+def HEXAGON_M2_mpyd_nac_lh_s0:
di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
-def Hexagon_M2_mpyd_nac_lh_s1:
+def HEXAGON_M2_mpyd_nac_lh_s1:
di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
-def Hexagon_M2_mpyd_nac_ll_s0:
+def HEXAGON_M2_mpyd_nac_ll_s0:
di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
-def Hexagon_M2_mpyd_nac_ll_s1:
+def HEXAGON_M2_mpyd_nac_ll_s1:
di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyu_hh_s0:
+def HEXAGON_M2_mpyu_hh_s0:
si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
-def Hexagon_M2_mpyu_hh_s1:
+def HEXAGON_M2_mpyu_hh_s1:
si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
-def Hexagon_M2_mpyu_hl_s0:
+def HEXAGON_M2_mpyu_hl_s0:
si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
-def Hexagon_M2_mpyu_hl_s1:
+def HEXAGON_M2_mpyu_hl_s1:
si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
-def Hexagon_M2_mpyu_lh_s0:
+def HEXAGON_M2_mpyu_lh_s0:
si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
-def Hexagon_M2_mpyu_lh_s1:
+def HEXAGON_M2_mpyu_lh_s1:
si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
-def Hexagon_M2_mpyu_ll_s0:
+def HEXAGON_M2_mpyu_ll_s0:
si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
-def Hexagon_M2_mpyu_ll_s1:
+def HEXAGON_M2_mpyu_ll_s1:
si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyud_hh_s0:
+def HEXAGON_M2_mpyud_hh_s0:
di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
-def Hexagon_M2_mpyud_hh_s1:
+def HEXAGON_M2_mpyud_hh_s1:
di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
-def Hexagon_M2_mpyud_hl_s0:
+def HEXAGON_M2_mpyud_hl_s0:
di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
-def Hexagon_M2_mpyud_hl_s1:
+def HEXAGON_M2_mpyud_hl_s1:
di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
-def Hexagon_M2_mpyud_lh_s0:
+def HEXAGON_M2_mpyud_lh_s0:
di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
-def Hexagon_M2_mpyud_lh_s1:
+def HEXAGON_M2_mpyud_lh_s1:
di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
-def Hexagon_M2_mpyud_ll_s0:
+def HEXAGON_M2_mpyud_ll_s0:
di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
-def Hexagon_M2_mpyud_ll_s1:
+def HEXAGON_M2_mpyud_ll_s1:
di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyu_acc_hh_s0:
+def HEXAGON_M2_mpyu_acc_hh_s0:
si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
-def Hexagon_M2_mpyu_acc_hh_s1:
+def HEXAGON_M2_mpyu_acc_hh_s1:
si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
-def Hexagon_M2_mpyu_acc_hl_s0:
+def HEXAGON_M2_mpyu_acc_hl_s0:
si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
-def Hexagon_M2_mpyu_acc_hl_s1:
+def HEXAGON_M2_mpyu_acc_hl_s1:
si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
-def Hexagon_M2_mpyu_acc_lh_s0:
+def HEXAGON_M2_mpyu_acc_lh_s0:
si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
-def Hexagon_M2_mpyu_acc_lh_s1:
+def HEXAGON_M2_mpyu_acc_lh_s1:
si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
-def Hexagon_M2_mpyu_acc_ll_s0:
+def HEXAGON_M2_mpyu_acc_ll_s0:
si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
-def Hexagon_M2_mpyu_acc_ll_s1:
+def HEXAGON_M2_mpyu_acc_ll_s1:
si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyu_nac_hh_s0:
+def HEXAGON_M2_mpyu_nac_hh_s0:
si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
-def Hexagon_M2_mpyu_nac_hh_s1:
+def HEXAGON_M2_mpyu_nac_hh_s1:
si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
-def Hexagon_M2_mpyu_nac_hl_s0:
+def HEXAGON_M2_mpyu_nac_hl_s0:
si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
-def Hexagon_M2_mpyu_nac_hl_s1:
+def HEXAGON_M2_mpyu_nac_hl_s1:
si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
-def Hexagon_M2_mpyu_nac_lh_s0:
+def HEXAGON_M2_mpyu_nac_lh_s0:
si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
-def Hexagon_M2_mpyu_nac_lh_s1:
+def HEXAGON_M2_mpyu_nac_lh_s1:
si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
-def Hexagon_M2_mpyu_nac_ll_s0:
+def HEXAGON_M2_mpyu_nac_ll_s0:
si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
-def Hexagon_M2_mpyu_nac_ll_s1:
+def HEXAGON_M2_mpyu_nac_ll_s1:
si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyud_acc_hh_s0:
+def HEXAGON_M2_mpyud_acc_hh_s0:
di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
-def Hexagon_M2_mpyud_acc_hh_s1:
+def HEXAGON_M2_mpyud_acc_hh_s1:
di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
-def Hexagon_M2_mpyud_acc_hl_s0:
+def HEXAGON_M2_mpyud_acc_hl_s0:
di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
-def Hexagon_M2_mpyud_acc_hl_s1:
+def HEXAGON_M2_mpyud_acc_hl_s1:
di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
-def Hexagon_M2_mpyud_acc_lh_s0:
+def HEXAGON_M2_mpyud_acc_lh_s0:
di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
-def Hexagon_M2_mpyud_acc_lh_s1:
+def HEXAGON_M2_mpyud_acc_lh_s1:
di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
-def Hexagon_M2_mpyud_acc_ll_s0:
+def HEXAGON_M2_mpyud_acc_ll_s0:
di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
-def Hexagon_M2_mpyud_acc_ll_s1:
+def HEXAGON_M2_mpyud_acc_ll_s1:
di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def Hexagon_M2_mpyud_nac_hh_s0:
+def HEXAGON_M2_mpyud_nac_hh_s0:
di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
-def Hexagon_M2_mpyud_nac_hh_s1:
+def HEXAGON_M2_mpyud_nac_hh_s1:
di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
-def Hexagon_M2_mpyud_nac_hl_s0:
+def HEXAGON_M2_mpyud_nac_hl_s0:
di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
-def Hexagon_M2_mpyud_nac_hl_s1:
+def HEXAGON_M2_mpyud_nac_hl_s1:
di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
-def Hexagon_M2_mpyud_nac_lh_s0:
+def HEXAGON_M2_mpyud_nac_lh_s0:
di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
-def Hexagon_M2_mpyud_nac_lh_s1:
+def HEXAGON_M2_mpyud_nac_lh_s1:
di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
-def Hexagon_M2_mpyud_nac_ll_s0:
+def HEXAGON_M2_mpyud_nac_ll_s0:
di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
-def Hexagon_M2_mpyud_nac_ll_s1:
+def HEXAGON_M2_mpyud_nac_ll_s1:
di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
@@ -2864,15 +2900,15 @@ def Hexagon_M2_mpyud_nac_ll_s1:
*********************************************************************/
// MTYPE / VB / Vector reduce add unsigned bytes.
-def Hexagon_A2_vraddub:
+def HEXAGON_A2_vraddub:
di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
-def Hexagon_A2_vraddub_acc:
+def HEXAGON_A2_vraddub_acc:
di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
-def Hexagon_A2_vrsadub:
+def HEXAGON_A2_vrsadub:
di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
-def Hexagon_A2_vrsadub_acc:
+def HEXAGON_A2_vrsadub_acc:
di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
/********************************************************************
@@ -2880,56 +2916,56 @@ def Hexagon_A2_vrsadub_acc:
*********************************************************************/
// MTYPE / VH / Vector dual multiply.
-def Hexagon_M2_vdmpys_s1:
+def HEXAGON_M2_vdmpys_s1:
di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
-def Hexagon_M2_vdmpys_s0:
+def HEXAGON_M2_vdmpys_s0:
di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
-def Hexagon_M2_vdmacs_s1:
+def HEXAGON_M2_vdmacs_s1:
di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
-def Hexagon_M2_vdmacs_s0:
+def HEXAGON_M2_vdmacs_s0:
di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
// MTYPE / VH / Vector dual multiply with round and pack.
-def Hexagon_M2_vdmpyrs_s0:
+def HEXAGON_M2_vdmpyrs_s0:
si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
-def Hexagon_M2_vdmpyrs_s1:
+def HEXAGON_M2_vdmpyrs_s1:
si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
// MTYPE / VH / Vector multiply even halfwords.
-def Hexagon_M2_vmpy2es_s1:
+def HEXAGON_M2_vmpy2es_s1:
di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
-def Hexagon_M2_vmpy2es_s0:
+def HEXAGON_M2_vmpy2es_s0:
di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
-def Hexagon_M2_vmac2es:
+def HEXAGON_M2_vmac2es:
di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
-def Hexagon_M2_vmac2es_s1:
+def HEXAGON_M2_vmac2es_s1:
di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
-def Hexagon_M2_vmac2es_s0:
+def HEXAGON_M2_vmac2es_s0:
di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
// MTYPE / VH / Vector multiply halfwords.
-def Hexagon_M2_vmpy2s_s0:
+def HEXAGON_M2_vmpy2s_s0:
di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
-def Hexagon_M2_vmpy2s_s1:
+def HEXAGON_M2_vmpy2s_s1:
di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
-def Hexagon_M2_vmac2:
+def HEXAGON_M2_vmac2:
di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
-def Hexagon_M2_vmac2s_s0:
+def HEXAGON_M2_vmac2s_s0:
di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
-def Hexagon_M2_vmac2s_s1:
+def HEXAGON_M2_vmac2s_s1:
di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
// MTYPE / VH / Vector multiply halfwords with round and pack.
-def Hexagon_M2_vmpy2s_s0pack:
+def HEXAGON_M2_vmpy2s_s0pack:
si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
-def Hexagon_M2_vmpy2s_s1pack:
+def HEXAGON_M2_vmpy2s_s1pack:
si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
// MTYPE / VH / Vector reduce multiply halfwords.
// Rxx32+=vrmpyh(Rss32,Rtt32)
-def Hexagon_M2_vrmpy_s0:
+def HEXAGON_M2_vrmpy_s0:
di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
-def Hexagon_M2_vrmac_s0:
+def HEXAGON_M2_vrmac_s0:
di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
@@ -2938,25 +2974,25 @@ def Hexagon_M2_vrmac_s0:
*********************************************************************/
// STYPE / ALU / Absolute value.
-def Hexagon_A2_abs:
+def HEXAGON_A2_abs:
si_SInst_si <"abs", int_hexagon_A2_abs>;
-def Hexagon_A2_absp:
+def HEXAGON_A2_absp:
di_SInst_di <"abs", int_hexagon_A2_absp>;
-def Hexagon_A2_abssat:
+def HEXAGON_A2_abssat:
si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
// STYPE / ALU / Negate.
-def Hexagon_A2_negp:
+def HEXAGON_A2_negp:
di_SInst_di <"neg", int_hexagon_A2_negp>;
-def Hexagon_A2_negsat:
+def HEXAGON_A2_negsat:
si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
// STYPE / ALU / Logical Not.
-def Hexagon_A2_notp:
+def HEXAGON_A2_notp:
di_SInst_di <"not", int_hexagon_A2_notp>;
// STYPE / ALU / Sign extend word to doubleword.
-def Hexagon_A2_sxtw:
+def HEXAGON_A2_sxtw:
di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
@@ -2965,88 +3001,88 @@ def Hexagon_A2_sxtw:
*********************************************************************/
// STYPE / BIT / Count leading.
-def Hexagon_S2_cl0:
+def HEXAGON_S2_cl0:
si_SInst_si <"cl0", int_hexagon_S2_cl0>;
-def Hexagon_S2_cl0p:
+def HEXAGON_S2_cl0p:
si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
-def Hexagon_S2_cl1:
+def HEXAGON_S2_cl1:
si_SInst_si <"cl1", int_hexagon_S2_cl1>;
-def Hexagon_S2_cl1p:
+def HEXAGON_S2_cl1p:
si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
-def Hexagon_S2_clb:
+def HEXAGON_S2_clb:
si_SInst_si <"clb", int_hexagon_S2_clb>;
-def Hexagon_S2_clbp:
+def HEXAGON_S2_clbp:
si_SInst_di <"clb", int_hexagon_S2_clbp>;
-def Hexagon_S2_clbnorm:
+def HEXAGON_S2_clbnorm:
si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
// STYPE / BIT / Count trailing.
-def Hexagon_S2_ct0:
+def HEXAGON_S2_ct0:
si_SInst_si <"ct0", int_hexagon_S2_ct0>;
-def Hexagon_S2_ct1:
+def HEXAGON_S2_ct1:
si_SInst_si <"ct1", int_hexagon_S2_ct1>;
// STYPE / BIT / Compare bit mask.
-def HEXAGON_C2_bitsclr:
+def Hexagon_C2_bitsclr:
qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
-def HEXAGON_C2_bitsclri:
+def Hexagon_C2_bitsclri:
qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
-def HEXAGON_C2_bitsset:
+def Hexagon_C2_bitsset:
qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
// STYPE / BIT / Extract unsigned.
// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
-def Hexagon_S2_extractu:
+def HEXAGON_S2_extractu:
si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
-def Hexagon_S2_extractu_rp:
+def HEXAGON_S2_extractu_rp:
si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
-def Hexagon_S2_extractup:
+def HEXAGON_S2_extractup:
di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
-def Hexagon_S2_extractup_rp:
+def HEXAGON_S2_extractup_rp:
di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
// STYPE / BIT / Insert bitfield.
-def HEXAGON_S2_insert:
+def Hexagon_S2_insert:
si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
-def HEXAGON_S2_insert_rp:
+def Hexagon_S2_insert_rp:
si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
-def HEXAGON_S2_insertp:
+def Hexagon_S2_insertp:
di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
-def HEXAGON_S2_insertp_rp:
+def Hexagon_S2_insertp_rp:
di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
// STYPE / BIT / Interleave/deinterleave.
-def HEXAGON_S2_interleave:
+def Hexagon_S2_interleave:
di_SInst_di <"interleave", int_hexagon_S2_interleave>;
-def HEXAGON_S2_deinterleave:
+def Hexagon_S2_deinterleave:
di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
// STYPE / BIT / Linear feedback-shift Iteration.
-def HEXAGON_S2_lfsp:
+def Hexagon_S2_lfsp:
di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
// STYPE / BIT / Bit reverse.
-def HEXAGON_S2_brev:
+def Hexagon_S2_brev:
si_SInst_si <"brev", int_hexagon_S2_brev>;
// STYPE / BIT / Set/Clear/Toggle Bit.
-def Hexagon_S2_setbit_i:
+def HEXAGON_S2_setbit_i:
si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
-def Hexagon_S2_togglebit_i:
+def HEXAGON_S2_togglebit_i:
si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
-def Hexagon_S2_clrbit_i:
+def HEXAGON_S2_clrbit_i:
si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
-def Hexagon_S2_setbit_r:
+def HEXAGON_S2_setbit_r:
si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
-def Hexagon_S2_togglebit_r:
+def HEXAGON_S2_togglebit_r:
si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
-def Hexagon_S2_clrbit_r:
+def HEXAGON_S2_clrbit_r:
si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
// STYPE / BIT / Test Bit.
-def Hexagon_S2_tstbit_i:
+def HEXAGON_S2_tstbit_i:
qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
-def Hexagon_S2_tstbit_r:
+def HEXAGON_S2_tstbit_r:
qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
@@ -3055,11 +3091,11 @@ def Hexagon_S2_tstbit_r:
*********************************************************************/
// STYPE / COMPLEX / Vector Complex conjugate.
-def Hexagon_A2_vconj:
+def HEXAGON_A2_vconj:
di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
// STYPE / COMPLEX / Vector Complex rotate.
-def Hexagon_S2_vcrotate:
+def HEXAGON_S2_vcrotate:
di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
@@ -3068,102 +3104,102 @@ def Hexagon_S2_vcrotate:
*********************************************************************/
// STYPE / PERM / Saturate.
-def Hexagon_A2_sat:
+def HEXAGON_A2_sat:
si_SInst_di <"sat", int_hexagon_A2_sat>;
-def Hexagon_A2_satb:
+def HEXAGON_A2_satb:
si_SInst_si <"satb", int_hexagon_A2_satb>;
-def Hexagon_A2_sath:
+def HEXAGON_A2_sath:
si_SInst_si <"sath", int_hexagon_A2_sath>;
-def Hexagon_A2_satub:
+def HEXAGON_A2_satub:
si_SInst_si <"satub", int_hexagon_A2_satub>;
-def Hexagon_A2_satuh:
+def HEXAGON_A2_satuh:
si_SInst_si <"satuh", int_hexagon_A2_satuh>;
// STYPE / PERM / Swizzle bytes.
-def Hexagon_A2_swiz:
+def HEXAGON_A2_swiz:
si_SInst_si <"swiz", int_hexagon_A2_swiz>;
// STYPE / PERM / Vector align.
// Need custom lowering
-def Hexagon_S2_valignib:
+def HEXAGON_S2_valignib:
di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
-def Hexagon_S2_valignrb:
+def HEXAGON_S2_valignrb:
di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
// STYPE / PERM / Vector round and pack.
-def Hexagon_S2_vrndpackwh:
+def HEXAGON_S2_vrndpackwh:
si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
-def Hexagon_S2_vrndpackwhs:
+def HEXAGON_S2_vrndpackwhs:
si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
// STYPE / PERM / Vector saturate and pack.
-def Hexagon_S2_svsathb:
+def HEXAGON_S2_svsathb:
si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
-def Hexagon_S2_vsathb:
+def HEXAGON_S2_vsathb:
si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
-def Hexagon_S2_svsathub:
+def HEXAGON_S2_svsathub:
si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
-def Hexagon_S2_vsathub:
+def HEXAGON_S2_vsathub:
si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
-def Hexagon_S2_vsatwh:
+def HEXAGON_S2_vsatwh:
si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
-def Hexagon_S2_vsatwuh:
+def HEXAGON_S2_vsatwuh:
si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
// STYPE / PERM / Vector saturate without pack.
-def Hexagon_S2_vsathb_nopack:
+def HEXAGON_S2_vsathb_nopack:
di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
-def Hexagon_S2_vsathub_nopack:
+def HEXAGON_S2_vsathub_nopack:
di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
-def Hexagon_S2_vsatwh_nopack:
+def HEXAGON_S2_vsatwh_nopack:
di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
-def Hexagon_S2_vsatwuh_nopack:
+def HEXAGON_S2_vsatwuh_nopack:
di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
// STYPE / PERM / Vector shuffle.
-def Hexagon_S2_shuffeb:
+def HEXAGON_S2_shuffeb:
di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
-def Hexagon_S2_shuffeh:
+def HEXAGON_S2_shuffeh:
di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
-def Hexagon_S2_shuffob:
+def HEXAGON_S2_shuffob:
di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
-def Hexagon_S2_shuffoh:
+def HEXAGON_S2_shuffoh:
di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
// STYPE / PERM / Vector splat bytes.
-def Hexagon_S2_vsplatrb:
+def HEXAGON_S2_vsplatrb:
si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
// STYPE / PERM / Vector splat halfwords.
-def Hexagon_S2_vsplatrh:
+def HEXAGON_S2_vsplatrh:
di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
// STYPE / PERM / Vector splice.
-def HEXAGON_S2_vsplicerb:
+def Hexagon_S2_vsplicerb:
di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
-def HEXAGON_S2_vspliceib:
+def Hexagon_S2_vspliceib:
di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
// STYPE / PERM / Sign extend.
-def Hexagon_S2_vsxtbh:
+def HEXAGON_S2_vsxtbh:
di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
-def Hexagon_S2_vsxthw:
+def HEXAGON_S2_vsxthw:
di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
// STYPE / PERM / Truncate.
-def Hexagon_S2_vtrunehb:
+def HEXAGON_S2_vtrunehb:
si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
-def Hexagon_S2_vtrunohb:
+def HEXAGON_S2_vtrunohb:
si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
-def Hexagon_S2_vtrunewh:
+def HEXAGON_S2_vtrunewh:
di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
-def Hexagon_S2_vtrunowh:
+def HEXAGON_S2_vtrunowh:
di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
// STYPE / PERM / Zero extend.
-def Hexagon_S2_vzxtbh:
+def HEXAGON_S2_vzxtbh:
di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
-def Hexagon_S2_vzxthw:
+def HEXAGON_S2_vzxthw:
di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
@@ -3172,17 +3208,17 @@ def Hexagon_S2_vzxthw:
*********************************************************************/
// STYPE / PRED / Mask generate from predicate.
-def Hexagon_C2_mask:
+def HEXAGON_C2_mask:
di_SInst_qi <"mask", int_hexagon_C2_mask>;
// STYPE / PRED / Predicate transfer.
-def Hexagon_C2_tfrpr:
+def HEXAGON_C2_tfrpr:
si_SInst_qi <"", int_hexagon_C2_tfrpr>;
-def Hexagon_C2_tfrrp:
+def HEXAGON_C2_tfrrp:
qi_SInst_si <"", int_hexagon_C2_tfrrp>;
// STYPE / PRED / Viterbi pack even and odd predicate bits.
-def Hexagon_C2_vitpack:
+def HEXAGON_C2_vitpack:
si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
@@ -3191,202 +3227,202 @@ def Hexagon_C2_vitpack:
*********************************************************************/
// STYPE / SHIFT / Shift by immediate.
-def Hexagon_S2_asl_i_r:
+def HEXAGON_S2_asl_i_r:
si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
-def Hexagon_S2_asr_i_r:
+def HEXAGON_S2_asr_i_r:
si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
-def Hexagon_S2_lsr_i_r:
+def HEXAGON_S2_lsr_i_r:
si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
-def Hexagon_S2_asl_i_p:
+def HEXAGON_S2_asl_i_p:
di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
-def Hexagon_S2_asr_i_p:
+def HEXAGON_S2_asr_i_p:
di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
-def Hexagon_S2_lsr_i_p:
+def HEXAGON_S2_lsr_i_p:
di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
// STYPE / SHIFT / Shift by immediate and accumulate.
-def Hexagon_S2_asl_i_r_acc:
+def HEXAGON_S2_asl_i_r_acc:
si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
-def Hexagon_S2_asr_i_r_acc:
+def HEXAGON_S2_asr_i_r_acc:
si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
-def Hexagon_S2_lsr_i_r_acc:
+def HEXAGON_S2_lsr_i_r_acc:
si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
-def Hexagon_S2_asl_i_r_nac:
+def HEXAGON_S2_asl_i_r_nac:
si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
-def Hexagon_S2_asr_i_r_nac:
+def HEXAGON_S2_asr_i_r_nac:
si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
-def Hexagon_S2_lsr_i_r_nac:
+def HEXAGON_S2_lsr_i_r_nac:
si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
-def Hexagon_S2_asl_i_p_acc:
+def HEXAGON_S2_asl_i_p_acc:
di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
-def Hexagon_S2_asr_i_p_acc:
+def HEXAGON_S2_asr_i_p_acc:
di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
-def Hexagon_S2_lsr_i_p_acc:
+def HEXAGON_S2_lsr_i_p_acc:
di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
-def Hexagon_S2_asl_i_p_nac:
+def HEXAGON_S2_asl_i_p_nac:
di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
-def Hexagon_S2_asr_i_p_nac:
+def HEXAGON_S2_asr_i_p_nac:
di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
-def Hexagon_S2_lsr_i_p_nac:
+def HEXAGON_S2_lsr_i_p_nac:
di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
// STYPE / SHIFT / Shift by immediate and add.
-def Hexagon_S2_addasl_rrri:
+def HEXAGON_S2_addasl_rrri:
si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
// STYPE / SHIFT / Shift by immediate and logical.
-def Hexagon_S2_asl_i_r_and:
+def HEXAGON_S2_asl_i_r_and:
si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
-def Hexagon_S2_asr_i_r_and:
+def HEXAGON_S2_asr_i_r_and:
si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
-def Hexagon_S2_lsr_i_r_and:
+def HEXAGON_S2_lsr_i_r_and:
si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
-def Hexagon_S2_asl_i_r_xacc:
+def HEXAGON_S2_asl_i_r_xacc:
si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
-def Hexagon_S2_lsr_i_r_xacc:
+def HEXAGON_S2_lsr_i_r_xacc:
si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
-def Hexagon_S2_asl_i_r_or:
+def HEXAGON_S2_asl_i_r_or:
si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
-def Hexagon_S2_asr_i_r_or:
+def HEXAGON_S2_asr_i_r_or:
si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
-def Hexagon_S2_lsr_i_r_or:
+def HEXAGON_S2_lsr_i_r_or:
si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
-def Hexagon_S2_asl_i_p_and:
+def HEXAGON_S2_asl_i_p_and:
di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
-def Hexagon_S2_asr_i_p_and:
+def HEXAGON_S2_asr_i_p_and:
di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
-def Hexagon_S2_lsr_i_p_and:
+def HEXAGON_S2_lsr_i_p_and:
di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
-def Hexagon_S2_asl_i_p_xacc:
+def HEXAGON_S2_asl_i_p_xacc:
di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
-def Hexagon_S2_lsr_i_p_xacc:
+def HEXAGON_S2_lsr_i_p_xacc:
di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
-def Hexagon_S2_asl_i_p_or:
+def HEXAGON_S2_asl_i_p_or:
di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
-def Hexagon_S2_asr_i_p_or:
+def HEXAGON_S2_asr_i_p_or:
di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
-def Hexagon_S2_lsr_i_p_or:
+def HEXAGON_S2_lsr_i_p_or:
di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
// STYPE / SHIFT / Shift right by immediate with rounding.
-def Hexagon_S2_asr_i_r_rnd:
+def HEXAGON_S2_asr_i_r_rnd:
si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
-def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+def HEXAGON_S2_asr_i_r_rnd_goodsyntax:
si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
// STYPE / SHIFT / Shift left by immediate with saturation.
-def Hexagon_S2_asl_i_r_sat:
+def HEXAGON_S2_asl_i_r_sat:
si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
// STYPE / SHIFT / Shift by register.
-def Hexagon_S2_asl_r_r:
+def HEXAGON_S2_asl_r_r:
si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
-def Hexagon_S2_asr_r_r:
+def HEXAGON_S2_asr_r_r:
si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
-def Hexagon_S2_lsl_r_r:
+def HEXAGON_S2_lsl_r_r:
si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
-def Hexagon_S2_lsr_r_r:
+def HEXAGON_S2_lsr_r_r:
si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
-def Hexagon_S2_asl_r_p:
+def HEXAGON_S2_asl_r_p:
di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
-def Hexagon_S2_asr_r_p:
+def HEXAGON_S2_asr_r_p:
di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
-def Hexagon_S2_lsl_r_p:
+def HEXAGON_S2_lsl_r_p:
di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
-def Hexagon_S2_lsr_r_p:
+def HEXAGON_S2_lsr_r_p:
di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
// STYPE / SHIFT / Shift by register and accumulate.
-def Hexagon_S2_asl_r_r_acc:
+def HEXAGON_S2_asl_r_r_acc:
si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
-def Hexagon_S2_asr_r_r_acc:
+def HEXAGON_S2_asr_r_r_acc:
si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
-def Hexagon_S2_lsl_r_r_acc:
+def HEXAGON_S2_lsl_r_r_acc:
si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
-def Hexagon_S2_lsr_r_r_acc:
+def HEXAGON_S2_lsr_r_r_acc:
si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
-def Hexagon_S2_asl_r_p_acc:
+def HEXAGON_S2_asl_r_p_acc:
di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
-def Hexagon_S2_asr_r_p_acc:
+def HEXAGON_S2_asr_r_p_acc:
di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
-def Hexagon_S2_lsl_r_p_acc:
+def HEXAGON_S2_lsl_r_p_acc:
di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
-def Hexagon_S2_lsr_r_p_acc:
+def HEXAGON_S2_lsr_r_p_acc:
di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
-def Hexagon_S2_asl_r_r_nac:
+def HEXAGON_S2_asl_r_r_nac:
si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
-def Hexagon_S2_asr_r_r_nac:
+def HEXAGON_S2_asr_r_r_nac:
si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
-def Hexagon_S2_lsl_r_r_nac:
+def HEXAGON_S2_lsl_r_r_nac:
si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
-def Hexagon_S2_lsr_r_r_nac:
+def HEXAGON_S2_lsr_r_r_nac:
si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
-def Hexagon_S2_asl_r_p_nac:
+def HEXAGON_S2_asl_r_p_nac:
di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
-def Hexagon_S2_asr_r_p_nac:
+def HEXAGON_S2_asr_r_p_nac:
di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
-def Hexagon_S2_lsl_r_p_nac:
+def HEXAGON_S2_lsl_r_p_nac:
di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
-def Hexagon_S2_lsr_r_p_nac:
+def HEXAGON_S2_lsr_r_p_nac:
di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
// STYPE / SHIFT / Shift by register and logical.
-def Hexagon_S2_asl_r_r_and:
+def HEXAGON_S2_asl_r_r_and:
si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
-def Hexagon_S2_asr_r_r_and:
+def HEXAGON_S2_asr_r_r_and:
si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
-def Hexagon_S2_lsl_r_r_and:
+def HEXAGON_S2_lsl_r_r_and:
si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
-def Hexagon_S2_lsr_r_r_and:
+def HEXAGON_S2_lsr_r_r_and:
si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
-def Hexagon_S2_asl_r_r_or:
+def HEXAGON_S2_asl_r_r_or:
si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
-def Hexagon_S2_asr_r_r_or:
+def HEXAGON_S2_asr_r_r_or:
si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
-def Hexagon_S2_lsl_r_r_or:
+def HEXAGON_S2_lsl_r_r_or:
si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
-def Hexagon_S2_lsr_r_r_or:
+def HEXAGON_S2_lsr_r_r_or:
si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
-def Hexagon_S2_asl_r_p_and:
+def HEXAGON_S2_asl_r_p_and:
di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
-def Hexagon_S2_asr_r_p_and:
+def HEXAGON_S2_asr_r_p_and:
di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
-def Hexagon_S2_lsl_r_p_and:
+def HEXAGON_S2_lsl_r_p_and:
di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
-def Hexagon_S2_lsr_r_p_and:
+def HEXAGON_S2_lsr_r_p_and:
di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
-def Hexagon_S2_asl_r_p_or:
+def HEXAGON_S2_asl_r_p_or:
di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
-def Hexagon_S2_asr_r_p_or:
+def HEXAGON_S2_asr_r_p_or:
di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
-def Hexagon_S2_lsl_r_p_or:
+def HEXAGON_S2_lsl_r_p_or:
di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
-def Hexagon_S2_lsr_r_p_or:
+def HEXAGON_S2_lsr_r_p_or:
di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
// STYPE / SHIFT / Shift by register with saturation.
-def Hexagon_S2_asl_r_r_sat:
+def HEXAGON_S2_asl_r_r_sat:
si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
-def Hexagon_S2_asr_r_r_sat:
+def HEXAGON_S2_asr_r_r_sat:
si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
// STYPE / SHIFT / Table Index.
-def HEXAGON_S2_tableidxb_goodsyntax:
+def Hexagon_S2_tableidxb_goodsyntax:
si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
-def HEXAGON_S2_tableidxd_goodsyntax:
+def Hexagon_S2_tableidxd_goodsyntax:
si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
-def HEXAGON_S2_tableidxh_goodsyntax:
+def Hexagon_S2_tableidxh_goodsyntax:
si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
-def HEXAGON_S2_tableidxw_goodsyntax:
+def Hexagon_S2_tableidxw_goodsyntax:
si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
@@ -3396,29 +3432,29 @@ def HEXAGON_S2_tableidxw_goodsyntax:
// STYPE / VH / Vector absolute value halfwords.
// Rdd64=vabsh(Rss64)
-def Hexagon_A2_vabsh:
+def HEXAGON_A2_vabsh:
di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
-def Hexagon_A2_vabshsat:
+def HEXAGON_A2_vabshsat:
di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
// STYPE / VH / Vector shift halfwords by immediate.
// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
-def Hexagon_S2_asl_i_vh:
+def HEXAGON_S2_asl_i_vh:
di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
-def Hexagon_S2_asr_i_vh:
+def HEXAGON_S2_asr_i_vh:
di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
-def Hexagon_S2_lsr_i_vh:
+def HEXAGON_S2_lsr_i_vh:
di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
// STYPE / VH / Vector shift halfwords by register.
// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
-def Hexagon_S2_asl_r_vh:
+def HEXAGON_S2_asl_r_vh:
di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
-def Hexagon_S2_asr_r_vh:
+def HEXAGON_S2_asr_r_vh:
di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
-def Hexagon_S2_lsl_r_vh:
+def HEXAGON_S2_lsl_r_vh:
di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
-def Hexagon_S2_lsr_r_vh:
+def HEXAGON_S2_lsr_r_vh:
di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
@@ -3427,36 +3463,41 @@ def Hexagon_S2_lsr_r_vh:
*********************************************************************/
// STYPE / VW / Vector absolute value words.
-def Hexagon_A2_vabsw:
+def HEXAGON_A2_vabsw:
di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
-def Hexagon_A2_vabswsat:
+def HEXAGON_A2_vabswsat:
di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
// STYPE / VW / Vector shift words by immediate.
// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def Hexagon_S2_asl_i_vw:
+def HEXAGON_S2_asl_i_vw:
di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
-def Hexagon_S2_asr_i_vw:
+def HEXAGON_S2_asr_i_vw:
di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
-def Hexagon_S2_lsr_i_vw:
+def HEXAGON_S2_lsr_i_vw:
di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
// STYPE / VW / Vector shift words by register.
// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def Hexagon_S2_asl_r_vw:
+def HEXAGON_S2_asl_r_vw:
di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
-def Hexagon_S2_asr_r_vw:
+def HEXAGON_S2_asr_r_vw:
di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
-def Hexagon_S2_lsl_r_vw:
+def HEXAGON_S2_lsl_r_vw:
di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
-def Hexagon_S2_lsr_r_vw:
+def HEXAGON_S2_lsr_r_vw:
di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
// STYPE / VW / Vector shift words with truncate and pack.
-def Hexagon_S2_asr_r_svw_trun:
+def HEXAGON_S2_asr_r_svw_trun:
si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
-def Hexagon_S2_asr_i_svw_trun:
+def HEXAGON_S2_asr_i_svw_trun:
si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+// LD / Circular loads.
+def HEXAGON_circ_ldd:
+ di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
+include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index 68eaf68480e0..2788101d5a66 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -12,18 +12,28 @@
// Optimized with intrinsic accumulates
//
def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
- (COMBINE_rr
- (Hexagon_M2_maci
- (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
- subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
- (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
- subreg_loreg))>;
+ (i64
+ (COMBINE_rr
+ (HEXAGON_M2_maci
+ (HEXAGON_M2_maci
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))),
+ subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), subreg_loreg))))>;
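+
+// A sketch of the arithmetic behind the pattern above (assuming MPYU64 is an
+// unsigned 32x32->64-bit multiply and M2_maci is a multiply-accumulate,
+// Rx += mpyi(Rs, Rt)):
+//   a = a_hi*2^32 + a_lo,  b = b_hi*2^32 + b_lo
+//   lo32(a*b) = lo32(a_lo*b_lo)
+//   hi32(a*b) = hi32(a_lo*b_lo) + a_lo*b_hi + a_hi*b_lo   (mod 2^32)
+// so the high word is built with two maci steps on top of MPYU64, and the
+// two words are joined with COMBINE_rr.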
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
new file mode 100644
index 000000000000..1d44b526d298
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -0,0 +1,395 @@
+class sf_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class sf_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class sf_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class sf_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class si_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class df_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class di_SInst_sf<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class df_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class df_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class di_SInst_df<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+
+class df_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
+
+class sf_MInst_sfsf<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class df_MInst_dfdf<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class qi_ALU64_dfdf<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class qi_ALU64_dfu5<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+
+class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+
+class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2, $src3):scale")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+
+class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2, IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2, $src3):scale")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2, IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$dst2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):lib")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2, DoubleRegs:$dst2))],
+ "$dst2 = $dst">;
+
+class qi_SInst_sfsf<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_sfu5<string opc, Intrinsic IntID>
+ : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class sf_ALU64_u10_pos<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class sf_ALU64_u10_neg<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class df_ALU64_u10_pos<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class df_ALU64_u10_neg<string opc, Intrinsic IntID>
+ : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_MInst_diu6<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class di_MInst_diu4_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_diu4_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+
+def HEXAGON_C4_fastcorner9:
+ qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>;
+def HEXAGON_C4_fastcorner9_not:
+ qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>;
+def HEXAGON_M5_vrmpybuu:
+ di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>;
+def HEXAGON_M5_vrmacbuu:
+ di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>;
+def HEXAGON_M5_vrmpybsu:
+ di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>;
+def HEXAGON_M5_vrmacbsu:
+ di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>;
+def HEXAGON_M5_vmpybuu:
+ di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>;
+def HEXAGON_M5_vmpybsu:
+ di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>;
+def HEXAGON_M5_vmacbuu:
+ di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>;
+def HEXAGON_M5_vmacbsu:
+ di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>;
+def HEXAGON_M5_vdmpybsu:
+ di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>;
+def HEXAGON_M5_vdmacbsu:
+ di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>;
+def HEXAGON_A5_vaddhubs:
+ si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>;
+def HEXAGON_S5_popcountp:
+ si_SInst_di <"popcount", int_hexagon_S5_popcountp>;
+def HEXAGON_S5_asrhub_rnd_sat_goodsyntax:
+ si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+def HEXAGON_S5_asrhub_sat:
+ si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>;
+def HEXAGON_S5_vasrhrnd_goodsyntax:
+ di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>;
+def HEXAGON_S2_asr_i_p_rnd:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>;
+def HEXAGON_S2_asr_i_p_rnd_goodsyntax:
+ di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+def HEXAGON_F2_sfadd:
+ sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>;
+def HEXAGON_F2_sfsub:
+ sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>;
+def HEXAGON_F2_sfmpy:
+ sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>;
+def HEXAGON_F2_sffma:
+ sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>;
+def HEXAGON_F2_sffma_sc:
+ sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>;
+def HEXAGON_F2_sffms:
+ sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>;
+def HEXAGON_F2_sffma_lib:
+ sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>;
+def HEXAGON_F2_sffms_lib:
+ sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>;
+def HEXAGON_F2_sfcmpeq:
+ qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>;
+def HEXAGON_F2_sfcmpgt:
+ qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>;
+def HEXAGON_F2_sfcmpge:
+ qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>;
+def HEXAGON_F2_sfcmpuo:
+ qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>;
+def HEXAGON_F2_sfmax:
+ sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>;
+def HEXAGON_F2_sfmin:
+ sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>;
+def HEXAGON_F2_sfclass:
+ qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>;
+def HEXAGON_F2_sfimm_p:
+ sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>;
+def HEXAGON_F2_sfimm_n:
+ sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>;
+def HEXAGON_F2_sffixupn:
+ sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>;
+def HEXAGON_F2_sffixupd:
+ sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>;
+def HEXAGON_F2_sffixupr:
+ sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>;
+def HEXAGON_F2_dfadd:
+ df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>;
+def HEXAGON_F2_dfsub:
+ df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>;
+def HEXAGON_F2_dfmpy:
+ df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>;
+def HEXAGON_F2_dffma:
+ df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>;
+def HEXAGON_F2_dffms:
+ df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>;
+def HEXAGON_F2_dffma_lib:
+ df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>;
+def HEXAGON_F2_dffms_lib:
+ df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>;
+def HEXAGON_F2_dffma_sc:
+ df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>;
+def HEXAGON_F2_dfmax:
+ df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>;
+def HEXAGON_F2_dfmin:
+ df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>;
+def HEXAGON_F2_dfcmpeq:
+ qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>;
+def HEXAGON_F2_dfcmpgt:
+ qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>;
+def HEXAGON_F2_dfcmpge:
+ qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>;
+def HEXAGON_F2_dfcmpuo:
+ qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>;
+def HEXAGON_F2_dfclass:
+ qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>;
+def HEXAGON_F2_dfimm_p:
+ df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>;
+def HEXAGON_F2_dfimm_n:
+ df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>;
+def HEXAGON_F2_dffixupn:
+ df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>;
+def HEXAGON_F2_dffixupd:
+ df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>;
+def HEXAGON_F2_dffixupr:
+ df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>;
+def HEXAGON_F2_conv_sf2df:
+ df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>;
+def HEXAGON_F2_conv_df2sf:
+ sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>;
+def HEXAGON_F2_conv_uw2sf:
+ sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>;
+def HEXAGON_F2_conv_uw2df:
+ df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>;
+def HEXAGON_F2_conv_w2sf:
+ sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>;
+def HEXAGON_F2_conv_w2df:
+ df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>;
+def HEXAGON_F2_conv_ud2sf:
+ sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>;
+def HEXAGON_F2_conv_ud2df:
+ df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>;
+def HEXAGON_F2_conv_d2sf:
+ sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>;
+def HEXAGON_F2_conv_d2df:
+ df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>;
+def HEXAGON_F2_conv_sf2uw:
+ si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>;
+def HEXAGON_F2_conv_sf2w:
+ si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>;
+def HEXAGON_F2_conv_sf2ud:
+ di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>;
+def HEXAGON_F2_conv_sf2d:
+ di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>;
+def HEXAGON_F2_conv_df2uw:
+ si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>;
+def HEXAGON_F2_conv_df2w:
+ si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>;
+def HEXAGON_F2_conv_df2ud:
+ di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>;
+def HEXAGON_F2_conv_df2d:
+ di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>;
+def HEXAGON_F2_conv_sf2uw_chop:
+ si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>;
+def HEXAGON_F2_conv_sf2w_chop:
+ si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>;
+def HEXAGON_F2_conv_sf2ud_chop:
+ di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>;
+def HEXAGON_F2_conv_sf2d_chop:
+ di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>;
+def HEXAGON_F2_conv_df2uw_chop:
+ si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>;
+def HEXAGON_F2_conv_df2w_chop:
+ si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>;
+def HEXAGON_F2_conv_df2ud_chop:
+ di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>;
+def HEXAGON_F2_conv_df2d_chop:
+ di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>;
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
new file mode 100644
index 000000000000..7a16c241ff8f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMCInst.h
@@ -0,0 +1,41 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotation.
+//
+//===----------------------------------------------------------------------===//
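+//
+// A minimal usage sketch (hypothetical; the printer and stream below are
+// illustrative, not part of this header): a VLIW-aware printer can bracket
+// packets using these markers, e.g.
+//   if (HMI.isStartPacket()) OS << "{\n";
+//   // ... print the instruction itself ...
+//   if (HMI.isEndPacket())   OS << "}\n";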
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+ class HexagonMCInst: public MCInst {
+ // Packet start and end markers
+ unsigned startPacket: 1, endPacket: 1;
+ const MachineInstr *MachineI;
+ public:
+ explicit HexagonMCInst(): MCInst(),
+ startPacket(0), endPacket(0) {}
+
+ const MachineInstr* getMI() const { return MachineI; }
+
+ void setMI(const MachineInstr *MI) { MachineI = MI; }
+
+ bool isStartPacket() const { return (startPacket); }
+ bool isEndPacket() const { return (endPacket); }
+
+ void setStartPacket(bool yes) { startPacket = yes; }
+ void setEndPacket(bool yes) { endPacket = yes; }
+ };
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index fbb331bdd8bf..70bddcc76a59 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -49,7 +49,7 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
switch (MO.getType()) {
default:
MI->dump();
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
new file mode 100644
index 000000000000..7ece4085ecbd
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -0,0 +1,647 @@
+//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the NewValueJump pass in Hexagon.
+// Ideally, we should merge this as a peephole pass prior to register
+// allocation, but because we have a spill in between the feeder and new value
+// jump instructions, we are forced to run it after register allocation.
+// Having said that, we should re-attempt to pull this earlier at some point
+// in the future.
+
+// The basic approach looks for a sequence of a predicated jump, the compare
+// instruction that generates the predicate, and the feeder to the predicate.
+// Once it finds all of them, it collapses the compare and jump instructions
+// into a new value jump instruction.
+//
+//
+//===----------------------------------------------------------------------===//
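+//
+// A hypothetical example of the transformation (illustrative only, not taken
+// from this patch):
+//
+//     r1 = memw(r2+#0)              {  r1 = memw(r2+#0)
+//     p0 = cmp.eq(r1, #0)      =>      if (cmp.eq(r1.new, #0)) jump:t .LBB1_2  }
+//     if (p0) jump .LBB1_2
+//
+// i.e. the compare and the predicated jump collapse into a single new value
+// compare-and-jump packed together with the feeder.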
+#define DEBUG_TYPE "hexagon-nvj"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
+
+static cl::opt<int>
+DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc(
+ "Maximum number of predicated jumps to be converted to New Value Jump"));
+
+static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable New Value Jumps"));
+
+namespace {
+ struct HexagonNewValueJump : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+
+ public:
+ static char ID;
+
+ HexagonNewValueJump() : MachineFunctionPass(ID) { }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon NewValueJump";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+
+ };
+
+} // end of anonymous namespace
+
+char HexagonNewValueJump::ID = 0;
+
+// We have identified that this II could be a feeder to an NVJ;
+// verify that it can be.
+static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ MachineBasicBlock::iterator end,
+ MachineBasicBlock::iterator skip,
+ MachineFunction &MF) {
+
+  // A predicated instruction cannot be a feeder to an NVJ.
+ if (QII->isPredicated(II))
+ return false;
+
+ // Bail out if feederReg is a paired register (double regs in
+ // our case). One would think that we can check to see if a given
+ // register cmpReg1 or cmpReg2 is a sub register of feederReg
+ // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
+ // before the callsite of this function
+ // But we can not as it comes in the following fashion.
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // Hence, we need to check if it's a KILL instruction.
+ if (II->getOpcode() == TargetOpcode::KILL)
+ return false;
+
+
+  // Make sure there is no 'def' or 'use' of any of the uses of the
+  // feeder insn between its definition, this MI, and the jump, jmpInst,
+  // skipping the compare, cmpInst.
+  // Here's an example.
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // r4=memub(r3+r21<<#0)
+ // if (p0.new) jump:t .LBB29_45
+ // Without this check, it will be converted into
+ // r4=memub(r3+r21<<#0)
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // if (p0.new) jump:t .LBB29_45
+  // and result in WAR hazards if converted to a New Value Jump.
+
+ for (unsigned i = 0; i < II->getNumOperands(); ++i) {
+ if (II->getOperand(i).isReg() &&
+ (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
+ MachineBasicBlock::iterator localII = II;
+ ++localII;
+ unsigned Reg = II->getOperand(i).getReg();
+ for (MachineBasicBlock::iterator localBegin = localII;
+ localBegin != end; ++localBegin) {
+ if (localBegin == skip ) continue;
+ // Check for Subregisters too.
+ if (localBegin->modifiesRegister(Reg, TRI) ||
+ localBegin->readsRegister(Reg, TRI))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// These are the common checks that need to be performed
+// to determine whether:
+// 1. compare instruction can be moved before jump.
+// 2. feeder to the compare instruction can be moved before jump.
+static bool commonChecksToProhibitNewValueJump(bool afterRA,
+ MachineBasicBlock::iterator MII) {
+
+ // If store in path, bail out.
+ if (MII->getDesc().mayStore())
+ return false;
+
+ // if call in path, bail out.
+ if (MII->getOpcode() == Hexagon::CALLv3)
+ return false;
+
+ // if NVJ is running prior to RA, do the following checks.
+ if (!afterRA) {
+ // The following Target Opcode instructions are spurious
+ // to new value jump. If they are in the path, bail out.
+    // KILL sets the kill flag on the operand. It also sets up a
+    // single register out of a pair.
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // PHI can be anything after RA.
+    // COPY can rematerialize things in between the feeder, compare and nvj.
+ if (MII->getOpcode() == TargetOpcode::KILL ||
+ MII->getOpcode() == TargetOpcode::PHI ||
+ MII->getOpcode() == TargetOpcode::COPY)
+ return false;
+
+    // The following pseudo Hexagon instructions have their register "use"
+    // and "def" set by individual passes in the backend. At this time,
+    // we don't know the scope of the usage and definitions of these
+    // instructions.
+ if (MII->getOpcode() == Hexagon::TFR_condset_rr ||
+ MII->getOpcode() == Hexagon::TFR_condset_ii ||
+ MII->getOpcode() == Hexagon::TFR_condset_ri ||
+ MII->getOpcode() == Hexagon::TFR_condset_ir ||
+ MII->getOpcode() == Hexagon::LDriw_pred ||
+ MII->getOpcode() == Hexagon::STriw_pred)
+ return false;
+ }
+
+ return true;
+}
+
+static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ unsigned pReg,
+ bool secondReg,
+ bool optLocation,
+ MachineBasicBlock::iterator end,
+ MachineFunction &MF) {
+
+ MachineInstr *MI = II;
+
+ // If the second operand of the compare is an imm, make sure it's in the
+ // range specified by the arch.
+ if (!secondReg) {
+ int64_t v = MI->getOperand(2).getImm();
+ if (MI->getOpcode() == Hexagon::CMPGEri ||
+ (MI->getOpcode() == Hexagon::CMPGEUri && v > 0))
+ --v;
+
+ if (!(isUInt<5>(v) ||
+ ((MI->getOpcode() == Hexagon::CMPEQri ||
+ MI->getOpcode() == Hexagon::CMPGTri ||
+ MI->getOpcode() == Hexagon::CMPGEri) &&
+ (v == -1))))
+ return false;
+ }
+
+ unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+ cmpReg1 = MI->getOperand(1).getReg();
+
+ if (secondReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+
+    // Make sure that the second register is not defined by a COPY.
+    // At the machine code level we don't need this, but if we decide
+    // to move new value jump prior to RA, we would need it.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
+ MachineInstr *def = MRI.getVRegDef(cmpOp2);
+ if (def->getOpcode() == TargetOpcode::COPY)
+ return false;
+ }
+ }
+
+  // Walk the instructions after the compare (predicate def) up to the jump,
+  // and make sure the following conditions are satisfied.
+ ++II ;
+ for (MachineBasicBlock::iterator localII = II; localII != end;
+ ++localII) {
+
+ // Check 1.
+ // If "common" checks fail, bail out.
+ if (!commonChecksToProhibitNewValueJump(optLocation, localII))
+ return false;
+
+ // Check 2.
+ // If there is a def or use of predicate (result of compare), bail out.
+ if (localII->modifiesRegister(pReg, TRI) ||
+ localII->readsRegister(pReg, TRI))
+ return false;
+
+ // Check 3.
+    // If there is a def of any of the uses of the compare (operands of the compare),
+ // bail out.
+ // Eg.
+ // p0 = cmp.eq(r2, r0)
+ // r2 = r4
+ // if (p0.new) jump:t .LBB28_3
+ if (localII->modifiesRegister(cmpReg1, TRI) ||
+ (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
+ return false;
+ }
+ return true;
+}
+
+// Given a compare operator, return a matching New Value Jump
+// compare operator. Make sure that MI here is included in
+// HexagonInstrInfo.cpp::isNewValueJumpCandidate
+static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg,
+ bool secondRegNewified) {
+ switch (MI->getOpcode()) {
+ case Hexagon::CMPEQrr:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+
+ case Hexagon::CMPEQri: {
+ if (reg >= 0)
+ return Hexagon::JMP_EQriPt_nv_V4;
+ else
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPLTrr:
+ case Hexagon::CMPGTrr: {
+ if (secondRegNewified)
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+ else
+ return Hexagon::JMP_GTrrPt_nv_V4;
+ }
+
+ case Hexagon::CMPGEri: {
+ if (reg >= 1)
+ return Hexagon::JMP_GTriPt_nv_V4;
+ else
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPGTri: {
+ if (reg >= 0)
+ return Hexagon::JMP_GTriPt_nv_V4;
+ else
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ }
+
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPGTUrr: {
+ if (secondRegNewified)
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ else
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+ }
+
+ case Hexagon::CMPGTUri:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+
+ case Hexagon::CMPGEUri: {
+ if (reg == 0)
+ return Hexagon::JMP_EQrrPt_nv_V4;
+ else
+ return Hexagon::JMP_GTUriPt_nv_V4;
+ }
+
+ default:
+ llvm_unreachable("Could not find matching New Value Jump instruction.");
+ }
+ // return *some value* to avoid compiler warning
+ return 0;
+}
+
+bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << "\n");
+
+#if 0
+  // For now this is disabled; if we move NewValueJump before register
+  // allocation we will need this information.
+ LiveVariables &LVs = getAnalysis<LiveVariables>();
+#endif
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+ QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+
+ if (!QRI->Subtarget.hasV4TOps() ||
+ DisableNewValueJumps) {
+ return false;
+ }
+
+ int nvjCount = DbgNVJCount;
+ int nvjGenerated = 0;
+
+ // Loop through all the bb's of the function
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ DEBUG(dbgs() << "** dumping bb ** "
+ << MBB->getNumber() << "\n");
+ DEBUG(MBB->dump());
+ DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
+ bool foundJump = false;
+ bool foundCompare = false;
+ bool invertPredicate = false;
+ unsigned predReg = 0; // predicate reg of the jump.
+ unsigned cmpReg1 = 0;
+ int cmpOp2 = 0;
+ bool MO1IsKill = false;
+ bool MO2IsKill = false;
+ MachineBasicBlock::iterator jmpPos;
+ MachineBasicBlock::iterator cmpPos;
+ MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
+ MachineBasicBlock *jmpTarget = NULL;
+ bool afterRA = false;
+ bool isSecondOpReg = false;
+ bool isSecondOpNewified = false;
+ // Traverse the basic block - bottom up
+ for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
+ MII != E;) {
+ MachineInstr *MI = --MII;
+ if (MI->isDebugValue()) {
+ continue;
+ }
+
+ if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
+ break;
+
+ DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
+
+ if (!foundJump &&
+ (MI->getOpcode() == Hexagon::JMP_c ||
+ MI->getOpcode() == Hexagon::JMP_cNot ||
+ MI->getOpcode() == Hexagon::JMP_cdnPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnPnt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
+ // This is where you would insert your compare and
+ // instr that feeds compare
+ jmpPos = MII;
+ jmpInstr = MI;
+ predReg = MI->getOperand(0).getReg();
+ afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
+
+        // If the ifconverter had not messed up the kill flags of the
+ // operands, the following check on the kill flag would suffice.
+ // if(!jmpInstr->getOperand(0).isKill()) break;
+
+        // This predicate register is live out of the BB;
+        // this would only work if we could actually use live
+        // variable analysis on phys regs - but LLVM does not
+        // provide LV analysis on phys regs.
+ //if(LVs.isLiveOut(predReg, *MBB)) break;
+
+        // Get all the successors of this block - there will always
+        // be 2. Check if the predicate register is live-in in those
+        // successors. If yes, we cannot delete the predicate -
+        // this is done only because LLVM does not provide LiveOut
+        // at the BB level.
+ bool predLive = false;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SIE = MBB->succ_end(); SI != SIE; ++SI) {
+ MachineBasicBlock* succMBB = *SI;
+ if (succMBB->isLiveIn(predReg)) {
+ predLive = true;
+ }
+ }
+ if (predLive)
+ break;
+
+ jmpTarget = MI->getOperand(1).getMBB();
+ foundJump = true;
+ if (MI->getOpcode() == Hexagon::JMP_cNot ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
+ MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
+ invertPredicate = true;
+ }
+ continue;
+ }
+
+ // No new value jump if there is a barrier. A barrier has to be in its
+ // own packet. A barrier has zero operands. We conservatively bail out
+ // here if we see any instruction with zero operands.
+ if (foundJump && MI->getNumOperands() == 0)
+ break;
+
+ if (foundJump &&
+ !foundCompare &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(0).getReg() == predReg) {
+
+ // Not all compares can be new value compare. Arch Spec: 7.6.1.1
+ if (QII->isNewValueJumpCandidate(MI)) {
+
+ assert((MI->getDesc().isCompare()) &&
+ "Only compare instruction can be collapsed into New Value Jump");
+ isSecondOpReg = MI->getOperand(2).isReg();
+
+ if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
+ afterRA, jmpPos, MF))
+ break;
+
+ cmpInstr = MI;
+ cmpPos = MII;
+ foundCompare = true;
+
+          // We need cmpReg1 and cmpOp2 (imm or reg) while building the
+          // new value jump instruction.
+ cmpReg1 = MI->getOperand(1).getReg();
+ if (MI->getOperand(1).isKill())
+ MO1IsKill = true;
+
+ if (isSecondOpReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+ if (MI->getOperand(2).isKill())
+ MO2IsKill = true;
+ } else
+ cmpOp2 = MI->getOperand(2).getImm();
+ continue;
+ }
+ }
+
+ if (foundCompare && foundJump) {
+
+ // If "common" checks fail, bail out on this BB.
+ if (!commonChecksToProhibitNewValueJump(afterRA, MII))
+ break;
+
+ bool foundFeeder = false;
+ MachineBasicBlock::iterator feederPos = MII;
+ if (MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isDef() &&
+ (MI->getOperand(0).getReg() == cmpReg1 ||
+ (isSecondOpReg &&
+ MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {
+
+ unsigned feederReg = MI->getOperand(0).getReg();
+
+ // First try to see if we can get the feeder from the first operand
+ // of the compare. If we can not, and if secondOpReg is true
+ // (second operand of the compare is also register), try that one.
+ // TODO: Try to come up with some heuristic to figure out which
+ // feeder would benefit.
+
+ if (feederReg == cmpReg1) {
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
+ if (!isSecondOpReg)
+ break;
+ else
+ continue;
+ } else
+ foundFeeder = true;
+ }
+
+ if (!foundFeeder &&
+ isSecondOpReg &&
+ feederReg == (unsigned) cmpOp2)
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
+ break;
+
+ if (isSecondOpReg) {
+ // In case of CMPLT, or CMPLTU, or EQ with the second register
+ // to newify, swap the operands.
+ if (cmpInstr->getOpcode() == Hexagon::CMPLTrr ||
+ cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
+ (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+ feederReg == (unsigned) cmpOp2)) {
+ unsigned tmp = cmpReg1;
+ bool tmpIsKill = MO1IsKill;
+ cmpReg1 = cmpOp2;
+ MO1IsKill = MO2IsKill;
+ cmpOp2 = tmp;
+ MO2IsKill = tmpIsKill;
+ }
+
+            // Now that we have swapped the operands, all we need to check is
+            // whether the second operand (after the swap) is the feeder.
+            // If it is, make a note.
+ if (feederReg == (unsigned)cmpOp2)
+ isSecondOpNewified = true;
+ }
+
+          // Now that we are moving the feeder closer to the jump,
+          // make sure we are respecting the kill values of
+          // the operands of the feeder.
+
+ bool updatedIsKill = false;
+ for (unsigned i = 0; i < MI->getNumOperands(); i++) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned feederReg = MO.getReg();
+ for (MachineBasicBlock::iterator localII = feederPos,
+ end = jmpPos; localII != end; localII++) {
+ MachineInstr *localMI = localII;
+ for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
+ MachineOperand &localMO = localMI->getOperand(j);
+ if (localMO.isReg() && localMO.isUse() &&
+ localMO.isKill() && feederReg == localMO.getReg()) {
+                    // We found a kill of a use register downstream;
+                    // move the kill flag onto this operand instead.
+ localMO.setIsKill(false);
+ MO.setIsKill();
+ updatedIsKill = true;
+ break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+
+ MBB->splice(jmpPos, MI->getParent(), MI);
+ MBB->splice(jmpPos, MI->getParent(), cmpInstr);
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
+ "This compare is not a New Value Jump candidate.");
+ unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
+ isSecondOpNewified);
+ if (invertPredicate)
+ opc = QII->getInvertedPredicatedOpcode(opc);
+
+ // Manage the conversions from CMPGEUri to either CMPEQrr
+ // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
+          // This has to be after the getNewValueJumpOpcode function call, as
+          // the second operand of the compare could be modified by this logic.
+ if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
+ if (cmpOp2 == 0) {
+ cmpOp2 = cmpReg1;
+ MO2IsKill = MO1IsKill;
+ isSecondOpReg = true;
+ } else
+ --cmpOp2;
+ }
+
+          // Manage the conversions from CMPGEri to CMPGTri properly.
+ // See Arch spec for CMPGEri instructions.
+ if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
+ --cmpOp2;
+
+ if (isSecondOpReg) {
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addReg(cmpOp2, getKillRegState(MO2IsKill))
+ .addMBB(jmpTarget);
+ }
+ else {
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addImm(cmpOp2)
+ .addMBB(jmpTarget);
+ }
+
+ assert(NewMI && "New Value Jump Instruction Not created!");
+ if (cmpInstr->getOperand(0).isReg() &&
+ cmpInstr->getOperand(0).isKill())
+ cmpInstr->getOperand(0).setIsKill(false);
+ if (cmpInstr->getOperand(1).isReg() &&
+ cmpInstr->getOperand(1).isKill())
+ cmpInstr->getOperand(1).setIsKill(false);
+ cmpInstr->eraseFromParent();
+ jmpInstr->eraseFromParent();
+ ++nvjGenerated;
+ ++NumNVJGenerated;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+
+}
+
+FunctionPass *llvm::createHexagonNewValueJump() {
+ return new HexagonNewValueJump();
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2a9de9232915..2c23674a3319 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -63,6 +63,7 @@ const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
return CalleeSavedRegsV2;
case HexagonSubtarget::V3:
case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
return CalleeSavedRegsV3;
}
llvm_unreachable("Callee saved registers requested for unknown architecture "
@@ -109,6 +110,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
return CalleeSavedRegClassesV2;
case HexagonSubtarget::V3:
case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
return CalleeSavedRegClassesV3;
}
llvm_unreachable("Callee saved register classes requested for unknown "
@@ -179,13 +181,15 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = add(r30, #10000)
// r0 = memw(r0)
if ( (MI.getOpcode() == Hexagon::LDriw) ||
- (MI.getOpcode() == Hexagon::LDrid) ||
- (MI.getOpcode() == Hexagon::LDrih) ||
- (MI.getOpcode() == Hexagon::LDriuh) ||
- (MI.getOpcode() == Hexagon::LDrib) ||
- (MI.getOpcode() == Hexagon::LDriub) ) {
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ||
+ (MI.getOpcode() == Hexagon::LDriw_f) ||
+ (MI.getOpcode() == Hexagon::LDrid_f)) {
unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
- *getSubRegisters(MI.getOperand(0).getReg()) :
+ getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
MI.getOperand(0).getReg();
// Check if offset can fit in addi.
@@ -203,10 +207,13 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
MI.getOperand(i+1).ChangeToImmediate(0);
- } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ } else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
+ (MI.getOpcode() == Hexagon::STriw) ||
(MI.getOpcode() == Hexagon::STrid) ||
(MI.getOpcode() == Hexagon::STrih) ||
- (MI.getOpcode() == Hexagon::STrib)) {
+ (MI.getOpcode() == Hexagon::STrib) ||
+ (MI.getOpcode() == Hexagon::STrid_f) ||
+ (MI.getOpcode() == Hexagon::STriw_f)) {
// For stores, we need a reserved register. Change
// memw(r30 + #10000) = r0 to:
//
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 6cf727bc027d..85355ae7beb5 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -73,6 +73,10 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
return true;
}
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index d44eae3602c9..fe41fc3bc60c 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -131,6 +131,9 @@ let Namespace = "Hexagon" in {
def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+ def M0 : Rc<6, "m0">, DwarfRegNum<[71]>;
+ def M1 : Rc<7, "m1">, DwarfRegNum<[72]>;
+
def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct?
def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct?
}
@@ -140,19 +143,15 @@ let Namespace = "Hexagon" in {
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"Hexagon", [i32], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32,
(add (sequence "R%u", 0, 9),
(sequence "R%u", 12, 28),
R10, R11, R29, R30, R31)> {
}
-
-
-def DoubleRegs : RegisterClass<"Hexagon", [i64], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64,
(add (sequence "D%u", 0, 4),
- (sequence "D%u", 6, 13), D5, D14, D15)> {
- let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
-}
+ (sequence "D%u", 6, 13), D5, D14, D15)>;
def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
@@ -162,6 +161,7 @@ def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
def CRRegs : RegisterClass<"Hexagon", [i32], 32,
(add (sequence "LC%u", 0, 1),
- (sequence "SA%u", 0, 1), PC, GP)> {
+ (sequence "SA%u", 0, 1),
+ (sequence "M%u", 0, 1), PC, GP)> {
let Size = 32;
}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 66a00e12dd09..2468f0b86f88 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -1,4 +1,4 @@
-//===- HexagonRemoveExtendArgs.cpp - Remove unecessary argument sign extends =//
+//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends //
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index fbea4452ec6c..d1076b8e4412 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -13,7 +13,6 @@ def LSUNIT : FuncUnit;
def MUNIT : FuncUnit;
def SUNIT : FuncUnit;
-
// Itinerary classes
def ALU32 : InstrItinClass;
def ALU64 : InstrItinClass;
@@ -24,23 +23,31 @@ def LD : InstrItinClass;
def M : InstrItinClass;
def ST : InstrItinClass;
def S : InstrItinClass;
+def SYS : InstrItinClass;
+def MARKER : InstrItinClass;
def PSEUDO : InstrItinClass;
-
def HexagonItineraries :
- ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
- InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
- InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
- InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
- InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
- InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
- InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
- InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
-]>;
-
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ ]>;
+
+def HexagonModel : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItineraries;
+}
//===----------------------------------------------------------------------===//
// V4 Machine Info +
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 4cf66fe74342..9b41126ca6fd 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -23,7 +23,6 @@
// | SLOT3 | XTYPE ALU32 J CR |
// |===========|==================================================|
-
// Functional Units.
def SLOT0 : FuncUnit;
def SLOT1 : FuncUnit;
@@ -34,22 +33,32 @@ def SLOT3 : FuncUnit;
def NV_V4 : InstrItinClass;
def MEM_V4 : InstrItinClass;
// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def PREFIX : InstrItinClass;
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ ]>;
-def HexagonItinerariesV4 : ProcessorItineraries<
- [SLOT0, SLOT1, SLOT2, SLOT3], [], [
- InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
- InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>
-]>;
+def HexagonModelV4 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV4;
+}
//===----------------------------------------------------------------------===//
// Hexagon V4 Resource Definitions -
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index d10c9f2d5242..a81cd913a6ec 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -14,7 +14,7 @@
// {p0 = cmp.eq(r0,r1)}
// {r3 = mux(p0,#1,#3)}
//
-// This requires two packets. If we use .new predicated immediate transfers,
+// This requires two packets. If we use .new predicated immediate transfers,
// then we can do this in a single packet, e.g.:
//
// {p0 = cmp.eq(r0,r1)
@@ -81,40 +81,126 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
++MII) {
MachineInstr *MI = MII;
- int Opc = MI->getOpcode();
- if (Opc == Hexagon::TFR_condset_rr) {
-
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- // Minor optimization: do not emit the predicated copy if the source and
- // the destination is the same register
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ int Opc1, Opc2;
+ switch(MI->getOpcode()) {
+ case Hexagon::TFR_condset_rr:
+ case Hexagon::TFR_condset_rr_f:
+ case Hexagon::TFR_condset_rr64_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_rr ||
+ MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
+ Opc1 = Hexagon::TFR_cPt;
+ Opc2 = Hexagon::TFR_cNotPt;
+ }
+ else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
+ Opc1 = Hexagon::TFR64_cPt;
+ Opc2 = Hexagon::TFR64_cNotPt;
+ }
+
+          // Minor optimization: do not emit the predicated copy if the source
+          // and the destination are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
}
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ case Hexagon::TFR_condset_ri:
+ case Hexagon::TFR_condset_ri_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+
+        // Do not emit the predicated copy if the source and the destination
+        // are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFR_cPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addImm(MI->getOperand(3).getImm());
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addFPImm(MI->getOperand(3).getFPImm());
+ }
+
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ case Hexagon::TFR_condset_ir:
+ case Hexagon::TFR_condset_ir_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addImm(MI->getOperand(2).getImm());
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt_f), DestReg).
+ addReg(MI->getOperand(1).getReg()).
+ addFPImm(MI->getOperand(2).getFPImm());
+ }
+
+        // Do not emit the predicated copy if the source and
+        // the destination are the same register.
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFR_cNotPt), DestReg).
+ addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
+ }
+ case Hexagon::TFR_condset_ii:
+ case Hexagon::TFR_condset_ii_f: {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+
+ if (MI->getOpcode() == Hexagon::TFR_condset_ii ) {
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cPt_f), DestReg).
+ addReg(SrcReg1).
+ addFPImm(MI->getOperand(2).getFPImm());
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
+ addReg(SrcReg1).
+ addFPImm(MI->getOperand(3).getFPImm());
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ break;
}
- MII = MBB->erase(MI);
- --MII;
- } else if (Opc == Hexagon::TFR_condset_ii) {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(1).getReg();
- int Immed1 = MI->getOperand(2).getImm();
- int Immed2 = MI->getOperand(3).getImm();
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
- DestReg).addReg(SrcReg1).addImm(Immed1);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
- DestReg).addReg(SrcReg1).addImm(Immed2);
- MII = MBB->erase(MI);
- --MII;
}
}
}
-
return true;
}
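
As a side illustration of the split this pass performs (a sketch only, using a hypothetical PredMove type and splitCondSet helper rather than the backend's API), a conditional-set pseudo Dst = condset(Pred, Src1, Src2) lowers into at most two predicated transfers, and a transfer whose source already equals the destination is elided:

#include <cstdio>

// Hypothetical predicated register-to-register transfer.
struct PredMove { int Pred; bool IfTrue; int Dst, Src; };

// Split "Dst = condset(Pred, Src1, Src2)" into up to two predicated moves,
// eliding a move whose source is already the destination register.
static int splitCondSet(int Pred, int Dst, int Src1, int Src2, PredMove Out[2]) {
  int N = 0;
  if (Dst != Src1) Out[N++] = PredMove{Pred, true,  Dst, Src1};  // if (Pred)  Dst = Src1
  if (Dst != Src2) Out[N++] = PredMove{Pred, false, Dst, Src2};  // if (!Pred) Dst = Src2
  return N;
}

int main() {
  PredMove Moves[2];
  // r3 = condset(p0, r3, r4): only the "if (!p0) r3 = r4" transfer is needed.
  int N = splitCondSet(/*Pred=*/0, /*Dst=*/3, /*Src1=*/3, /*Src2=*/4, Moves);
  std::printf("emitted %d predicated move(s)\n", N);
  return 0;
}
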
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 654d33626edf..5d087db1bdb7 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -13,6 +13,7 @@
#include "HexagonSubtarget.h"
#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -29,11 +30,17 @@ static cl::opt<bool>
EnableMemOps(
"enable-hexagon-memops",
cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
- cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+ cl::desc("Generate V4 memop instructions."));
+
+static cl::opt<bool>
+EnableIEEERndNear(
+ "enable-hexagon-ieee-rnd-near",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate non-chopped conversion from fp to int."));
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
HexagonGenSubtargetInfo(TT, CPU, FS),
- HexagonArchVersion(V1),
+ HexagonArchVersion(V2),
CPUString(CPU.str()) {
ParseSubtargetFeatures(CPU, FS);
@@ -45,18 +52,27 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
break;
case HexagonSubtarget::V4:
break;
+ case HexagonSubtarget::V5:
+ break;
default:
- llvm_unreachable("Unknown Architecture Version.");
+ // If the programmer has not specified a Hexagon version, default
+ // to -mv4.
+ CPUString = "hexagonv4";
+ HexagonArchVersion = HexagonSubtarget::V4;
+ break;
}
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- // Max issue per cycle == bundle width.
- InstrItins.IssueWidth = 4;
-
if (EnableMemOps)
UseMemOps = true;
else
UseMemOps = false;
+
+ if (EnableIEEERndNear)
+ ModeIEEERndNear = true;
+ else
+ ModeIEEERndNear = false;
}
+
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 3079086986d9..5d9d6d890d98 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -22,16 +22,18 @@
#include "HexagonGenSubtargetInfo.inc"
#define Hexagon_SMALL_DATA_THRESHOLD 8
+#define Hexagon_SLOTS 4
namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseMemOps;
+ bool ModeIEEERndNear;
public:
enum HexagonArchEnum {
- V1, V2, V3, V4
+ V1, V2, V3, V4, V5
};
HexagonArchEnum HexagonArchVersion;
@@ -55,7 +57,11 @@ public:
bool hasV3TOps () const { return HexagonArchVersion >= V3; }
bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+ bool hasV5TOps () const { return HexagonArchVersion >= V5; }
+ bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
+ bool modeIEEERndNear () const { return ModeIEEERndNear; }
bool isSubtargetV2() const { return HexagonArchVersion == V2;}
const std::string &getCPUString () const { return CPUString; }
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 55bbba7251a7..a7b291ff2a26 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -55,7 +55,9 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") ,
+ DataLayout("e-p:32:32:32-"
+ "i64:64:64-i32:32:32-i16:16:16-i1:32:32-"
+ "f64:64:64-f32:32:32-a0:0-n32") ,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
@@ -100,43 +102,47 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
- PM->add(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
- PM->add(createHexagonISelDag(getHexagonTargetMachine()));
- PM->add(createHexagonPeephole());
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+ addPass(createHexagonISelDag(getHexagonTargetMachine()));
+ addPass(createHexagonPeephole());
return false;
}
bool HexagonPassConfig::addPreRegAlloc() {
if (!DisableHardwareLoops) {
- PM->add(createHexagonHardwareLoops());
+ addPass(createHexagonHardwareLoops());
}
-
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- PM->add(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
return true;
}
bool HexagonPassConfig::addPreSched2() {
- addPass(IfConverterID);
+ addPass(&IfConverterID);
return true;
}
bool HexagonPassConfig::addPreEmitPass() {
if (!DisableHardwareLoops) {
- PM->add(createHexagonFixupHwLoops());
+ addPass(createHexagonFixupHwLoops());
}
+ addPass(createHexagonNewValueJump());
+
// Expand Spill code for predicate registers.
- PM->add(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+ addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
// Split up TFRcondsets into conditional transfers.
- PM->add(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+ addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+
+ // Create Packets.
+ addPass(createHexagonPacketizer());
return false;
}
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
new file mode 100644
index 000000000000..a03ed03365ba
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -0,0 +1,3646 @@
+//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple VLIW packetizer using a DFA. The packetizer works
+// on machine basic blocks. For each instruction I in BB, the packetizer
+// consults the DFA to see if machine resources are available to execute I. If
+// so, the packetizer checks whether I depends on any instruction J in the
+// current packet. If no dependency is found, I is added to the current packet
+// and its machine resources are marked as taken. If a dependency is found, a
+// target API call is made to prune the dependence.
+//
+//===----------------------------------------------------------------------===//
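
For orientation, the following is a minimal, self-contained sketch of the greedy loop described in the comment above. The Instr and ResourceTracker types and the packetize helper are hypothetical stand-ins; the real pass drives a DFA-based resource tracker over MachineInstrs and prunes dependences through target hooks rather than simply ending the packet.

#include <vector>

struct Instr { int Opcode; };

// Hypothetical resource tracker with a fixed number of slots per packet.
struct ResourceTracker {
  int Used = 0;
  static const int Width = 4;                       // slots per packet
  bool canReserve(const Instr &) const { return Used < Width; }
  void reserve(const Instr &) { ++Used; }
  void clear() { Used = 0; }
};

// Dependence check against the current packet; pruning is elided here.
static bool dependsOnPacket(const Instr &, const std::vector<Instr> &) {
  return false;
}

// Greedily fill packets: close the current packet when resources run out or
// when an instruction depends on a member of the packet.
static std::vector<std::vector<Instr> >
packetize(const std::vector<Instr> &BB) {
  std::vector<std::vector<Instr> > Packets;
  std::vector<Instr> Cur;
  ResourceTracker RT;
  for (unsigned i = 0, e = BB.size(); i != e; ++i) {
    const Instr &I = BB[i];
    if (!RT.canReserve(I) || dependsOnPacket(I, Cur)) {
      Packets.push_back(Cur);                       // end the current packet
      Cur.clear();
      RT.clear();
    }
    RT.reserve(I);
    Cur.push_back(I);
  }
  if (!Cur.empty())
    Packets.push_back(Cur);
  return Packets;
}

int main() {
  std::vector<Instr> BB(6);                         // six trivial instructions
  return packetize(BB).size() == 2 ? 0 : 1;         // 4 + 2 -> two packets
}
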
+#define DEBUG_TYPE "packets"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+using namespace llvm;
+
+namespace {
+ class HexagonPacketizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonPacketizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char HexagonPacketizer::ID = 0;
+
+ class HexagonPacketizerList : public VLIWPacketizerList {
+
+ private:
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState();
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB);
+
+    // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI);
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ);
+
+    // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ);
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI);
+ private:
+ bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
+ bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool DemoteToDotOld(MachineInstr* MI);
+ bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr*,
+ unsigned, std::map <MachineInstr*, SUnit*>);
+ bool isNewifiable(MachineInstr* MI);
+ bool isCondInst(MachineInstr* MI);
+ bool IsNewifyStore (MachineInstr* MI);
+ bool tryAllocateResourcesForConstExt(MachineInstr* MI);
+ bool canReserveResourcesForConstExt(MachineInstr *MI);
+ void reserveResourcesForConstExt(MachineInstr* MI);
+ bool isNewValueInst(MachineInstr* MI);
+ bool isDotNewInst(MachineInstr* MI);
+ };
+}
+
+// HexagonPacketizerList Ctor.
+HexagonPacketizerList::HexagonPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT)
+ : VLIWPacketizerList(MF, MLI, MDT, true){
+}
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+ // Instantiate the packetizer.
+ HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+  // Loop over all basic blocks and remove KILL pseudo-instructions.
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+  // packetization.
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill()) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+ break;
+ }
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = llvm::prior(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == llvm::prior(RegionEnd)) {
+ RegionEnd = llvm::prior(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+}
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+ return ((MI->getOpcode() == Hexagon::CALLR) ||
+ (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ llvm_unreachable("can not reserve resources for constant extender.");
+ }
+ return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ assert(QII->isExtended(MI) &&
+ "Should only be called for constant extended instructions");
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MI->getDebugLoc());
+ bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+ MF->DeleteMachineInstr(PseudoMI);
+ return CanReserve;
+}
+
+// Allocate resources (i.e. 4 bytes) for a constant extender. Return true on
+// success and false otherwise.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return true;
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return false;
+ }
+}
+
+
+bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
+ SDep::Kind DepType,
+ unsigned DepReg) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+
+ // Check for lr dependence
+ if (DepReg == QRI->getRARegister()) {
+ return true;
+ }
+
+ if (QII->isDeallocRet(MI)) {
+ if (DepReg == QRI->getFrameRegister() ||
+ DepReg == QRI->getStackRegister())
+ return true;
+ }
+
+ // Check if this is a predicate dependence
+ const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
+ if (RC == &Hexagon::PredRegsRegClass) {
+ return true;
+ }
+
+ //
+  // Lastly, check for an operand used in an indirect call.
+ // If we had an attribute for checking if an instruction is an indirect call,
+ // then we could have avoided this relatively brittle implementation of
+ // IsIndirectCall()
+ //
+ // Assumes that the first operand of the CALLr is the function address
+ //
+ if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ MachineOperand MO = MI->getOperand(0);
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool IsRegDependence(const SDep::Kind DepType) {
+ return (DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output);
+}
+
+static bool IsDirectJump(MachineInstr* MI) {
+ return (MI->getOpcode() == Hexagon::JMP);
+}
+
+static bool IsSchedBarrier(MachineInstr* MI) {
+ switch (MI->getOpcode()) {
+ case Hexagon::BARRIER:
+ return true;
+ }
+ return false;
+}
+
+static bool IsControlFlow(MachineInstr* MI) {
+ return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+}
+
+bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (QII->isNewValueJump(MI))
+ return true;
+
+ if (QII->isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
+// Returns true if an instruction can be promoted to a new-value store; it
+// always returns false for V2 and V3.
+// It lists all the conditional and unconditional stores that can be promoted
+// to new-value stores.
+
+bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ // store byte
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_indexed_shl_V4:
+ case Hexagon::STrib_shl_V4:
+ case Hexagon::STrib_GP_V4:
+ case Hexagon::STb_GP_V4:
+ case Hexagon::POST_STbri:
+ case Hexagon::STrib_cPt:
+ case Hexagon::STrib_cdnPt_V4:
+ case Hexagon::STrib_cNotPt:
+ case Hexagon::STrib_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_cPt:
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ case Hexagon::STrib_indexed_cNotPt:
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ case Hexagon::STb_GP_cPt_V4:
+ case Hexagon::STb_GP_cNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cPt_V4:
+ case Hexagon::STrib_GP_cNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+
+ // store halfword
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_indexed_shl_V4:
+ case Hexagon::STrih_shl_V4:
+ case Hexagon::STrih_GP_V4:
+ case Hexagon::STh_GP_V4:
+ case Hexagon::POST_SThri:
+ case Hexagon::STrih_cPt:
+ case Hexagon::STrih_cdnPt_V4:
+ case Hexagon::STrih_cNotPt:
+ case Hexagon::STrih_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_cPt:
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ case Hexagon::STrih_indexed_cNotPt:
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ case Hexagon::STh_GP_cPt_V4:
+ case Hexagon::STh_GP_cNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cPt_V4:
+ case Hexagon::STrih_GP_cNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+
+ // store word
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_indexed_shl_V4:
+ case Hexagon::STriw_shl_V4:
+ case Hexagon::STriw_GP_V4:
+ case Hexagon::STw_GP_V4:
+ case Hexagon::POST_STwri:
+ case Hexagon::STriw_cPt:
+ case Hexagon::STriw_cdnPt_V4:
+ case Hexagon::STriw_cNotPt:
+ case Hexagon::STriw_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_cPt:
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ case Hexagon::STriw_indexed_cNotPt:
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ case Hexagon::STw_GP_cPt_V4:
+ case Hexagon::STw_GP_cNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cPt_V4:
+ case Hexagon::STriw_GP_cNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return QRI->Subtarget.hasV4TOps();
+ }
+ return false;
+}
+
+static bool IsLoopN(MachineInstr *MI) {
+ return (MI->getOpcode() == Hexagon::LOOP0_i ||
+ MI->getOpcode() == Hexagon::LOOP0_r);
+}
+
+/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
+/// callee-saved register.
+static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ unsigned CalleeSavedReg = *CSR;
+ if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ return true;
+ }
+ return false;
+}
+
+// Return the new value instruction for a given store.
+static int GetDotNewOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+ // store new value byte
+ case Hexagon::STrib:
+ return Hexagon::STrib_nv_V4;
+
+ case Hexagon::STrib_indexed:
+ return Hexagon::STrib_indexed_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_V4:
+ return Hexagon::STrib_indexed_shl_nv_V4;
+
+ case Hexagon::STrib_shl_V4:
+ return Hexagon::STrib_shl_nv_V4;
+
+ case Hexagon::STrib_GP_V4:
+ return Hexagon::STrib_GP_nv_V4;
+
+ case Hexagon::STb_GP_V4:
+ return Hexagon::STb_GP_nv_V4;
+
+ case Hexagon::POST_STbri:
+ return Hexagon::POST_STbri_nv_V4;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cPt_nv_V4;
+
+ case Hexagon::STrib_cdnPt_V4:
+ return Hexagon::STrib_cdnPt_nv_V4;
+
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cNotPt_nv_V4;
+
+ case Hexagon::STrib_cdnNotPt_V4:
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnPt_V4:
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnPt_V4:
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_V4:
+ return Hexagon::STrib_GP_cPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4:
+ return Hexagon::STrib_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnPt_V4:
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // store new value halfword
+ case Hexagon::STrih:
+ return Hexagon::STrih_nv_V4;
+
+ case Hexagon::STrih_indexed:
+ return Hexagon::STrih_indexed_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_V4:
+ return Hexagon::STrih_indexed_shl_nv_V4;
+
+ case Hexagon::STrih_shl_V4:
+ return Hexagon::STrih_shl_nv_V4;
+
+ case Hexagon::STrih_GP_V4:
+ return Hexagon::STrih_GP_nv_V4;
+
+ case Hexagon::STh_GP_V4:
+ return Hexagon::STh_GP_nv_V4;
+
+ case Hexagon::POST_SThri:
+ return Hexagon::POST_SThri_nv_V4;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cPt_nv_V4;
+
+ case Hexagon::STrih_cdnPt_V4:
+ return Hexagon::STrih_cdnPt_nv_V4;
+
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cNotPt_nv_V4;
+
+ case Hexagon::STrih_cdnNotPt_V4:
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnPt_V4:
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnPt_V4:
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_V4:
+ return Hexagon::STrih_GP_cPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4:
+ return Hexagon::STrih_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnPt_V4:
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // store new value word
+ case Hexagon::STriw:
+ return Hexagon::STriw_nv_V4;
+
+ case Hexagon::STriw_indexed:
+ return Hexagon::STriw_indexed_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_V4:
+ return Hexagon::STriw_indexed_shl_nv_V4;
+
+ case Hexagon::STriw_shl_V4:
+ return Hexagon::STriw_shl_nv_V4;
+
+ case Hexagon::STriw_GP_V4:
+ return Hexagon::STriw_GP_nv_V4;
+
+ case Hexagon::STw_GP_V4:
+ return Hexagon::STw_GP_nv_V4;
+
+ case Hexagon::POST_STwri:
+ return Hexagon::POST_STwri_nv_V4;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cPt_nv_V4;
+
+ case Hexagon::STriw_cdnPt_V4:
+ return Hexagon::STriw_cdnPt_nv_V4;
+
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cNotPt_nv_V4;
+
+ case Hexagon::STriw_cdnNotPt_V4:
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnPt_V4:
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnPt_V4:
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_V4:
+ return Hexagon::STriw_GP_cPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4:
+ return Hexagon::STriw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnPt_V4:
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+ }
+}
+
+// Return .new predicate version for an instruction
+static int GetDotNewPredOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+ // Conditional stores
+ // Store byte conditionally
+ case Hexagon::STrib_cPt :
+ return Hexagon::STrib_cdnPt_V4;
+
+ case Hexagon::STrib_cNotPt :
+ return Hexagon::STrib_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_cPt :
+ return Hexagon::STrib_indexed_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_cNotPt :
+ return Hexagon::STrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrib_imm_cPt_V4 :
+ return Hexagon::STrib_imm_cdnPt_V4;
+
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ return Hexagon::STrib_imm_cdnNotPt_V4;
+
+ case Hexagon::POST_STbri_cPt :
+ return Hexagon::POST_STbri_cdnPt_V4;
+
+ case Hexagon::POST_STbri_cNotPt :
+ return Hexagon::POST_STbri_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4 :
+ return Hexagon::STb_GP_cdnPt_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4 :
+ return Hexagon::STb_GP_cdnNotPt_V4;
+
+ case Hexagon::STrib_GP_cPt_V4 :
+ return Hexagon::STrib_GP_cdnPt_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_V4;
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_cPt :
+ return Hexagon::STrid_cdnPt_V4;
+
+ case Hexagon::STrid_cNotPt :
+ return Hexagon::STrid_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_cPt :
+ return Hexagon::STrid_indexed_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_cNotPt :
+ return Hexagon::STrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STdri_cPt :
+ return Hexagon::POST_STdri_cdnPt_V4;
+
+ case Hexagon::POST_STdri_cNotPt :
+ return Hexagon::POST_STdri_cdnNotPt_V4;
+
+ case Hexagon::STd_GP_cPt_V4 :
+ return Hexagon::STd_GP_cdnPt_V4;
+
+ case Hexagon::STd_GP_cNotPt_V4 :
+ return Hexagon::STd_GP_cdnNotPt_V4;
+
+ case Hexagon::STrid_GP_cPt_V4 :
+ return Hexagon::STrid_GP_cdnPt_V4;
+
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ return Hexagon::STrid_GP_cdnNotPt_V4;
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cPt :
+ return Hexagon::STrih_cdnPt_V4;
+
+ case Hexagon::STrih_cNotPt :
+ return Hexagon::STrih_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_cPt :
+ return Hexagon::STrih_indexed_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_cNotPt :
+ return Hexagon::STrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrih_imm_cPt_V4 :
+ return Hexagon::STrih_imm_cdnPt_V4;
+
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ return Hexagon::STrih_imm_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_SThri_cPt :
+ return Hexagon::POST_SThri_cdnPt_V4;
+
+ case Hexagon::POST_SThri_cNotPt :
+ return Hexagon::POST_SThri_cdnNotPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4 :
+ return Hexagon::STh_GP_cdnPt_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4 :
+ return Hexagon::STh_GP_cdnNotPt_V4;
+
+ case Hexagon::STrih_GP_cPt_V4 :
+ return Hexagon::STrih_GP_cdnPt_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_V4;
+
+ // Store word conditionally
+ case Hexagon::STriw_cPt :
+ return Hexagon::STriw_cdnPt_V4;
+
+ case Hexagon::STriw_cNotPt :
+ return Hexagon::STriw_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_cPt :
+ return Hexagon::STriw_indexed_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_cNotPt :
+ return Hexagon::STriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4 :
+ return Hexagon::STriw_imm_cdnPt_V4;
+
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ return Hexagon::STriw_imm_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STwri_cPt :
+ return Hexagon::POST_STwri_cdnPt_V4;
+
+ case Hexagon::POST_STwri_cNotPt :
+ return Hexagon::POST_STwri_cdnNotPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4 :
+ return Hexagon::STw_GP_cdnPt_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return Hexagon::STw_GP_cdnNotPt_V4;
+
+ case Hexagon::STriw_GP_cPt_V4 :
+ return Hexagon::STriw_GP_cdnPt_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_V4;
+
+      // Conditional Jumps
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cdnPt;
+
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_cdnNotPt;
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cdnPt_V3;
+
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cdnNotPt_V3;
+
+ // Conditional Transfers
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cdnPt;
+
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cdnNotPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cdnPt;
+
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cdnNotPt;
+
+ // Load double word
+ case Hexagon::LDrid_cPt :
+ return Hexagon::LDrid_cdnPt;
+
+ case Hexagon::LDrid_cNotPt :
+ return Hexagon::LDrid_cdnNotPt;
+
+ case Hexagon::LDrid_indexed_cPt :
+ return Hexagon::LDrid_indexed_cdnPt;
+
+ case Hexagon::LDrid_indexed_cNotPt :
+ return Hexagon::LDrid_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrid_cPt :
+ return Hexagon::POST_LDrid_cdnPt_V4;
+
+ case Hexagon::POST_LDrid_cNotPt :
+ return Hexagon::POST_LDrid_cdnNotPt_V4;
+
+ // Load word
+ case Hexagon::LDriw_cPt :
+ return Hexagon::LDriw_cdnPt;
+
+ case Hexagon::LDriw_cNotPt :
+ return Hexagon::LDriw_cdnNotPt;
+
+ case Hexagon::LDriw_indexed_cPt :
+ return Hexagon::LDriw_indexed_cdnPt;
+
+ case Hexagon::LDriw_indexed_cNotPt :
+ return Hexagon::LDriw_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriw_cPt :
+ return Hexagon::POST_LDriw_cdnPt_V4;
+
+ case Hexagon::POST_LDriw_cNotPt :
+ return Hexagon::POST_LDriw_cdnNotPt_V4;
+
+ // Load halfword
+ case Hexagon::LDrih_cPt :
+ return Hexagon::LDrih_cdnPt;
+
+ case Hexagon::LDrih_cNotPt :
+ return Hexagon::LDrih_cdnNotPt;
+
+ case Hexagon::LDrih_indexed_cPt :
+ return Hexagon::LDrih_indexed_cdnPt;
+
+ case Hexagon::LDrih_indexed_cNotPt :
+ return Hexagon::LDrih_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrih_cPt :
+ return Hexagon::POST_LDrih_cdnPt_V4;
+
+ case Hexagon::POST_LDrih_cNotPt :
+ return Hexagon::POST_LDrih_cdnNotPt_V4;
+
+ // Load byte
+ case Hexagon::LDrib_cPt :
+ return Hexagon::LDrib_cdnPt;
+
+ case Hexagon::LDrib_cNotPt :
+ return Hexagon::LDrib_cdnNotPt;
+
+ case Hexagon::LDrib_indexed_cPt :
+ return Hexagon::LDrib_indexed_cdnPt;
+
+ case Hexagon::LDrib_indexed_cNotPt :
+ return Hexagon::LDrib_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrib_cPt :
+ return Hexagon::POST_LDrib_cdnPt_V4;
+
+ case Hexagon::POST_LDrib_cNotPt :
+ return Hexagon::POST_LDrib_cdnNotPt_V4;
+
+ // Load unsigned halfword
+ case Hexagon::LDriuh_cPt :
+ return Hexagon::LDriuh_cdnPt;
+
+ case Hexagon::LDriuh_cNotPt :
+ return Hexagon::LDriuh_cdnNotPt;
+
+ case Hexagon::LDriuh_indexed_cPt :
+ return Hexagon::LDriuh_indexed_cdnPt;
+
+ case Hexagon::LDriuh_indexed_cNotPt :
+ return Hexagon::LDriuh_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriuh_cPt :
+ return Hexagon::POST_LDriuh_cdnPt_V4;
+
+ case Hexagon::POST_LDriuh_cNotPt :
+ return Hexagon::POST_LDriuh_cdnNotPt_V4;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cPt :
+ return Hexagon::LDriub_cdnPt;
+
+ case Hexagon::LDriub_cNotPt :
+ return Hexagon::LDriub_cdnNotPt;
+
+ case Hexagon::LDriub_indexed_cPt :
+ return Hexagon::LDriub_indexed_cdnPt;
+
+ case Hexagon::LDriub_indexed_cNotPt :
+ return Hexagon::LDriub_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriub_cPt :
+ return Hexagon::POST_LDriub_cdnPt_V4;
+
+ case Hexagon::POST_LDriub_cNotPt :
+ return Hexagon::POST_LDriub_cdnNotPt_V4;
+
+ // V4 indexed+scaled load
+
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ return Hexagon::LDrid_indexed_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ return Hexagon::LDrib_indexed_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ return Hexagon::LDriub_indexed_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ return Hexagon::LDrih_indexed_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ return Hexagon::LDriw_indexed_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cPt_V4:
+ return Hexagon::LDd_GP_cdnPt_V4;
+
+ case Hexagon::LDd_GP_cNotPt_V4:
+ return Hexagon::LDd_GP_cdnNotPt_V4;
+
+ case Hexagon::LDb_GP_cPt_V4:
+ return Hexagon::LDb_GP_cdnPt_V4;
+
+ case Hexagon::LDb_GP_cNotPt_V4:
+ return Hexagon::LDb_GP_cdnNotPt_V4;
+
+ case Hexagon::LDub_GP_cPt_V4:
+ return Hexagon::LDub_GP_cdnPt_V4;
+
+ case Hexagon::LDub_GP_cNotPt_V4:
+ return Hexagon::LDub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDh_GP_cPt_V4:
+ return Hexagon::LDh_GP_cdnPt_V4;
+
+ case Hexagon::LDh_GP_cNotPt_V4:
+ return Hexagon::LDh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDuh_GP_cPt_V4:
+ return Hexagon::LDuh_GP_cdnPt_V4;
+
+ case Hexagon::LDuh_GP_cNotPt_V4:
+ return Hexagon::LDuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDw_GP_cPt_V4:
+ return Hexagon::LDw_GP_cdnPt_V4;
+
+ case Hexagon::LDw_GP_cNotPt_V4:
+ return Hexagon::LDw_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrid_GP_cPt_V4:
+ return Hexagon::LDrid_GP_cdnPt_V4;
+
+ case Hexagon::LDrid_GP_cNotPt_V4:
+ return Hexagon::LDrid_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrib_GP_cPt_V4:
+ return Hexagon::LDrib_GP_cdnPt_V4;
+
+ case Hexagon::LDrib_GP_cNotPt_V4:
+ return Hexagon::LDrib_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriub_GP_cPt_V4:
+ return Hexagon::LDriub_GP_cdnPt_V4;
+
+ case Hexagon::LDriub_GP_cNotPt_V4:
+ return Hexagon::LDriub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrih_GP_cPt_V4:
+ return Hexagon::LDrih_GP_cdnPt_V4;
+
+ case Hexagon::LDrih_GP_cNotPt_V4:
+ return Hexagon::LDrih_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cPt_V4:
+ return Hexagon::LDriuh_GP_cdnPt_V4;
+
+ case Hexagon::LDriuh_GP_cNotPt_V4:
+ return Hexagon::LDriuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriw_GP_cPt_V4:
+ return Hexagon::LDriw_GP_cdnPt_V4;
+
+ case Hexagon::LDriw_GP_cNotPt_V4:
+ return Hexagon::LDriw_GP_cdnNotPt_V4;
+
+ // Conditional store new-value byte
+ case Hexagon::STrib_cPt_nv_V4 :
+ return Hexagon::STrib_cdnPt_nv_V4;
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+ case Hexagon::POST_STbri_cNotPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value halfword
+ case Hexagon::STrih_cPt_nv_V4 :
+ return Hexagon::STrih_cdnPt_nv_V4;
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value word
+ case Hexagon::STriw_cPt_nv_V4 :
+ return Hexagon::STriw_cdnPt_nv_V4;
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+
+ // Conditional add
+ case Hexagon::ADD_ri_cPt :
+ return Hexagon::ADD_ri_cdnPt;
+ case Hexagon::ADD_ri_cNotPt :
+ return Hexagon::ADD_ri_cdnNotPt;
+
+ case Hexagon::ADD_rr_cPt :
+ return Hexagon::ADD_rr_cdnPt;
+ case Hexagon::ADD_rr_cNotPt :
+ return Hexagon::ADD_rr_cdnNotPt;
+
+ // Conditional logical Operations
+ case Hexagon::XOR_rr_cPt :
+ return Hexagon::XOR_rr_cdnPt;
+ case Hexagon::XOR_rr_cNotPt :
+ return Hexagon::XOR_rr_cdnNotPt;
+
+ case Hexagon::AND_rr_cPt :
+ return Hexagon::AND_rr_cdnPt;
+ case Hexagon::AND_rr_cNotPt :
+ return Hexagon::AND_rr_cdnNotPt;
+
+ case Hexagon::OR_rr_cPt :
+ return Hexagon::OR_rr_cdnPt;
+ case Hexagon::OR_rr_cNotPt :
+ return Hexagon::OR_rr_cdnNotPt;
+
+ // Conditional Subtract
+ case Hexagon::SUB_rr_cPt :
+ return Hexagon::SUB_rr_cdnPt;
+ case Hexagon::SUB_rr_cNotPt :
+ return Hexagon::SUB_rr_cdnNotPt;
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cPt :
+ return Hexagon::COMBINE_rr_cdnPt;
+ case Hexagon::COMBINE_rr_cNotPt :
+ return Hexagon::COMBINE_rr_cdnNotPt;
+
+ case Hexagon::ASLH_cPt_V4 :
+ return Hexagon::ASLH_cdnPt_V4;
+ case Hexagon::ASLH_cNotPt_V4 :
+ return Hexagon::ASLH_cdnNotPt_V4;
+
+ case Hexagon::ASRH_cPt_V4 :
+ return Hexagon::ASRH_cdnPt_V4;
+ case Hexagon::ASRH_cNotPt_V4 :
+ return Hexagon::ASRH_cdnNotPt_V4;
+
+ case Hexagon::SXTB_cPt_V4 :
+ return Hexagon::SXTB_cdnPt_V4;
+ case Hexagon::SXTB_cNotPt_V4 :
+ return Hexagon::SXTB_cdnNotPt_V4;
+
+ case Hexagon::SXTH_cPt_V4 :
+ return Hexagon::SXTH_cdnPt_V4;
+ case Hexagon::SXTH_cNotPt_V4 :
+ return Hexagon::SXTH_cdnNotPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4 :
+ return Hexagon::ZXTB_cdnPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4 :
+ return Hexagon::ZXTB_cdnNotPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4 :
+ return Hexagon::ZXTH_cdnPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4 :
+ return Hexagon::ZXTH_cdnNotPt_V4;
+ }
+}
+
+// Returns true if an instruction can be promoted to .new predicate
+// or new-value store.
+bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
+ if ( isCondInst(MI) || IsNewifyStore(MI))
+ return true;
+ else
+ return false;
+}
+
+bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const MCInstrDesc& TID = MI->getDesc();
+ // bug 5670: until that is fixed,
+ // this portion is disabled.
+ if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+ || QII->isConditionalTransfer(MI)
+ || QII->isConditionalALU32(MI)
+ || QII->isConditionalLoad(MI)
+ || QII->isConditionalStore(MI)) {
+ return true;
+ }
+ return false;
+}
+
+
+// Promote an instruction to its .new form.
+// At this time, we have already made a call to CanPromoteToDotNew
+// and made sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+
+ assert (DepType == SDep::Data);
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ int NewOpcode;
+ if (RC == &Hexagon::PredRegsRegClass)
+ NewOpcode = GetDotNewPredOp(MI->getOpcode());
+ else
+ NewOpcode = GetDotNewOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+
+ return true;
+}
+
+// Returns the most basic instruction for the .new predicated instructions and
+// new-value stores.
+// For example, all of the following instructions will be converted back to the
+// same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new --->
+// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1 --->
+//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, if the dependence check fails (due to R5) during the
+// next iteration, the instruction is converted back to its most basic form.
+
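
The promote-then-demote cycle described above can be sketched in a few lines. This is an illustration only, with hypothetical Instr and tryAddPromoted names rather than the pass's real data structures: an instruction is speculatively promoted to its .new form while a packet is being built, and demoted back to its .old opcode if a later dependence check fails.

#include <vector>

struct Instr {
  int Opcode;
  bool DotNew;                            // currently in .new form?
};

// Hypothetical dependence check against the packet built so far.
static bool dependsOnPacket(const Instr &, const std::vector<Instr> &) {
  return false;
}

// Speculatively promote I to .new; if the dependence check against the
// current packet then fails, demote it back to its most basic (.old) form
// and let it start the next packet instead.
static bool tryAddPromoted(Instr I, std::vector<Instr> &Packet) {
  I.DotNew = true;                        // promote (predicate first, value next)
  if (dependsOnPacket(I, Packet)) {
    I.DotNew = false;                     // demote back to the .old opcode
    return false;
  }
  Packet.push_back(I);
  return true;
}

int main() {
  std::vector<Instr> Packet;
  Instr Store = { /*Opcode=*/0, /*DotNew=*/false };
  return tryAddPromoted(Store, Packet) ? 0 : 1;
}
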
+static int GetDotOldOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .old type");
+ case Hexagon::TFR_cdnPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFR_cdnNotPt:
+ return Hexagon::TFR_cNotPt;
+
+ case Hexagon::TFRI_cdnPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::TFRI_cdnNotPt:
+ return Hexagon::TFRI_cNotPt;
+
+ case Hexagon::JMP_cdnPt:
+ return Hexagon::JMP_c;
+
+ case Hexagon::JMP_cdnNotPt:
+ return Hexagon::JMP_cNot;
+
+ case Hexagon::JMPR_cdnPt_V3:
+ return Hexagon::JMPR_cPt;
+
+ case Hexagon::JMPR_cdnNotPt_V3:
+ return Hexagon::JMPR_cNotPt;
+
+ // Load double word
+
+ case Hexagon::LDrid_cdnPt :
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDrid_cdnNotPt :
+ return Hexagon::LDrid_cNotPt;
+
+ case Hexagon::LDrid_indexed_cdnPt :
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ return Hexagon::LDrid_indexed_cNotPt;
+
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ return Hexagon::POST_LDrid_cNotPt;
+
+ // Load word
+
+ case Hexagon::LDriw_cdnPt :
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDriw_cdnNotPt :
+ return Hexagon::LDriw_cNotPt;
+
+ case Hexagon::LDriw_indexed_cdnPt :
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ return Hexagon::LDriw_indexed_cNotPt;
+
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ return Hexagon::POST_LDriw_cNotPt;
+
+ // Load half
+
+ case Hexagon::LDrih_cdnPt :
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDrih_cdnNotPt :
+ return Hexagon::LDrih_cNotPt;
+
+ case Hexagon::LDrih_indexed_cdnPt :
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ return Hexagon::LDrih_indexed_cNotPt;
+
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ return Hexagon::POST_LDrih_cNotPt;
+
+ // Load byte
+
+ case Hexagon::LDrib_cdnPt :
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDrib_cdnNotPt :
+ return Hexagon::LDrib_cNotPt;
+
+ case Hexagon::LDrib_indexed_cdnPt :
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ return Hexagon::LDrib_indexed_cNotPt;
+
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ return Hexagon::POST_LDrib_cNotPt;
+
+ // Load unsigned half
+
+ case Hexagon::LDriuh_cdnPt :
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDriuh_cdnNotPt :
+ return Hexagon::LDriuh_cNotPt;
+
+ case Hexagon::LDriuh_indexed_cdnPt :
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ return Hexagon::LDriuh_indexed_cNotPt;
+
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ return Hexagon::POST_LDriuh_cNotPt;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cdnPt :
+ return Hexagon::LDriub_cPt;
+
+ case Hexagon::LDriub_cdnNotPt :
+ return Hexagon::LDriub_cNotPt;
+
+ case Hexagon::LDriub_indexed_cdnPt :
+ return Hexagon::LDriub_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ return Hexagon::LDriub_indexed_cNotPt;
+
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ return Hexagon::POST_LDriub_cPt;
+
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return Hexagon::POST_LDriub_cNotPt;
+
+ // V4 indexed+scaled Load
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_cPt_V4;
+
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_cNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_cPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_cPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_cPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_cPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ return Hexagon::LDd_GP_cPt_V4;
+
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ return Hexagon::LDd_GP_cNotPt_V4;
+
+ case Hexagon::LDb_GP_cdnPt_V4:
+ return Hexagon::LDb_GP_cPt_V4;
+
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ return Hexagon::LDb_GP_cNotPt_V4;
+
+ case Hexagon::LDub_GP_cdnPt_V4:
+ return Hexagon::LDub_GP_cPt_V4;
+
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ return Hexagon::LDub_GP_cNotPt_V4;
+
+ case Hexagon::LDh_GP_cdnPt_V4:
+ return Hexagon::LDh_GP_cPt_V4;
+
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ return Hexagon::LDh_GP_cNotPt_V4;
+
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ return Hexagon::LDuh_GP_cPt_V4;
+
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ return Hexagon::LDuh_GP_cNotPt_V4;
+
+ case Hexagon::LDw_GP_cdnPt_V4:
+ return Hexagon::LDw_GP_cPt_V4;
+
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ return Hexagon::LDw_GP_cNotPt_V4;
+
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ return Hexagon::LDrid_GP_cPt_V4;
+
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ return Hexagon::LDrid_GP_cNotPt_V4;
+
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ return Hexagon::LDrib_GP_cPt_V4;
+
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ return Hexagon::LDrib_GP_cNotPt_V4;
+
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ return Hexagon::LDriub_GP_cPt_V4;
+
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ return Hexagon::LDriub_GP_cNotPt_V4;
+
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ return Hexagon::LDrih_GP_cPt_V4;
+
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ return Hexagon::LDrih_GP_cNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ return Hexagon::LDriuh_GP_cPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ return Hexagon::LDriuh_GP_cNotPt_V4;
+
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ return Hexagon::LDriw_GP_cPt_V4;
+
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+ return Hexagon::LDriw_GP_cNotPt_V4;
+
+ // Conditional add
+
+ case Hexagon::ADD_ri_cdnPt :
+ return Hexagon::ADD_ri_cPt;
+ case Hexagon::ADD_ri_cdnNotPt :
+ return Hexagon::ADD_ri_cNotPt;
+
+ case Hexagon::ADD_rr_cdnPt :
+ return Hexagon::ADD_rr_cPt;
+ case Hexagon::ADD_rr_cdnNotPt:
+ return Hexagon::ADD_rr_cNotPt;
+
+ // Conditional logical Operations
+
+ case Hexagon::XOR_rr_cdnPt :
+ return Hexagon::XOR_rr_cPt;
+ case Hexagon::XOR_rr_cdnNotPt :
+ return Hexagon::XOR_rr_cNotPt;
+
+ case Hexagon::AND_rr_cdnPt :
+ return Hexagon::AND_rr_cPt;
+ case Hexagon::AND_rr_cdnNotPt :
+ return Hexagon::AND_rr_cNotPt;
+
+ case Hexagon::OR_rr_cdnPt :
+ return Hexagon::OR_rr_cPt;
+ case Hexagon::OR_rr_cdnNotPt :
+ return Hexagon::OR_rr_cNotPt;
+
+ // Conditional Subtract
+
+ case Hexagon::SUB_rr_cdnPt :
+ return Hexagon::SUB_rr_cPt;
+ case Hexagon::SUB_rr_cdnNotPt :
+ return Hexagon::SUB_rr_cNotPt;
+
+ // Conditional combine
+
+ case Hexagon::COMBINE_rr_cdnPt :
+ return Hexagon::COMBINE_rr_cPt;
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ return Hexagon::COMBINE_rr_cNotPt;
+
+ // Conditional shift operations
+
+ case Hexagon::ASLH_cdnPt_V4 :
+ return Hexagon::ASLH_cPt_V4;
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ return Hexagon::ASLH_cNotPt_V4;
+
+ case Hexagon::ASRH_cdnPt_V4 :
+ return Hexagon::ASRH_cPt_V4;
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ return Hexagon::ASRH_cNotPt_V4;
+
+ case Hexagon::SXTB_cdnPt_V4 :
+ return Hexagon::SXTB_cPt_V4;
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ return Hexagon::SXTB_cNotPt_V4;
+
+ case Hexagon::SXTH_cdnPt_V4 :
+ return Hexagon::SXTH_cPt_V4;
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ return Hexagon::SXTH_cNotPt_V4;
+
+ case Hexagon::ZXTB_cdnPt_V4 :
+ return Hexagon::ZXTB_cPt_V4;
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ return Hexagon::ZXTB_cNotPt_V4;
+
+ case Hexagon::ZXTH_cdnPt_V4 :
+ return Hexagon::ZXTH_cPt_V4;
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+ return Hexagon::ZXTH_cNotPt_V4;
+
+ // Store byte
+
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ return Hexagon::STrib_imm_cNotPt_V4;
+
+ case Hexagon::STrib_cdnPt_nv_V4 :
+ case Hexagon::STrib_cPt_nv_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_cdnNotPt_nv_V4 :
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ return Hexagon::STrib_cNotPt;
+
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnPt_nv_V4 :
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cNotPt;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STbri_cdnPt_nv_V4 :
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ return Hexagon::POST_STbri_cNotPt;
+
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ return Hexagon::STrib_GP_cPt_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ return Hexagon::STrib_GP_cNotPt_V4;
+
+ // Store new-value byte - unconditional
+ case Hexagon::STrib_nv_V4:
+ return Hexagon::STrib;
+
+ case Hexagon::STrib_indexed_nv_V4:
+ return Hexagon::STrib_indexed;
+
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ return Hexagon::STrib_indexed_shl_V4;
+
+ case Hexagon::STrib_shl_nv_V4:
+ return Hexagon::STrib_shl_V4;
+
+ case Hexagon::STrib_GP_nv_V4:
+ return Hexagon::STrib_GP_V4;
+
+ case Hexagon::STb_GP_nv_V4:
+ return Hexagon::STb_GP_V4;
+
+ case Hexagon::POST_STbri_nv_V4:
+ return Hexagon::POST_STbri;
+
+ // Store halfword
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ return Hexagon::STrih_imm_cNotPt_V4;
+
+ case Hexagon::STrih_cdnPt_nv_V4 :
+ case Hexagon::STrih_cPt_nv_V4 :
+ case Hexagon::STrih_cdnPt_V4 :
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ return Hexagon::STrih_cNotPt;
+
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_cNotPt;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_SThri_cdnPt_nv_V4 :
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ return Hexagon::POST_SThri_cNotPt;
+
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ return Hexagon::STrih_GP_cPt_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ return Hexagon::STrih_GP_cNotPt_V4;
+
+ // Store new-value halfword - unconditional
+
+ case Hexagon::STrih_nv_V4:
+ return Hexagon::STrih;
+
+ case Hexagon::STrih_indexed_nv_V4:
+ return Hexagon::STrih_indexed;
+
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ return Hexagon::STrih_indexed_shl_V4;
+
+ case Hexagon::STrih_shl_nv_V4:
+ return Hexagon::STrih_shl_V4;
+
+ case Hexagon::STrih_GP_nv_V4:
+ return Hexagon::STrih_GP_V4;
+
+ case Hexagon::STh_GP_nv_V4:
+ return Hexagon::STh_GP_V4;
+
+ case Hexagon::POST_SThri_nv_V4:
+ return Hexagon::POST_SThri;
+
+ // Store word
+
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ return Hexagon::STriw_imm_cPt_V4;
+
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ return Hexagon::STriw_imm_cNotPt_V4;
+
+ case Hexagon::STriw_cdnPt_nv_V4 :
+ case Hexagon::STriw_cPt_nv_V4 :
+ case Hexagon::STriw_cdnPt_V4 :
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ return Hexagon::STriw_cNotPt;
+
+ case Hexagon::STriw_indexed_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_cNotPt;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STwri_cdnPt_nv_V4 :
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ return Hexagon::POST_STwri_cNotPt;
+
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ return Hexagon::STriw_GP_cPt_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ return Hexagon::STriw_GP_cNotPt_V4;
+
+ // Store new-value word - unconditional
+
+ case Hexagon::STriw_nv_V4:
+ return Hexagon::STriw;
+
+ case Hexagon::STriw_indexed_nv_V4:
+ return Hexagon::STriw_indexed;
+
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ return Hexagon::STriw_indexed_shl_V4;
+
+ case Hexagon::STriw_shl_nv_V4:
+ return Hexagon::STriw_shl_V4;
+
+ case Hexagon::STriw_GP_nv_V4:
+ return Hexagon::STriw_GP_V4;
+
+ case Hexagon::STw_GP_nv_V4:
+ return Hexagon::STw_GP_V4;
+
+ case Hexagon::POST_STwri_nv_V4:
+ return Hexagon::POST_STwri;
+
+ // Store doubleword
+
+ case Hexagon::STrid_cdnPt_V4 :
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_cdnNotPt_V4 :
+ return Hexagon::STrid_cNotPt;
+
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_cNotPt;
+
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ return Hexagon::POST_STdri_cNotPt;
+
+ case Hexagon::STd_GP_cdnPt_V4 :
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ return Hexagon::STd_GP_cNotPt_V4;
+
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ return Hexagon::STrid_GP_cPt_V4;
+
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ return Hexagon::STrid_GP_cNotPt_V4;
+ }
+}
+
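+// Convert an instruction back from its .new form to the corresponding .old
+// (most basic) form, using the mapping in GetDotOldOp above.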
+bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotOldOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+ return true;
+}
+
+// Returns true if an instruction is predicated on the true sense of its
+// predicate (p0) and false if it is predicated on the negated sense (!p0).
+
+static bool GetPredicateSense(MachineInstr* MI,
+ const HexagonInstrInfo *QII) {
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown predicate sense of the instruction");
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrid_cPt :
+ case Hexagon::STrid_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::JMP_cdnPt :
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::ADD_ri_cPt :
+ case Hexagon::ADD_ri_cdnPt :
+ case Hexagon::ADD_rr_cPt :
+ case Hexagon::ADD_rr_cdnPt :
+ case Hexagon::XOR_rr_cPt :
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::AND_rr_cPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::OR_rr_cPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::SUB_rr_cPt :
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cPt :
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::ASLH_cPt_V4 :
+ case Hexagon::ASLH_cdnPt_V4 :
+ case Hexagon::ASRH_cPt_V4 :
+ case Hexagon::ASRH_cdnPt_V4 :
+ case Hexagon::SXTB_cPt_V4 :
+ case Hexagon::SXTB_cdnPt_V4 :
+ case Hexagon::SXTH_cPt_V4 :
+ case Hexagon::SXTH_cdnPt_V4 :
+ case Hexagon::ZXTB_cPt_V4 :
+ case Hexagon::ZXTB_cdnPt_V4 :
+ case Hexagon::ZXTH_cPt_V4 :
+ case Hexagon::ZXTH_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ return true;
+
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cNotPt :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrid_cNotPt :
+ case Hexagon::STrid_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::JMP_cdnNotPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::ADD_ri_cNotPt :
+ case Hexagon::ADD_ri_cdnNotPt :
+ case Hexagon::ADD_rr_cNotPt :
+ case Hexagon::ADD_rr_cdnNotPt :
+ case Hexagon::XOR_rr_cNotPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cNotPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cNotPt :
+ case Hexagon::OR_rr_cdnNotPt :
+ case Hexagon::SUB_rr_cNotPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+ case Hexagon::COMBINE_rr_cNotPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ case Hexagon::ASLH_cNotPt_V4 :
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ case Hexagon::ASRH_cNotPt_V4 :
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ case Hexagon::SXTB_cNotPt_V4 :
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ case Hexagon::SXTH_cNotPt_V4 :
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ case Hexagon::ZXTB_cNotPt_V4 :
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ case Hexagon::ZXTH_cNotPt_V4 :
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+ return false;
+ }
+ // return *some value* to avoid compiler warning
+ return false;
+}
+
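+// Returns true if MI is a .new instruction: either a new-value instruction or
+// an instruction predicated on a .new predicate value.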
+bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
+ if (isNewValueInst(MI))
+ return true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+
+ // Conditional add
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+
+ // Conditional logical operations
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::OR_rr_cdnNotPt :
+
+ // Conditional subtract
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+
+ // Conditional shift operations
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+
+ // Conditional stores
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+
+ // Store word conditionally
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ case Hexagon::LDb_GP_cdnPt_V4:
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ case Hexagon::LDub_GP_cdnPt_V4:
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ case Hexagon::LDh_GP_cdnPt_V4:
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ case Hexagon::LDw_GP_cdnPt_V4:
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+
+ case Hexagon::STrid_GP_cdnPt_V4:
+ case Hexagon::STrid_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STd_GP_cdnPt_V4:
+ case Hexagon::STd_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return true;
+ }
+ return false;
+}
+
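+// Returns the post-increment (address base) operand of a post-increment load
+// or store. In debug builds it is located as the register that appears both
+// as a def and as a use; in release builds the known operand positions are
+// used directly.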
+static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
+ const HexagonInstrInfo *QII) {
+ assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+ // A post-increment instruction defines its address register, so the same
+ // register appears both as a def and as a use. Use a DenseMap to find that
+ // duplicate. Caution: DenseMap initializes with a minimum of 64 buckets,
+ // whereas there are at most 5 operands in the post increment.
+ DenseMap<unsigned, unsigned> DefRegsSet;
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isDef()) {
+ DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
+ }
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isUse()) {
+ if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
+ return MI->getOperand(opNum);
+ }
+ }
+#else
+ if (MI->getDesc().mayLoad()) {
+ // The 2nd operand is always the post increment operand in load.
+ assert(MI->getOperand(1).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(1));
+ }
+ if (MI->getDesc().mayStore()) {
+ // The 1st operand is always the post increment operand in store.
+ assert(MI->getOperand(0).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(0));
+ }
+#endif
+ // We should never get here.
+ llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// get the value being stored
+static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+ // value being stored is always the last operand.
+ return (MI->getOperand(MI->getNumOperands()-1));
+}
+
+// Can this store be converted into a new-value store?
+// The following restrictions must be respected when converting a store into
+// a new value store.
+// 1. If an instruction uses auto-increment, its address register cannot
+// be a new-value register. Arch Spec 5.4.2.1
+// 2. If an instruction uses absolute-set addressing mode,
+// its address register cannot be a new-value register.
+// Arch Spec 5.4.2.1. TODO: This is not enabled since
+// absolute-set addressing mode patterns are not implemented.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+// as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets a new-value register is conditional, then
+// the instruction that uses the new-value register must also be conditional,
+// and both must always have their predicates evaluate identically.
+// Arch Spec 5.4.2.3.
+// 5. There is an implied restriction that a packet cannot have another store
+// if it contains a new value store. As a corollary, if there is already a
+// store in a packet, there can not be a new value store.
+// Arch Spec: 3.4.4.2
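+//
+// Illustrative example (assuming the producer of r1 ends up in the same
+// packet):
+//   { r1 = add(r2, #4)
+//     memw(r0 + #0) = r1.new }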
+bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
+ MachineInstr *PacketMI, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit)
+{
+ // Make sure we are looking at a store that can be newified.
+ if (!IsNewifyStore(MI))
+ return false;
+
+ // Make sure the value being stored is the dependent register, so it can be
+ // new value'ed.
+ if (GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() != DepReg)
+ return false;
+
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const MCInstrDesc& MCID = PacketMI->getDesc();
+ // first operand is always the result
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
+
+ // If there is already a store in the packet, a new value store cannot be added.
+ // Arch Spec 3.4.4.2.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
+ // then we don't need this
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+ return false;
+ }
+
+ if (PacketRC == &Hexagon::DoubleRegsRegClass) {
+ // new value store constraint: double regs can not feed into new value store
+ // arch spec section: 5.4.2.2
+ return false;
+ }
+
+ // Make sure it's NOT the post increment register that we are going to
+ // new value.
+ if (QII->isPostIncrement(MI) &&
+ MI->getDesc().mayStore() &&
+ GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ return false;
+ }
+
+ if (QII->isPostIncrement(PacketMI) &&
+ PacketMI->getDesc().mayLoad() &&
+ GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
+ // if source is post_inc, or absolute-set addressing,
+ // it can not feed into new value store
+ // r3 = memw(r2++#4)
+ // memw(r30 + #-1404) = r2.new -> can not be new value store
+ // arch spec section: 5.4.2.1
+ return false;
+ }
+
+ // If the source that feeds the store is predicated, the new value store
+ // must also be predicated.
+ if (QII->isPredicated(PacketMI)) {
+ if (!QII->isPredicated(MI))
+ return false;
+
+ // Check to make sure that they both will have their predicates
+ // evaluate identically
+ unsigned predRegNumSrc = 0;
+ unsigned predRegNumDst = 0;
+ const TargetRegisterClass* predRegClass = NULL;
+
+ // Get predicate register used in the source instruction
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if ( PacketMI->getOperand(opNum).isReg())
+ predRegNumSrc = PacketMI->getOperand(opNum).getReg();
+ predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
+ if (predRegClass == &Hexagon::PredRegsRegClass) {
+ break;
+ }
+ }
+ assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+ ("predicate register not found in a predicated PacketMI instruction"));
+
+ // Get predicate register used in new-value store instruction
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if ( MI->getOperand(opNum).isReg())
+ predRegNumDst = MI->getOperand(opNum).getReg();
+ predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
+ if (predRegClass == &Hexagon::PredRegsRegClass) {
+ break;
+ }
+ }
+ assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
+ ("predicate register not found in a predicated MI instruction"));
+
+ // New-value register producer and user (store) need to satisfy these
+ // constraints:
+ // 1) Both instructions should be predicated on the same register.
+ // 2) If producer of the new-value register is .new predicated then store
+ // should also be .new predicated and if producer is not .new predicated
+ // then store should not be .new predicated.
+ // 3) Both new-value register producer and user should have same predicate
+ // sense, i.e., either both should be negated or neither should be negated.
+
+ if (( predRegNumDst != predRegNumSrc) ||
+ isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
+ return false;
+ }
+ }
+
+ // Make sure that, other than the new-value register, no other register used
+ // by the store instruction has been modified in the same packet. Predicate
+ // registers can be modified, but they should not be modified between the
+ // producer and the store instruction, as that would make the two conditional
+ // on different values. We already know this holds for all the instructions
+ // up to and including PacketMI; however, we still need to perform the check
+ // for the remaining instructions in the packet.
+
+ std::vector<MachineInstr*>::iterator VI;
+ std::vector<MachineInstr*>::iterator VE;
+ unsigned StartCheck = 0;
+
+ for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* TempSU = MIToSUnit[*VI];
+ MachineInstr* TempMI = TempSU->getInstr();
+
+ // The following condition is true for all the instructions until PacketMI
+ // is reached (StartCheck is set to 0 before the for loop).
+ // The StartCheck flag is 1 for all the instructions after PacketMI.
+ if (TempMI != PacketMI && !StartCheck) // start processing only after
+ continue; // encountering PacketMI
+
+ StartCheck = 1;
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ continue;
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
+ QRI))
+ return false;
+ }
+ }
+
+ // Make sure that for non-post-increment stores:
+ // 1. The register being stored (DepReg) is not used in any other operand,
+ // such as a base or index register. This handles V4 base+index registers.
+ // The following store can not be dot new.
+ // Eg. r0 = add(r0, #3)
+ // memw(r1+r0<<#2) = r0
+ if (!QII->isPostIncrement(MI) &&
+ GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() == DepReg) {
+ for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).getReg() == DepReg) {
+ return false;
+ }
+ }
+ // 2. If the data definition comes from an implicit definition of the
+ // register, do not newify the store. Eg.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if (PacketMI->getOperand(opNum).isReg() &&
+ PacketMI->getOperand(opNum).getReg() == DepReg &&
+ PacketMI->getOperand(opNum).isDef() &&
+ PacketMI->getOperand(opNum).isImplicit()) {
+ return false;
+ }
+ }
+ }
+
+ // Can be dot new store.
+ return true;
+}
+
+// Can this MI be promoted to either a new-value store or a new-value jump?
+bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII)
+{
+
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ if (!QRI->Subtarget.hasV4TOps() ||
+ !IsNewifyStore(MI))
+ return false;
+
+ MachineInstr *PacketMI = PacketSU->getInstr();
+
+ // Check to see the store can be new value'ed.
+ if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+ return true;
+
+ // Check to see the compare/jump can be new value'ed.
+ // This is done as a pass on its own. Don't need to check it here.
+ return false;
+}
+
+// Check to see if an instruction can be dot new
+// There are three kinds.
+// 1. dot new on predicate - V2/V3/V4
+// 2. dot new on stores NV/ST - V4
+// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC )
+{
+ // already a dot new instruction
+ if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ return false;
+
+ if (!isNewifiable(MI))
+ return false;
+
+ // predicate .new
+ if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
+ return true;
+ else if (RC != &Hexagon::PredRegsRegClass &&
+ !IsNewifyStore(MI)) // MI is not a new-value store
+ return false;
+ else {
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotNewOp(MI->getOpcode());
+ const MCInstrDesc &desc = QII->get(NewOpcode);
+ DebugLoc dl;
+ MachineInstr *NewMI =
+ MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
+
+ if (!ResourcesAvailable)
+ return false;
+
+ // Only new value stores are handled here; new value jumps are generated
+ // in a separate pass.
+ if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Go through the packet instructions and search for anti dependency
+// between them and DepReg from MI
+// Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cdNotPt %P3, 2
+// to this packet:
+// {
+// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
+// c) %P3<def> = TFR_PdRs %R23
+// d) %R1<def> = TFRI_cdnPt %P3, 4
+// }
+// The P3 from a) and d) will be complements after
+// a)'s P3 is converted to .new form
+// Anti Dep between c) and b) is irrelevant for this case
+bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ SUnit* PacketSUDep = MIToSUnit[MI];
+
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // We only care for dependencies to predicated instructions
+ if(!QII->isPredicated(*VIN)) continue;
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // Look at dependencies between current members of the packet
+ // and predicate defining instruction MI.
+ // Make sure that dependency is on the exact register
+ // we care about.
+ if (PacketSU->isSucc(PacketSUDep)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
+ (PacketSU->Succs[i].getKind() == SDep::Anti) &&
+ (PacketSU->Succs[i].getReg() == DepReg)) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements
+bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
+ MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Currently can only reason about conditional transfers
+ if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) {
+ return false;
+ }
+
+ // Scheduling unit for candidate
+ SUnit* SU = MIToSUnit[MI1];
+
+ // One corner case deals with the following scenario:
+ // Trying to add
+ // a) %R24<def> = TFR_cPt %P0, %R25
+ // to this packet:
+ //
+ // {
+ // b) %R25<def> = TFR_cNotPt %P0, %R24
+ // c) %P0<def> = CMPEQri %R26, 1
+ // }
+ //
+ // On a general check a) and b) are complements, but the
+ // presence of c) will convert a) to .new form, and
+ // then it is no longer a complement.
+ // We attempt to detect this by analyzing existing
+ // dependencies in the packet.
+
+ // Analyze relationships between all existing members of the packet.
+ // Look for an anti dependency on the same predicate reg
+ // as used in the candidate
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ // The corner case exists when there is a true data
+ // dependency between the candidate and one of the current
+ // packet members, this dependency is on a predicate reg, and
+ // there already exists an anti dependency on the same predicate
+ // in the packet.
+ if (PacketSU->Succs[i].getSUnit() == SU &&
+ Hexagon::PredRegsRegClass.contains(
+ PacketSU->Succs[i].getReg()) &&
+ PacketSU->Succs[i].getKind() == SDep::Data &&
+ // Here we know that *VIN is a predicate-setting instruction
+ // with a true data dependency to the candidate on the register
+ // we care about - c) in the above example.
+ // Now we need to see if there is an anti dependency
+ // from c) to any other instruction in the
+ // same packet on the predicate reg of interest.
+ RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
+ MIToSUnit)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ // If the above case does not apply, check regular
+ // complement condition.
+ // Check that the predicate register is the same and
+ // that the predicate sense is different
+ // We also need to differentiate .old vs. .new:
+ // !p0 is not complementary to p0.new
+ return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
+ (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
+ (isDotNewInst(MI1) == isDotNewInst(MI2)));
+}
+
+// initPacketizerState - Initialize packetizer flags
+void HexagonPacketizerList::initPacketizerState() {
+
+ Dependence = false;
+ PromotedToDotNew = false;
+ GlueToNewValueJump = false;
+ GlueAllocframeStore = false;
+ FoundSequentialDependence = false;
+
+ return;
+}
+
+// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (MI->isDebugValue())
+ return true;
+
+ // We must print out inline assembly
+ if (MI->isInlineAsm())
+ return false;
+
+ // We check if MI has any functional units mapped to it.
+ // If it doesn't, we ignore the instruction.
+ const MCInstrDesc& TID = MI->getDesc();
+ unsigned SchedClass = TID.getSchedClass();
+ const InstrStage* IS =
+ ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ unsigned FuncUnits = IS->getUnits();
+ return !FuncUnits;
+}
+
+// isSoloInstruction - Returns true for instructions that must be
+// scheduled in their own packet.
+bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+
+ if (MI->isInlineAsm())
+ return true;
+
+ if (MI->isEHLabel())
+ return true;
+
+ // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
+ // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
+ // They must not be grouped with other instructions in a packet.
+ if (IsSchedBarrier(MI))
+ return true;
+
+ return false;
+}
+
+// isLegalToPacketizeTogether:
+// SUI is the current instruction that is outside of the current packet.
+// SUJ is the current instruction inside the current packet against which
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ const MCInstrDesc &MCIDI = I->getDesc();
+ const MCInstrDesc &MCIDJ = J->getDesc();
+
+ MachineBasicBlock::iterator II = I;
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ const HexagonRegisterInfo* QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ // Inline asm cannot go in the packet.
+ if (I->getOpcode() == Hexagon::INLINEASM)
+ llvm_unreachable("Should not meet inline asm here!");
+
+ if (isSoloInstruction(I))
+ llvm_unreachable("Should not meet solo instr here!");
+
+ // A callee-saved register save call can only be in a packet
+ // with instructions that don't write to the callee-saved registers.
+ if ((QII->isSaveCalleeSavedRegsCall(I) &&
+ DoesModifyCalleeSavedReg(J, QRI)) ||
+ (QII->isSaveCalleeSavedRegsCall(J) &&
+ DoesModifyCalleeSavedReg(I, QRI))) {
+ Dependence = true;
+ return false;
+ }
+
+ // Two control flow instructions cannot go in the same packet.
+ if (IsControlFlow(I) && IsControlFlow(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ // A LoopN instruction cannot appear in the same packet as a jump or call.
+ if (IsLoopN(I) && ( IsDirectJump(J)
+ || MCIDJ.isCall()
+ || QII->isDeallocRet(J))) {
+ Dependence = true;
+ return false;
+ }
+ if (IsLoopN(J) && ( IsDirectJump(I)
+ || MCIDI.isCall()
+ || QII->isDeallocRet(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ // dealloc_return cannot appear in the same packet as a conditional or
+ // unconditional jump.
+ if (QII->isDeallocRet(I) && ( MCIDJ.isBranch()
+ || MCIDJ.isCall()
+ || MCIDJ.isBarrier())) {
+ Dependence = true;
+ return false;
+ }
+
+
+ // V4 allows dual stores, but does not allow a second store if the
+ // first store is not in SLOT0. New value store, new value jump,
+ // dealloc_return and memop always take SLOT0.
+ // Arch spec 3.4.4.2
+ if (QRI->Subtarget.hasV4TOps()) {
+
+ if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ // A dealloc_return cannot be in the same packet as a store.
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)){
+ Dependence = true;
+ return false;
+ }
+
+ // If an instruction feeds new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ MachineInstr *NextMI = NextMII;
+
+ if (QII->isNewValueJump(NextMI)) {
+
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
+
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
+
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
+
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+
+ // NVJ cannot be part of the dual jump - Arch Spec: section 7.8.
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
+ }
+ }
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
+ }
+ }
+
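+ // Walk the dependence edges from J to I and check whether any of them
+ // forces the two instructions into separate packets.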
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0;
+ (i < SUJ->Succs.size()) && !FoundSequentialDependence;
+ ++i) {
+
+ if (SUJ->Succs[i].getSUnit() != SUI) {
+ continue;
+ }
+
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+
+ // For direct calls:
+ // Ignore register dependences for call instructions for
+ // packetization purposes except for those due to r31 and
+ // predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+ // TODO: Currently, jumpr handles only returns through r31, so the
+ // following logic (specifically IsCallDependent) works fine.
+ // Once jumpr is enabled for registers other than r31, the last part,
+ // where IsCallDependent handles the indirect call case, needs to be
+ // reworked. Bug 6216 is open for this.
+ //
+ unsigned DepReg = 0;
+ const TargetRegisterClass* RC = NULL;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = QRI->getMinimalPhysRegClass(DepReg);
+ }
+ if ((MCIDI.isCall() || MCIDI.isReturn()) &&
+ (!IsRegDependence(DepType) ||
+ !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
+ /* do nothing */
+ }
+
+ // For instructions that can be promoted to dot-new, try to promote.
+ else if ((DepType == SDep::Data) &&
+ CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
+ PromoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ /* do nothing */
+ }
+
+ else if ((DepType == SDep::Data) &&
+ (QII->isNewValueJump(I))) {
+ /* do nothing */
+ }
+
+ // For predicated instructions, if the predicates are complements
+ // then there can be no dependence.
+ else if (QII->isPredicated(I) &&
+ QII->isPredicated(J) &&
+ ArePredicatesComplements(I, J, MIToSUnit)) {
+ /* do nothing */
+
+ }
+ else if (IsDirectJump(I) &&
+ !MCIDJ.isBranch() &&
+ !MCIDJ.isCall() &&
+ (DepType == SDep::Order)) {
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions
+ /* do nothing */
+ }
+ else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
+ (DepType != SDep::Output)) {
+ // Ignore all dependences for jumps except for true and output
+ // dependences
+ /* do nothing */
+ }
+
+ // Ignore output dependences due to superregs. We can, for instance,
+ // write to two different subregisters of R1:0 in the same cycle.
+ //
+ // If neither I nor J defines DepReg, then this is a superfluous
+ // output dependence. The dependence must be of the form:
+ //   R0 = ...
+ //   R1 = ...
+ // and there is an output dependence between the two instructions
+ // with DepReg = D0.
+ // We want to ignore these dependences.
+ // Ideally, the dependence constructor should annotate such
+ // dependences. We can then avoid this relatively expensive check.
+ //
+ else if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check if I or J really defines DepReg.
+ if (I->definesRegister(DepReg) ||
+ J->definesRegister(DepReg)) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // We ignore Order dependences for
+ // 1. Two loads unless they are volatile.
+ // 2. Two stores in V4 unless they are volatile.
+ else if ((DepType == SDep::Order) &&
+ !I->hasVolatileMemoryRef() &&
+ !J->hasVolatileMemoryRef()) {
+ if (QRI->Subtarget.hasV4TOps() &&
+ // Hexagon V4 allows dual stores.
+ MCIDI.mayStore() && MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ // store followed by store -- not OK on V2
+ // store followed by load -- not OK on all (OK if addresses
+ // are not aliased)
+ // load followed by store -- OK on all
+ // load followed by load -- OK on all
+ else if ( !MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ else {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // For V4, special case ALLOCFRAME. Even though there is a dependency
+ // between ALLOCFRAME and the subsequent store, allow them to be
+ // packetized in the same packet. This implies that the store is using
+ // the caller's SP. Hence, the offset needs to be updated accordingly.
+ else if (DepType == SDep::Data
+ && QRI->Subtarget.hasV4TOps()
+ && J->getOpcode() == Hexagon::ALLOCFRAME
+ && (I->getOpcode() == Hexagon::STrid
+ || I->getOpcode() == Hexagon::STriw
+ || I->getOpcode() == Hexagon::STrib)
+ && I->getOperand(0).getReg() == QRI->getStackRegister()
+ && QII->isValidOffset(I->getOpcode(),
+ I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE)))
+ {
+ GlueAllocframeStore = true;
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use the SP of the previous stack frame, i.e.
+ // the caller's SP. Therefore, we need to recalculate the offset
+ // accordingly.
+ I->getOperand(1).setImm(I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE));
+ }
+
+ //
+ // Skip over anti-dependences. Two instructions that are
+ // anti-dependent can share a packet
+ //
+ else if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// isLegalToPruneDependencies
+bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
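+ // If a dependence was found, undo any speculative changes made while
+ // checking legality before rejecting the pairing.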
+ if (Dependence) {
+
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew) {
+ DemoteToDotOld(I);
+ }
+
+ // Check if the instruction (must be a store) was glued with an Allocframe
+ // instruction. If so, restore its offset to its original value, i.e. use
+ // the current SP instead of the caller's SP.
+ if (GlueAllocframeStore) {
+ I->getOperand(1).setImm(I->getOperand(1).getImm() +
+ FrameSize + HEXAGON_LRFP_SIZE);
+ }
+
+ return false;
+ }
+ return true;
+}
+
+MachineBasicBlock::iterator
+HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
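+ // If MI feeds a new value jump, the producer and the jump are added to
+ // the packet together.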
+ if (GlueToNewValueJump) {
+
+ ++MII;
+ MachineInstr *nvjMI = MII;
+ assert(ResourceTracker->canReserveResources(MI));
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(MI) &&
+ !tryAllocateResourcesForConstExt(MI)) {
+ endPacket(MBB, MI);
+ ResourceTracker->reserveResources(MI);
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(MI);
+ // Reserve resources for new value jump constant extender.
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(nvjMI);
+ assert(ResourceTracker->canReserveResources(nvjMI) &&
+ "Ensure that there is a slot");
+
+ } else if ( // An extended instruction takes two slots in the packet.
+ // Try to reserve and allocate 4 bytes in the current packet first.
+ (QII->isExtended(nvjMI)
+ && (!tryAllocateResourcesForConstExt(nvjMI)
+ || !ResourceTracker->canReserveResources(nvjMI)))
+ || // For a non-extended instruction, there is no need to allocate an extra 4 bytes.
+ (!QII->isExtended(nvjMI) &&
+ !ResourceTracker->canReserveResources(nvjMI)))
+ {
+ endPacket(MBB, MI);
+ // A new and empty packet starts.
+ // We are sure that the resource requirements can be satisfied.
+ // Therefore, there is no need to call "canReserveResources" again.
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(nvjMI))
+ reserveResourcesForConstExt(nvjMI);
+ }
+ // Here, we are sure that "reserveResources" would succeed.
+ ResourceTracker->reserveResources(nvjMI);
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(nvjMI);
+ } else {
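+ // MI is not glued to a new value jump; add it to the packet by itself.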
+ if ( QII->isExtended(MI)
+ && ( !tryAllocateResourcesForConstExt(MI)
+ || !ResourceTracker->canReserveResources(MI)))
+ {
+ endPacket(MBB, MI);
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew) {
+ DemoteToDotOld(MI);
+ }
+ reserveResourcesForConstExt(MI);
+ }
+ // If "MI" is not an extended insn,
+ // the resource availability has already been checked.
+ ResourceTracker->reserveResources(MI);
+ CurrentPacketMIs.push_back(MI);
+ }
+ return MII;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonPacketizer() {
+ return new HexagonPacketizer();
+}
+
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 47384cd533fa..035afe88d5bc 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -15,6 +15,7 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonInstPrinter.h"
+#include "HexagonMCInst.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -37,20 +38,50 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
+ printInst((const HexagonMCInst*)(MI), O, Annot);
+}
+
+void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
+ StringRef Annot) {
const char packetPadding[] = " ";
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCInst Nop;
+ // Ending a hardware loop is different from ending a regular packet.
+ assert(MI->isEndPacket() && "Loop end must also end the packet");
+
+ if (MI->isStartPacket()) {
+ // There must be a packet to end a loop.
+ // FIXME: when shuffling is always run, this shouldn't be needed.
+ HexagonMCInst Nop;
+ StringRef NoAnnot;
+
+ Nop.setOpcode (Hexagon::NOP);
+ Nop.setStartPacket (MI->isStartPacket());
+ printInst (&Nop, O, NoAnnot);
+ }
+
+ // Close the packet.
+ if (MI->isEndPacket())
+ O << packetPadding << endPacket;
- O << packetPadding << startPacket << '\n';
- Nop.setOpcode(Hexagon::NOP);
- printInstruction(&Nop, O);
- O << packetPadding << endPacket;
+ printInstruction(MI, O);
+ }
+ else {
+ // Prefix the insn opening the packet.
+ if (MI->isStartPacket())
+ O << packetPadding << startPacket << '\n';
+
+ printInstruction(MI, O);
+
+ // Suffix the insn closing the packet.
+ if (MI->isEndPacket())
+ // Suffix the packet in a new line always, since the GNU assembler has
+ // issues with a closing brace on the same line as CONST{32,64}.
+ O << '\n' << packetPadding << endPacket;
}
- printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -65,22 +96,22 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if(MO.isImm()) {
printImmOperand(MI, OpNo, O);
} else {
- assert(false && "Unknown operand");
+ llvm_unreachable("Unknown operand");
}
}
-void HexagonInstPrinter::printImmOperand
- (const MCInst *MI, unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
O << MI->getOperand(OpNo).getImm();
}
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
+ raw_ostream &O) const {
O << MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printUnsignedImmOperand
- (const MCInst *MI, unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
+ unsigned OpNo, raw_ostream &O) const {
O << MI->getOperand(OpNo).getImm();
}
@@ -89,13 +120,13 @@ void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
O << -MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printNOneImmOperand
- (const MCInst *MI, unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
O << -1;
}
-void HexagonInstPrinter::printMEMriOperand
- (const MCInst *MI, unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
@@ -103,8 +134,8 @@ void HexagonInstPrinter::printMEMriOperand
O << " + #" << MO1.getImm();
}
-void HexagonInstPrinter::printFrameIndexOperand
- (const MCInst *MI, unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
index dad4334c3eb4..902a32352f1c 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -14,6 +14,7 @@
#ifndef HEXAGONINSTPRINTER_H
#define HEXAGONINSTPRINTER_H
+#include "HexagonMCInst.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -25,6 +26,7 @@ namespace llvm {
: MCInstPrinter(MAI, MII, MRI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
StringRef getRegName(unsigned RegNo) const;
@@ -33,16 +35,16 @@ namespace llvm {
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
+ void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
- void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
+ void printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
@@ -55,7 +57,8 @@ namespace llvm {
const;
void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printConstantPool(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const;
void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
{ printSymbol(MI, OpNo, O, true); }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index ed55c3c1c15c..7221e906342e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -23,14 +23,41 @@ namespace llvm {
/// instruction info tracks.
///
namespace HexagonII {
-
// *** The code below must match HexagonInstrFormat*.td *** //
+ // Insn types.
+ // *** Must match HexagonInstrFormat*.td ***
+ enum Type {
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeMARKER = 31 // Such as end of a HW loop.
+ };
+
+
+
// MCInstrDesc TSFlags
+ // *** Must match HexagonInstrFormat*.td ***
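+ // Bit layout: bits 4-0 hold the instruction type, bit 5 marks solo
+ // instructions, and bit 6 marks predicated instructions.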
enum {
+ // This 5-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x1f,
+
+ // Solo instructions.
+ SoloPos = 5,
+ SoloMask = 0x1,
// Predicated instructions.
- PredicatedPos = 1,
+ PredicatedPos = 6,
PredicatedMask = 0x1
};
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8ec5673470fc..8995080974cc 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index bf1deef491c9..6c3e8b644703 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -30,6 +30,8 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
+add_dependencies(LLVMMBlazeCodeGen intrinsics_gen)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index b4edff0709e6..c2888553c5e3 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -50,7 +50,7 @@ def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
// MBlaze processors supported.
//===----------------------------------------------------------------------===//
-def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze", NoItineraries, []>;
def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 55fffe3ebfa7..e9f340f2f6d2 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -135,7 +135,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
unsigned RegNum = getMBlazeRegisterNumbering(Reg);
- if (MBlaze::GPRRegisterClass->contains(Reg))
+ if (MBlaze::GPRRegClass.contains(Reg))
CPUBitmask |= (1 << RegNum);
}
@@ -187,7 +187,7 @@ void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
//===----------------------------------------------------------------------===//
void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+ MBlazeMCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
@@ -200,7 +200,13 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ }
printOperand(MI, OpNo, O);
return false;
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index edfc3355691f..310c25e839c3 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -62,9 +62,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
- addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
+ addRegisterClass(MVT::i32, &MBlaze::GPRRegClass);
if (Subtarget->hasFPU()) {
- addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
+ addRegisterClass(MVT::f32, &MBlaze::GPRRegClass);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
}
@@ -291,12 +291,12 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
loop->addSuccessor(finish);
loop->addSuccessor(loop);
- unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned IAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
.addReg(MI->getOperand(2).getReg())
.addImm(31);
- unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned IVAL = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
.addReg(MI->getOperand(1).getReg())
.addImm(0);
@@ -305,14 +305,14 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
.addReg(IAMT)
.addMBB(finish);
- unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
- unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned DST = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ unsigned NDST = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
.addReg(IVAL).addMBB(MBB)
.addReg(NDST).addMBB(loop);
- unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
- unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned SAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
+ unsigned NAMT = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
.addReg(IAMT).addMBB(MBB)
.addReg(NAMT).addMBB(loop);
@@ -500,7 +500,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
case MBlaze::LAN32: opcode = MBlaze::AND; break;
}
- finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass);
start->addSuccessor(exit);
start->addSuccessor(start);
@@ -510,7 +510,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
if (MI->getOpcode() == MBlaze::LAN32) {
unsigned tmp = finalReg;
- finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
.addReg(tmp)
.addImm(-1);
@@ -528,7 +528,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
final->addSuccessor(exit);
final->addSuccessor(start);
- unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned CMP = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg());
@@ -543,7 +543,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
}
}
- unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned CHK = R.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(final, dl, TII->get(MBlaze::SWX))
.addReg(finalReg)
.addReg(MI->getOperand(1).getReg())
@@ -681,13 +681,19 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall.
SDValue MBlazeTargetLowering::
-LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
// MBlaze does not yet support tail call optimization
isTailCall = false;
@@ -702,7 +708,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
// Get a count of how many bytes are to be pushed on the stack.
@@ -841,7 +847,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
@@ -884,7 +890,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
SDValue StackPtr;
@@ -899,9 +905,9 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = MBlaze::GPRRegisterClass;
+ RC = &MBlaze::GPRRegClass;
else if (RegVT == MVT::f32)
- RC = MBlaze::GPRRegisterClass;
+ RC = &MBlaze::GPRRegClass;
else
llvm_unreachable("RegVT not supported by LowerFormalArguments");
@@ -964,7 +970,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
StackPtr = DAG.getRegister(StackReg, getPointerTy());
// The last register argument that must be saved is MBlaze::R10
- const TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+ const TargetRegisterClass *RC = &MBlaze::GPRRegClass;
unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
@@ -1016,7 +1022,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
@@ -1124,14 +1130,14 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ return std::make_pair(0U, &MBlaze::GPRRegClass);
// TODO: These can't possibly be right, but match what was in
// getRegClassForInlineAsmConstraint.
case 'd':
case 'y':
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ return std::make_pair(0U, &MBlaze::GPRRegClass);
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 6a79fc126702..a01fab567c8a 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -132,13 +132,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index db71434443bf..b5025fc8ee6c 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -287,7 +287,7 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
+ GlobalBaseReg = RegInfo.createVirtualRegister(&MBlaze::GPRRegClass);
BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
GlobalBaseReg).addReg(MBlaze::R20);
RegInfo.addLiveIn(MBlaze::R20);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 02a21574f493..139bf7156a69 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -295,7 +295,7 @@ class BranchI<bits<6> op, bits<5> br, string instr_asm> :
// Branch and Link Instructions
//===----------------------------------------------------------------------===//
class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
- TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target),
!strconcat(instr_asm, " $link, $target"),
[], IIC_BRl> {
let ra = br;
@@ -303,7 +303,7 @@ class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
}
class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
- TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+ TB<op, (outs), (ins GPR:$link, calltarget:$target),
!strconcat(instr_asm, " $link, $target"),
[], IIC_BRl> {
let ra = br;
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index 7b97744ea933..8ab2c9a3a633 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -21,18 +21,16 @@ namespace llvm {
class MachineInstr;
class MachineModuleInfoMachO;
class MachineOperand;
- class Mangler;
/// MBlazeMCInstLower - This class is used to lower an MachineInstr
/// into an MCInst.
class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
MCContext &Ctx;
- Mangler &Mang;
AsmPrinter &Printer;
public:
- MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
- : Ctx(ctx), Mang(mang), Printer(printer) {}
+ MBlazeMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4a3ae5fc1470..cd5691ce644f 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -40,11 +40,6 @@ def IIC_WDC : InstrItinClass;
def IIC_Pseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
-// MBlaze generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
-
-//===----------------------------------------------------------------------===//
// MBlaze instruction itineraries for three stage pipeline.
//===----------------------------------------------------------------------===//
include "MBlazeSchedule3.td"
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index d12d14245ea3..dc2ad29be2a2 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -43,13 +43,6 @@ MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
-
- // Compute the issue width of the MBlaze itineraries
- computeIssueWidth();
-}
-
-void MBlazeSubtarget::computeIssueWidth() {
- InstrItins.IssueWidth = 1;
}
bool MBlazeSubtarget::
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 62393d0920bb..5f82f142032a 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -68,7 +68,7 @@ TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
bool MBlazePassConfig::addInstSelector() {
- PM->add(createMBlazeISelDag(getMBlazeTargetMachine()));
+ addPass(createMBlazeISelDag(getMBlazeTargetMachine()));
return false;
}
@@ -76,6 +76,6 @@ bool MBlazePassConfig::addInstSelector() {
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MBlazePassConfig::addPreEmitPass() {
- PM->add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
+ addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
return true;
}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index c9b16368ecc5..bfd11a070ba5 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -98,6 +98,7 @@ public:
MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
index ae82c32a5f26..7cc96c62c830 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -22,6 +22,7 @@ class MCContext;
class MCCodeEmitter;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
@@ -30,6 +31,7 @@ class raw_ostream;
extern Target TheMBlazeTarget;
MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a8f9b52746ad..f9ecaed83a6f 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -23,6 +23,8 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
+add_dependencies(LLVMMSP430CodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 1d1094bc339d..86bc183c1bdf 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -154,7 +154,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+ MSP430MCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 071a2f7de2c8..f8b7e149f0db 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -59,13 +59,13 @@ HWMultMode("msp430-hwmult-mode",
MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+ Subtarget(*tm.getSubtargetImpl()) {
TD = getTargetData();
// Set up the register classes.
- addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
- addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+ addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
+ addRegisterClass(MVT::i16, &MSP430::GR16RegClass);
// Compute derived properties from the register classes
computeRegisterProperties();
@@ -226,9 +226,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
default: break;
case 'r': // GENERAL_REGS
if (VT == MVT::i8)
- return std::make_pair(0U, MSP430::GR8RegisterClass);
+ return std::make_pair(0U, &MSP430::GR8RegClass);
- return std::make_pair(0U, MSP430::GR16RegisterClass);
+ return std::make_pair(0U, &MSP430::GR16RegClass);
}
}
@@ -266,14 +266,19 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
}
SDValue
-MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
// MSP430 target does not yet support tail call optimization.
isTailCall = false;
@@ -310,7 +315,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
assert(!isVarArg && "Varargs not supported yet");
@@ -330,8 +335,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
llvm_unreachable(0);
}
case MVT::i16:
- unsigned VReg =
- RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
@@ -391,7 +395,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
@@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
@@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
@@ -1024,27 +1028,27 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
Opc = MSP430::SHL8r1;
- RC = MSP430::GR8RegisterClass;
+ RC = &MSP430::GR8RegClass;
break;
case MSP430::Shl16:
Opc = MSP430::SHL16r1;
- RC = MSP430::GR16RegisterClass;
+ RC = &MSP430::GR16RegClass;
break;
case MSP430::Sra8:
Opc = MSP430::SAR8r1;
- RC = MSP430::GR8RegisterClass;
+ RC = &MSP430::GR8RegClass;
break;
case MSP430::Sra16:
Opc = MSP430::SAR16r1;
- RC = MSP430::GR16RegisterClass;
+ RC = &MSP430::GR16RegClass;
break;
case MSP430::Srl8:
Opc = MSP430::SAR8r1c;
- RC = MSP430::GR8RegisterClass;
+ RC = &MSP430::GR8RegClass;
break;
case MSP430::Srl16:
Opc = MSP430::SAR16r1c;
- RC = MSP430::GR16RegisterClass;
+ RC = &MSP430::GR16RegClass;
break;
}
@@ -1072,8 +1076,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
LoopBB->addSuccessor(RemBB);
LoopBB->addSuccessor(LoopBB);
- unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
- unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass);
unsigned ShiftReg = RI.createVirtualRegister(RC);
unsigned ShiftReg2 = RI.createVirtualRegister(RC);
unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index e372f00bf324..d8ad02fca403 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -152,12 +152,7 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -174,7 +169,6 @@ namespace llvm {
SelectionDAG &DAG) const;
const MSP430Subtarget &Subtarget;
- const MSP430TargetMachine &TM;
const TargetData *TD;
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c03ba470af27..be332f05b30b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
: MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
- RI(tm, *this), TM(tm) {}
+ RI(tm, *this) {}
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 04f339bdd608..d79f99245e71 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -42,7 +42,6 @@ namespace MSP430II {
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
- MSP430TargetMachine &TM;
public:
explicit MSP430InstrInfo(MSP430TargetMachine &TM);
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 4348dd5e54e6..f003574eda00 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -210,13 +210,13 @@ let isCall = 1 in
let Defs = [R12W, R13W, R14W, R15W, SRW],
Uses = [SPW] in {
def CALLi : II16i<0x0,
- (outs), (ins i16imm:$dst, variable_ops),
+ (outs), (ins i16imm:$dst),
"call\t$dst", [(MSP430call imm:$dst)]>;
def CALLr : II16r<0x0,
- (outs), (ins GR16:$dst, variable_ops),
+ (outs), (ins GR16:$dst),
"call\t$dst", [(MSP430call GR16:$dst)]>;
def CALLm : II16m<0x0,
- (outs), (ins memsrc:$dst, variable_ops),
+ (outs), (ins memsrc:$dst),
"call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
}
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 24151e2b8ea1..794aa56bf04c 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -21,18 +21,16 @@ namespace llvm {
class MachineInstr;
class MachineModuleInfoMachO;
class MachineOperand;
- class Mangler;
/// MSP430MCInstLower - This class is used to lower an MachineInstr
/// into an MCInst.
class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
MCContext &Ctx;
- Mangler &Mang;
AsmPrinter &Printer;
public:
- MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
- : Ctx(ctx), Mang(mang), Printer(printer) {}
+ MSP430MCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 51ec71ace525..aed46a2ec595 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -96,7 +96,8 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
+MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
return &MSP430::GR16RegClass;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 82ee4997392c..9ee0a03f6310 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -39,7 +39,8 @@ public:
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 3f2eb8ccef10..07619d0675b8 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -78,8 +78,4 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
// Frame pointer, sometimes allocable
FPW,
// Volatile, but not allocable
- PCW, SPW, SRW, CGW)>
-{
- let SubRegClasses = [(GR8 subreg_8bit)];
-}
-
+ PCW, SPW, SRW, CGW)>;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 3acf96bb7d27..817001d6ad7c 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -60,12 +60,12 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
bool MSP430PassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
+ addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
return false;
}
bool MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- PM->add(createMSP430BranchSelectionPass());
+ addPass(createMSP430BranchSelectionPass());
return false;
}
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index ac21c259fb44..6c7343bbe513 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,6 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMMipsAsmParser
MipsAsmParser.cpp
)
+add_dependencies(LLVMMipsAsmParser MipsCommonTableGen)
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 0500c5dc3832..aab8a011d4d0 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -13,26 +13,33 @@ tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
+ Mips16FrameLowering.cpp
+ Mips16InstrInfo.cpp
+ Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
- MipsEmitGPRestore.cpp
- MipsExpandPseudo.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
MipsFrameLowering.cpp
+ MipsLongBranch.cpp
MipsMCInstLower.cpp
MipsMachineFunction.cpp
MipsRegisterInfo.cpp
+ MipsSEFrameLowering.cpp
+ MipsSEInstrInfo.cpp
+ MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
MipsTargetObjectFile.cpp
MipsSelectionDAGInfo.cpp
)
+add_dependencies(LLVMMipsCodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(Disassembler)
add_subdirectory(TargetInfo)
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 78dbc0694902..042b456538c3 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -13,136 +13,87 @@
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
-
#include "MipsGenEDInfo.inc"
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
-/// MipsDisassembler - a disasembler class for Mips32.
-class MipsDisassembler : public MCDisassembler {
+namespace {
+
+/// MipsDisassemblerBase - a disassembler class for Mips.
+class MipsDisassemblerBase : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- MipsDisassembler(const MCSubtargetInfo &STI, bool bigEndian) :
- MCDisassembler(STI), isBigEndian(bigEndian) {
- }
-
- ~MipsDisassembler() {
- }
+ MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
+ virtual ~MipsDisassemblerBase() {}
/// getEDInfo - See MCDisassembler.
const EDInstInfo *getEDInfo() const;
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+
private:
+ const MCRegisterInfo *RegInfo;
+protected:
bool isBigEndian;
};
-
-/// Mips64Disassembler - a disasembler class for Mips64.
-class Mips64Disassembler : public MCDisassembler {
+/// MipsDisassembler - a disassembler class for Mips32.
+class MipsDisassembler : public MipsDisassemblerBase {
public:
/// Constructor - Initializes the disassembler.
///
- Mips64Disassembler(const MCSubtargetInfo &STI, bool bigEndian) :
- MCDisassembler(STI), isBigEndian(bigEndian) {
- }
-
- ~Mips64Disassembler() {
- }
+ MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MipsDisassemblerBase(STI, Info, bigEndian) {}
/// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-
-private:
- bool isBigEndian;
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
};
-const EDInstInfo *MipsDisassembler::getEDInfo() const {
- return instInfoMips;
-}
-
-const EDInstInfo *Mips64Disassembler::getEDInfo() const {
- return instInfoMips;
-}
-
-// Decoder tables for Mips register
-static const unsigned CPURegsTable[] = {
- Mips::ZERO, Mips::AT, Mips::V0, Mips::V1,
- Mips::A0, Mips::A1, Mips::A2, Mips::A3,
- Mips::T0, Mips::T1, Mips::T2, Mips::T3,
- Mips::T4, Mips::T5, Mips::T6, Mips::T7,
- Mips::S0, Mips::S1, Mips::S2, Mips::S3,
- Mips::S4, Mips::S5, Mips::S6, Mips::S7,
- Mips::T8, Mips::T9, Mips::K0, Mips::K1,
- Mips::GP, Mips::SP, Mips::FP, Mips::RA
-};
-static const unsigned FGR32RegsTable[] = {
- Mips::F0, Mips::F1, Mips::F2, Mips::F3,
- Mips::F4, Mips::F5, Mips::F6, Mips::F7,
- Mips::F8, Mips::F9, Mips::F10, Mips::F11,
- Mips::F12, Mips::F13, Mips::F14, Mips::F15,
- Mips::F16, Mips::F17, Mips::F18, Mips::F18,
- Mips::F20, Mips::F21, Mips::F22, Mips::F23,
- Mips::F24, Mips::F25, Mips::F26, Mips::F27,
- Mips::F28, Mips::F29, Mips::F30, Mips::F31
-};
+/// Mips64Disassembler - a disassembler class for Mips64.
+class Mips64Disassembler : public MipsDisassemblerBase {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ bool bigEndian) :
+ MipsDisassemblerBase(STI, Info, bigEndian) {}
-static const unsigned CPU64RegsTable[] = {
- Mips::ZERO_64, Mips::AT_64, Mips::V0_64, Mips::V1_64,
- Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
- Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64,
- Mips::T4_64, Mips::T5_64, Mips::T6_64, Mips::T7_64,
- Mips::S0_64, Mips::S1_64, Mips::S2_64, Mips::S3_64,
- Mips::S4_64, Mips::S5_64, Mips::S6_64, Mips::S7_64,
- Mips::T8_64, Mips::T9_64, Mips::K0_64, Mips::K1_64,
- Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64
+ /// getInstruction - See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
};
-static const unsigned FGR64RegsTable[] = {
- Mips::D0_64, Mips::D1_64, Mips::D2_64, Mips::D3_64,
- Mips::D4_64, Mips::D5_64, Mips::D6_64, Mips::D7_64,
- Mips::D8_64, Mips::D9_64, Mips::D10_64, Mips::D11_64,
- Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
- Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64,
- Mips::D20_64, Mips::D21_64, Mips::D22_64, Mips::D23_64,
- Mips::D24_64, Mips::D25_64, Mips::D26_64, Mips::D27_64,
- Mips::D28_64, Mips::D29_64, Mips::D30_64, Mips::D31_64
-};
+} // end anonymous namespace
-static const unsigned AFGR64RegsTable[] = {
- Mips::D0, Mips::D1, Mips::D2, Mips::D3,
- Mips::D4, Mips::D5, Mips::D6, Mips::D7,
- Mips::D8, Mips::D9, Mips::D10, Mips::D11,
- Mips::D12, Mips::D13, Mips::D14, Mips::D15
-};
+const EDInstInfo *MipsDisassemblerBase::getEDInfo() const {
+ return instInfoMips;
+}
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
@@ -239,25 +190,25 @@ extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
static MCDisassembler *createMipsDisassembler(
const Target &T,
const MCSubtargetInfo &STI) {
- return new MipsDisassembler(STI,true);
+ return new MipsDisassembler(STI, T.createMCRegInfo(""), true);
}
static MCDisassembler *createMipselDisassembler(
const Target &T,
const MCSubtargetInfo &STI) {
- return new MipsDisassembler(STI,false);
+ return new MipsDisassembler(STI, T.createMCRegInfo(""), false);
}
static MCDisassembler *createMips64Disassembler(
const Target &T,
const MCSubtargetInfo &STI) {
- return new Mips64Disassembler(STI,true);
+ return new Mips64Disassembler(STI, T.createMCRegInfo(""), true);
}
static MCDisassembler *createMips64elDisassembler(
const Target &T,
const MCSubtargetInfo &STI) {
- return new Mips64Disassembler(STI, false);
+ return new Mips64Disassembler(STI, T.createMCRegInfo(""), false);
}
extern "C" void LLVMInitializeMipsDisassembler() {
@@ -362,6 +313,11 @@ Mips64Disassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -370,7 +326,8 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(CPU64RegsTable[RegNo]));
+ unsigned Reg = getReg(Decoder, Mips::CPU64RegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -380,8 +337,8 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateReg(CPURegsTable[RegNo]));
+ unsigned Reg = getReg(Decoder, Mips::CPURegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -392,7 +349,8 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[RegNo]));
+ unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -403,7 +361,8 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(FGR32RegsTable[RegNo]));
+ unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -420,15 +379,18 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- int Reg = (int)fieldFromInstruction32(Insn, 16, 5);
- int Base = (int)fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
+ Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
if(Inst.getOpcode() == Mips::SC){
- Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg]));
+ Inst.addOperand(MCOperand::CreateReg(Reg));
}
- Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg]));
- Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base]));
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));
return MCDisassembler::Success;
@@ -439,11 +401,14 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- int Reg = (int)fieldFromInstruction32(Insn, 16, 5);
- int Base = (int)fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction32(Insn, 21, 5);
- Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[Reg]));
- Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base]));
+ Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
+ Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));
return MCDisassembler::Success;
@@ -474,10 +439,12 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > 31)
+ if (RegNo > 30 || RegNo % 2)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(AFGR64RegsTable[RegNo]));
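+ // AFGR64 registers are even/odd FGR32 pairs, so only even encodings are
+ // valid and the index into the register class is RegNo / 2.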
+ unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo / 2);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -488,7 +455,7 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
//Currently only hardware register 29 is supported
if (RegNo != 29)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(Mips::HWR29));
+ Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 6886b1745240..b38463de4bfe 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "MipsInstPrinter.h"
+#include "MipsInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -68,8 +69,25 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
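+ // RDHWR is only accepted by the assembler when the ISA level is mips32r2 or
+ // higher, so bracket it with ".set push"/".set mips32r2" ... ".set pop" to
+ // leave the selected ISA unchanged for the surrounding output.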
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::RDHWR:
+ case Mips::RDHWR64:
+ O << "\t.set\tpush\n";
+ O << "\t.set\tmips32r2\n";
+ }
+
printInstruction(MI, O);
printAnnotation(O, Annot);
+
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::RDHWR:
+ case Mips::RDHWR64:
+ O << "\n\t.set\tpop";
+ }
}
static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
@@ -108,6 +126,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break;
case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break;
case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
+ case MCSymbolRefExpr::VK_Mips_HIGHER: OS << "%higher("; break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST: OS << "%highest("; break;
}
OS << SRE->getSymbol();
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 76b839b2127f..3d8a6f918ff6 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -16,7 +16,7 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-// These enumeration declarations were orignally in MipsInstrInfo.h but
+// These enumeration declarations were originally in MipsInstrInfo.h but
// had to be moved here to avoid circular dependencies between
// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
namespace Mips {
diff --git a/lib/Target/Mips/MCTargetDesc/Makefile b/lib/Target/Mips/MCTargetDesc/Makefile
index 7fe2086a6e00..22a27218f28d 100644
--- a/lib/Target/Mips/MCTargetDesc/Makefile
+++ b/lib/Target/Mips/MCTargetDesc/Makefile
@@ -14,3 +14,4 @@ LIBRARYNAME = LLVMMipsDesc
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 9b4caf65cbff..18961fdd785e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -35,7 +35,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
return 0;
case FK_GPRel_4:
case FK_Data_4:
+ case FK_Data_8:
case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPOFF_HI:
+ case Mips::fixup_Mips_GPOFF_LO:
+ case Mips::fixup_Mips_GOT_PAGE:
+ case Mips::fixup_Mips_GOT_OFST:
+ case Mips::fixup_Mips_GOT_DISP:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -54,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
break;
case Mips::fixup_Mips_HI16:
case Mips::fixup_Mips_GOT_Local:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ // Get the second 16 bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
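+ // For illustration, with Value = 0x0000123456789abc:
+ //   %lo      -> 0x9abc
+ //   %hi      -> 0x5679  (rounded up because bit 15 of %lo is set)
+ //   %higher  -> 0x1234  ((Value + 0x80008000) >> 32)
+ //   %highest -> 0x0000  ((Value + 0x800080008000) >> 48)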
+ case Mips::fixup_Mips_HIGHER:
+ // Get the third 16 bits.
+ Value = ((Value + 0x80008000LL) >> 32) & 0xffff;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ // Get the fourth 16 bits.
+ Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
+ break;
}
return Value;
@@ -74,7 +88,8 @@ public:
:MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit);
+ return createMipsELFObjectWriter(OS,
+ MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
}
/// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
@@ -115,7 +130,8 @@ public:
CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
}
- uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+ uint64_t Mask = ((uint64_t)(-1) >>
+ (64 - getFixupKindInfo(Kind).TargetSize));
CurVal |= Value & Mask;
// Write out the fixed up bytes back to the code/data bits.
@@ -156,7 +172,14 @@ public:
{ "fixup_Mips_TLSLDM", 0, 16, 0 },
{ "fixup_Mips_DTPREL_HI", 0, 16, 0 },
{ "fixup_Mips_DTPREL_LO", 0, 16, 0 },
- { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }
+ { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_GPOFF_HI", 0, 16, 0 },
+ { "fixup_Mips_GPOFF_LO", 0, 16, 0 },
+ { "fixup_Mips_GOT_PAGE", 0, 16, 0 },
+ { "fixup_Mips_GOT_OFST", 0, 16, 0 },
+ { "fixup_Mips_GOT_DISP", 0, 16, 0 },
+ { "fixup_Mips_HIGHER", 0, 16, 0 },
+ { "fixup_Mips_HIGHEST", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -206,6 +229,14 @@ public:
///
/// \return - True on success.
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // If the byte count is not a multiple of the instruction size, it cannot
+ // be padded with full nop instructions.
+ // FIXME: 16-bit instructions are not handled here yet.
+ // We shouldn't be using a hard-coded number for the instruction size.
+ if (Count % 4) return false;
+
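+ // A MIPS nop is the all-zero word (sll $zero, $zero, 0), so the padding is
+ // emitted as Count / 4 zero words.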
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0);
return true;
}
}; // class MipsAsmBackend
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index fb1c5ce6b6c6..234455e0c7f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -79,7 +79,12 @@ namespace MipsII {
MO_GPOFF_LO,
MO_GOT_DISP,
MO_GOT_PAGE,
- MO_GOT_OFST
+ MO_GOT_OFST,
+
+ /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a
+ /// 64-bit symbol address.
+ MO_HIGHER,
+ MO_HIGHEST
};
enum {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 2091bec50082..8e84b3f99f42 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -34,7 +34,7 @@ namespace {
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
- MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI);
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64);
virtual ~MipsELFObjectWriter();
@@ -52,9 +52,11 @@ namespace {
};
}
-MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI)
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ false) {}
+ /*HasRelocationAddend*/ false,
+ /*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
@@ -101,6 +103,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case FK_Data_8:
+ Type = ELF::R_MIPS_64;
+ break;
case FK_GPRel_4:
Type = ELF::R_MIPS_GPREL32;
break;
@@ -148,8 +153,32 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_PC16:
Type = ELF::R_MIPS_PC16;
break;
+ case Mips::fixup_Mips_GOT_PAGE:
+ Type = ELF::R_MIPS_GOT_PAGE;
+ break;
+ case Mips::fixup_Mips_GOT_OFST:
+ Type = ELF::R_MIPS_GOT_OFST;
+ break;
+ case Mips::fixup_Mips_GOT_DISP:
+ Type = ELF::R_MIPS_GOT_DISP;
+ break;
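+ // N64 relocation entries can compose up to three relocation types
+ // (r_type, r_type2, r_type3); the %gp_rel high/low parts below expand to
+ // R_MIPS_GPREL16 + R_MIPS_SUB + R_MIPS_HI16/LO16.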
+ case Mips::fixup_Mips_GPOFF_HI:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
+ break;
+ case Mips::fixup_Mips_GPOFF_LO:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
+ break;
+ case Mips::fixup_Mips_HIGHER:
+ Type = ELF::R_MIPS_HIGHER;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ Type = ELF::R_MIPS_HIGHEST;
+ break;
}
-
return Type;
}
@@ -184,10 +213,10 @@ static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
- // Call the defualt function first. Relocations are sorted in descending
+ // Call the default function first. Relocations are sorted in descending
// order of r_offset.
MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
-
+
RelLs RelocLs;
std::vector<RelLsIter> Unmatched;
@@ -244,6 +273,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
- MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI);
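+ // A 64-bit Mips target uses the N64 ABI relocation record format.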
+ MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
+                                                          /*IsN64*/ Is64Bit);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 9b76eda861dc..77faec54fb23 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -95,6 +95,27 @@ namespace Mips {
// PC relative branch fixup resulting in - R_MIPS_PC16
fixup_Mips_Branch_PCRel,
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+ fixup_Mips_GPOFF_HI,
+
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+ fixup_Mips_GPOFF_LO,
+
+ // resulting in - R_MIPS_GOT_PAGE
+ fixup_Mips_GOT_PAGE,
+
+ // resulting in - R_MIPS_GOT_OFST
+ fixup_Mips_GOT_OFST,
+
+ // resulting in - R_MIPS_GOT_DISP
+ fixup_Mips_GOT_DISP,
+
+ // resulting in - R_MIPS_HIGHER
+ fixup_Mips_HIGHER,
+
+ // resulting in - R_MIPS_HIGHEST
+ fixup_Mips_HIGHEST,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 4ed2be0f430e..8dab62d51813 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -91,6 +91,7 @@ public:
} // namespace
MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
@@ -98,6 +99,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
}
MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
@@ -179,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
} else if (MO.isFPImm()) {
return static_cast<unsigned>(APFloat(MO.getFPImm())
.bitcastToAPInt().getHiBits(32).getLimitedValue());
- }
+ }
// MO must be an Expr.
assert(MO.isExpr());
@@ -193,10 +195,27 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
assert (Kind == MCExpr::SymbolRef);
-
+
Mips::Fixups FixupKind = Mips::Fixups(0);
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+ default: llvm_unreachable("Unknown fixup kind!");
+ break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
+ FixupKind = Mips::fixup_Mips_GPOFF_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_LO :
+ FixupKind = Mips::fixup_Mips_GPOFF_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_PAGE :
+ FixupKind = Mips::fixup_Mips_GOT_PAGE;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_OFST :
+ FixupKind = Mips::fixup_Mips_GOT_OFST;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_DISP :
+ FixupKind = Mips::fixup_Mips_GOT_DISP;
+ break;
case MCSymbolRefExpr::VK_Mips_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
@@ -236,7 +255,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
FixupKind = Mips::fixup_Mips_TPREL_LO;
break;
- default:
+ case MCSymbolRefExpr::VK_Mips_HIGHER:
+ FixupKind = Mips::fixup_Mips_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST:
+ FixupKind = Mips::fixup_Mips_HIGHEST;
break;
} // switch
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 547ccddd78ea..bfcc2a2e4ae0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -22,6 +22,7 @@ class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -33,9 +34,11 @@ extern Target TheMips64Target;
extern Target TheMips64elTarget;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bafadc8f25f6..2963f7e7fa42 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -24,9 +24,7 @@ namespace llvm {
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
- FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
- FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
-
+ FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index cbebe84a1805..8548ae0b8b53 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -72,6 +72,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
+def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
+ "Mips16 mode">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
@@ -83,6 +86,7 @@ def : Proc<"mips32", [FeatureMips32]>;
def : Proc<"mips32r2", [FeatureMips32r2]>;
def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
+def : Proc<"mips16", [FeatureMips16]>;
def MipsAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
new file mode 100644
index 000000000000..030042f2e832
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -0,0 +1,87 @@
+//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16FrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ // Adjust stack.
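+ // SaveRaF16 expands to the Mips16 SAVE instruction, which spills $ra and
+ // decrements the stack pointer in one step, so no separate stack-adjust
+ // instruction is needed here.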
+ if (isInt<16>(-StackSize))
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+}
+
+void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize))
+ // Assumes the stack size is a multiple of 8.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+}
+
+bool Mips16FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ // FIXME: implement.
+ return true;
+}
+
+bool
+Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // FIXME: implement.
+ return true;
+}
+
+void Mips16FrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+}
+
+const MipsFrameLowering *
+llvm::createMips16FrameLowering(const MipsSubtarget &ST) {
+ return new Mips16FrameLowering(ST);
+}
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
new file mode 100644
index 000000000000..25cc37b81519
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -0,0 +1,43 @@
+//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 declaration of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16_FRAMEINFO_H
+#define MIPS16_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+class Mips16FrameLowering : public MipsFrameLowering {
+public:
+ explicit Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
new file mode 100644
index 000000000000..61602b62fb44
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -0,0 +1,663 @@
+//===- Mips16InstrFormats.td - Mips Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS16 instruction formats
+//
+// CPU INSTRUCTION FORMATS
+//
+// funct or f Function field
+//
+// immediate 4-,5-,8- or 11-bit immediate, branch displacement, or
+// or imm address displacement
+//
+// op 5-bit major operation code
+//
+// rx 3-bit source or destination register
+//
+// ry 3-bit source or destination register
+//
+// rz 3-bit source or destination register
+//
+// sa 3- or 5-bit shift amount
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+//
+class Format16<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo16 : Format16<0>;
+def FrmI16 : Format16<1>;
+def FrmRI16 : Format16<2>;
+def FrmRR16 : Format16<3>;
+def FrmRRI16 : Format16<4>;
+def FrmRRR16 : Format16<5>;
+def FrmRRI_A16 : Format16<6>;
+def FrmSHIFT16 : Format16<7>;
+def FrmI8_TYPE16 : Format16<8>;
+def FrmI8_MOVR3216 : Format16<9>;
+def FrmI8_MOV32R16 : Format16<10>;
+def FrmI8_SVRS16 : Format16<11>;
+def FrmJAL16 : Format16<12>;
+def FrmJALX16 : Format16<13>;
+def FrmEXT_I16 : Format16<14>;
+def FrmASMACRO16 : Format16<15>;
+def FrmEXT_RI16 : Format16<16>;
+def FrmEXT_RRI16 : Format16<17>;
+def FrmEXT_RRI_A16 : Format16<18>;
+def FrmEXT_SHIFT16 : Format16<19>;
+def FrmEXT_I816 : Format16<20>;
+def FrmEXT_I8_SVRS16 : Format16<21>;
+def FrmOther16 : Format16<22>; // Instruction w/ a custom format
+
+// Base class for Mips 16 Format
+// This class does not depend on the instruction size
+//
+class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format16 f>: Instruction
+{
+ Format16 Form = f;
+
+ let Namespace = "Mips";
+
+ let OutOperandList = outs;
+ let InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ //
+ // Attributes specific to Mips instructions...
+ //
+ bits<5> FormBits = Form.Value;
+
+ // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+ let TSFlags{4-0} = FormBits;
+
+ let Predicates = [InMips16Mode];
+}
+
+//
+// Generic Mips 16 Format
+//
+class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format16 f>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+{
+ field bits<16> Inst;
+ bits<5> Opcode = 0;
+
+ // Top 5 bits are the 'opcode' field
+ let Inst{15-11} = Opcode;
+}
+
+//
+// For 32 bit extended instruction forms.
+//
+class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format16 f>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+{
+ field bits<32> Inst;
+
+}
+
+class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format16 f>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin, f>
+{
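+ // All extended (32-bit) Mips16 instructions begin with the EXTEND major
+ // opcode (0b11110) in the top five bits.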
+ let Inst{31-27} = 0b11110;
+}
+
+
+
+// Mips Pseudo Instructions Format
+class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst16<outs, ins, asmstr, pattern, IIPseudo, Pseudo16> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|imm11|>
+//===----------------------------------------------------------------------===//
+
+class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmI16>
+{
+ bits<11> imm11;
+
+ let Opcode = op;
+
+ let Inst{10-0} = imm11;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RI instruction class in Mips : <|opcode|rx|imm8|>
+//===----------------------------------------------------------------------===//
+
+class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRI16>
+{
+ bits<3> rx;
+ bits<8> imm8;
+
+ let Opcode = op;
+
+ let Inst{10-8} = rx;
+ let Inst{7-0} = imm8;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RR instruction class in Mips : <|opcode|rx|ry|funct|>
+//===----------------------------------------------------------------------===//
+
+class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<5> funct;
+
+ let Opcode = 0b11101;
+ let funct = _funct;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = funct;
+}
+
+//
+// For conversion functions.
+//
+class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+{
+ bits<3> rx;
+ bits<3> subfunct;
+ bits<5> funct;
+
+ let Opcode = 0b11101; // RR
+ let funct = _funct;
+ let subfunct = _subfunct;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = subfunct;
+ let Inst{4-0} = funct;
+}
+
+//
+// just used for breakpoint (hardware and software) instructions.
+//
+class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+{
+ bits<6> _code; // code is a keyword in tablegen
+ bits<5> funct;
+
+ let Opcode = 0b11101; // RR
+ let funct = _funct;
+
+ let Inst{10-5} = _code;
+ let Inst{4-0} = funct;
+}
+
+//
+// J(AL)R(C) subformat
+//
+class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
+ dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+{
+ bits<3> rx;
+ bits<1> nd;
+ bits<1> l;
+ bits<1> ra;
+
+ let nd = _nd;
+ let l = _l;
+ let ra = r_a;
+
+ let Opcode = 0b11101;
+
+ let Inst{10-8} = rx;
+ let Inst{7} = nd;
+ let Inst{6} = l;
+ let Inst{5} = ra;
+ let Inst{4-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRI instruction class in Mips : <|opcode|rx|ry|imm5|>
+//===----------------------------------------------------------------------===//
+
+class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI16>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<5> imm5;
+
+ let Opcode = op;
+
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRR instruction class in Mips : <|opcode|rx|ry|rz|f|>
+//===----------------------------------------------------------------------===//
+
+class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRR16>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<3> rz;
+ bits<2> f;
+
+ let Opcode = 0b11100;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = rz;
+ let Inst{1-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Format RRI-A instruction class in Mips : <|opcode|rx|ry|f|imm4|>
+//===----------------------------------------------------------------------===//
+
+class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI_A16>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<1> f;
+ bits<4> imm4;
+
+ let Opcode = 0b01000;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4} = f;
+ let Inst{3-0} = imm4;
+}
+
+//===----------------------------------------------------------------------===//
+// Format Shift instruction class in Mips : <|opcode|rx|ry|sa|f|>
+//===----------------------------------------------------------------------===//
+
+class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmSHIFT16>
+{
+ bits<3> rx;
+ bits<3> ry;
+ bits<3> sa;
+ bits<2> f;
+
+ let Opcode = 0b00110;
+ let f = _f;
+
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = sa;
+ let Inst{1-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8 instruction class in Mips : <|opcode|funct|imm8>
+//===----------------------------------------------------------------------===//
+
+class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_TYPE16>
+{
+ bits<3> func;
+ bits<8> imm8;
+
+ let Opcode = 0b01100;
+ let func = _func;
+
+ let Inst{10-8} = func;
+ let Inst{7-0} = imm8;
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8_MOVR32 instruction class in Mips : <|opcode|func|ry|r32>
+//===----------------------------------------------------------------------===//
+
+class FI8_MOVR3216<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOVR3216>
+{
+
+ bits<4> ry;
+ bits<4> r32;
+
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b111;
+ let Inst{7-4} = ry;
+ let Inst{3-0} = r32;
+
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Format i8_MOV32R instruction class in Mips : <|opcode|func|r32|rz>
+//===----------------------------------------------------------------------===//
+
+class FI8_MOV32R16<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOV32R16>
+{
+
+ bits<3> func;
+ bits<5> r32;
+ bits<3> rz;
+
+
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b101;
+ let Inst{7-5} = r32{2-0};
+ let Inst{4-3} = r32{4-3};
+ let Inst{2-0} = rz;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format i8_SVRS instruction class in Mips :
+// <|opcode|svrs|s|ra|s0|s1|framesize>
+//===----------------------------------------------------------------------===//
+
+class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+{
+ bits<1> s;
+ bits<1> ra = 0;
+ bits<1> s0 = 0;
+ bits<1> s1 = 0;
+ bits<4> framesize = 0;
+
+ let s =_s;
+ let Opcode = 0b01100;
+
+ let Inst{10-8} = 0b100;
+ let Inst{7} = s;
+ let Inst{6} = ra;
+ let Inst{5} = s0;
+ let Inst{4} = s1;
+ let Inst{3-0} = framesize;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format JAL instruction class in Mips16 :
+//    <|opcode|X|imm20:16|imm25:21|imm15:0>
+//===----------------------------------------------------------------------===//
+
+class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin, FrmJAL16>
+{
+ bits<1> X;
+ bits<26> imm26;
+
+
+ let X = _X;
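+ // The 26-bit jump target is stored scrambled: bits 20-16 of the immediate
+ // go in Inst{25-21} and bits 25-21 in Inst{20-16}, with the low 16 bits in
+ // the second halfword, per the MIPS16e JAL/JALX encoding.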
+
+ let Inst{31-27} = 0b00011;
+ let Inst{26} = X;
+ let Inst{25-21} = imm26{20-16};
+ let Inst{20-16} = imm26{25-21};
+ let Inst{15-0} = imm26{15-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|0|0|0|0|0|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I16>
+{
+ bits<16> imm16;
+ bits<5> eop;
+
+ let eop = _eop;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = eop;
+ let Inst{10-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format ASMACRO instruction class in Mips16 :
+// <EXTEND|select|p4|p3|RRR|p2|p1|p0>
+//===----------------------------------------------------------------------===//
+
+class FASMACRO16<dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmASMACRO16>
+{
+ bits<3> select;
+ bits<3> p4;
+ bits<5> p3;
+ bits<5> RRR = 0b11100;
+ bits<3> p2;
+ bits<3> p1;
+ bits<5> p0;
+
+
+ let Inst{26-24} = select;
+ let Inst{23-21} = p4;
+ let Inst{20-16} = p3;
+ let Inst{15-11} = RRR;
+ let Inst{10-8} = p2;
+ let Inst{7-5} = p1;
+ let Inst{4-0} = p0;
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RI instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|rx|0|0|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RI16>
+{
+ bits<16> imm16;
+ bits<5> op;
+ bits<3> rx;
+
+ let op = _op;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = op;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RRI instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|op|rx|ry|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI16>
+{
+ bits<5> op;
+ bits<16> imm16;
+ bits<3> rx;
+ bits<3> ry;
+
+ let op=_op;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = op;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-RRI-A instruction class in Mips16 :
+// <|EXTEND|imm10:4|imm14:11|RRI-A|rx|ry|f|imm3:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI_A16>
+{
+ bits<15> imm15;
+ bits<3> rx;
+ bits<3> ry;
+ bits<1> f;
+
+ let f = _f;
+
+ let Inst{26-20} = imm15{10-4};
+ let Inst{19-16} = imm15{14-11};
+ let Inst{15-11} = 0b01000;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4} = f;
+ let Inst{3-0} = imm15{3-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-SHIFT instruction class in Mips16 :
+// <|EXTEND|sa 4:0|s5|0|SHIFT|rx|ry|0|f>
+//===----------------------------------------------------------------------===//
+
+class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_SHIFT16>
+{
+ bits<6> sa6;
+ bits<3> rx;
+ bits<3> ry;
+ bits<2> f;
+
+ let f = _f;
+
+ let Inst{26-22} = sa6{4-0};
+ let Inst{21} = sa6{5};
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 0b00110;
+ let Inst{10-8} = rx;
+ let Inst{7-5} = ry;
+ let Inst{4-2} = 0;
+ let Inst{1-0} = f;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I8 instruction class in Mips16 :
+// <|EXTEND|imm10:5|imm15:11|I8|funct|0|imm4:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I816>
+{
+ bits<16> imm16;
+ bits<5> I8;
+ bits<3> funct;
+
+ let funct = _funct;
+ let I8 = 0b01100;
+
+ let Inst{26-21} = imm16{10-5};
+ let Inst{20-16} = imm16{15-11};
+ let Inst{15-11} = I8;
+ let Inst{10-8} = funct;
+ let Inst{7-5} = 0;
+ let Inst{4-0} = imm16{4-0};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Format EXT-I8_SVRS instruction class in Mips16 :
+// <|EXTEND|xsregs|framesize7:4|aregs|I8|SVRS|s|ra|s0|s1|framesize3:0>
+//===----------------------------------------------------------------------===//
+
+class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+{
+ bits<3> xsregs =0;
+ bits<8> framesize =0;
+ bits<3> aregs =0;
+ bits<5> I8 = 0b01100;
+ bits<3> SVRS = 0b100;
+ bits<1> s;
+ bits<1> ra = 0;
+ bits<1> s0 = 0;
+ bits<1> s1 = 0;
+
+ let s= s_;
+
+ let Inst{26-24} = xsregs;
+ let Inst{23-20} = framesize{7-4};
+ let Inst{19} = 0;
+ let Inst{18-16} = aregs;
+ let Inst{15-11} = I8;
+ let Inst{10-8} = SVRS;
+ let Inst{7} = s;
+ let Inst{6} = ra;
+ let Inst{5} = s0;
+ let Inst{4} = s1;
+ let Inst{3-0} = framesize{3-0};
+
+
+}
+
+
+
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
new file mode 100644
index 000000000000..2bc286b6bb1f
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -0,0 +1,132 @@
+//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16InstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+ RI(*tm.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned Mips16InstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned Mips16InstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::Mov32R16;
+ }
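+ // Only GPR-to-GPR copies (Mov32R16) are handled so far; ZeroReg is never
+ // set on this path, so the ZeroReg operand below is never added.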
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Mips16InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+void Mips16InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA16:
+ ExpandRetRA16(MBB, MI, Mips::JrRa16);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ assert(false && "Implement this function.");
+ return 0;
+}
+
+unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return 0;
+}
+
+void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+}
+
+const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
+ return new Mips16InstrInfo(TM);
+}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
new file mode 100644
index 000000000000..260c5b69b25f
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -0,0 +1,76 @@
+//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16INSTRUCTIONINFO_H
+#define MIPS16INSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "Mips16RegisterInfo.h"
+
+namespace llvm {
+
+class Mips16InstrInfo : public MipsInstrInfo {
+ const Mips16RegisterInfo RI;
+
+public:
+ explicit Mips16InstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
new file mode 100644
index 000000000000..94cf984769b8
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -0,0 +1,419 @@
+//===- Mips16InstrInfo.td - Target Description for Mips16 -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips16 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// RRR-type instruction format
+//
+
+class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
+ FRRR16<_f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>;
+
+//
+// I8_MOV32R instruction format (used only by MOV32R instruction)
+//
+class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
+ FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
+ !strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// EXT-RI instruction format
+//
+
+class FEXT_RI16_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
+class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
+ FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+
+
+class FEXT_2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+
+//
+// RR-type instruction format
+//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
+let rx=0 in
+class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
+ string asmstr, InstrItinClass itin>:
+ FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
+ [], itin> ;
+
+//
+// EXT-RRI instruction format
+//
+
+class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+// EXT-SHIFT instruction format
+//
+class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+ !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+
+//
+// Address operand
+def mem16 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU16Regs, simm16);
+ let EncoderMethod = "getMemEncoding";
+}
+
+//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+ bits<5> shamt = 0;
+ bit isCommutable = isCom;
+ bit isReMaterializable = 1;
+ bit neverHasSideEffects = 1;
+}
+
+//
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0>;
+
+//
+
+// Format: ADDIU rx, pc, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
+// To add a constant to the program counter.
+//
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+//
+// Format: ADDU rz, rx, ry MIPS16e
+// Purpose: Add Unsigned Word (3-Operand)
+// To add 32-bit integers.
+//
+
+def AdduRxRyRz16: FRRR16_ins<0b01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: JR ra MIPS16e
+// Purpose: Jump Register Through Register ra
+// To execute a branch to the instruction address in the return
+// address register.
+//
+
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
+
+//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10000, "lb", mem16, IIAlu>;
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+// To load a byte from memory as an unsigned value.
+//
+def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10001, "lh", mem16, IIAlu>;
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10101, "lhu", mem16, IIAlu>;
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate (Extended)
+// To load a constant into a GPR.
+//
+def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+
+//
+// Format: LW ry, offset(rx) MIPS16e
+// Purpose: Load Word (Extended)
+// To load a word from memory as a signed value.
+//
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
+
+//
+// Format: MOVE r32, rz MIPS16e
+// Purpose: Move
+// To move the contents of a GPR to a GPR.
+//
+def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value
+//
+def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
+//
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
+// (All args are optional) MIPS16e
+// Purpose: Restore Registers and Deallocate Stack Frame
+// To deallocate a stack frame before exit from a subroutine,
+// restoring return address and static registers, and adjusting
+// stack
+//
+
+// fixed form for restoring RA and the frame
+// for direct object emitter, encoding needs to be adjusted for the
+// frame size
+//
+let ra=1, s=0,s0=0,s1=0 in
+def RestoreRaF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "restore \t$$ra, $frame_size", [], IILoad >;
+
+//
+// Format: SAVE {ra,}{s0/s1/s0-1,}{framesize} (All arguments are optional)
+// MIPS16e
+// Purpose: Save Registers and Set Up Stack Frame
+// To set up a stack frame on entry to a subroutine,
+// saving return address and static registers, and adjusting stack
+//
+let ra=1, s=1,s0=0,s1=0 in
+def SaveRaF16:
+ FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+ "save \t$$ra, $frame_size", [], IILoad >;
+
+//
+// Format: SB ry, offset(rx) MIPS16e
+// Purpose: Store Byte (Extended)
+// To store a byte to memory.
+//
+def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+
+//
+// Format: SH ry, offset(rx) MIPS16e
+// Purpose: Store Halfword (Extended)
+// To store a halfword to memory.
+//
+def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+
+//
+// Format: SLL rx, ry, sa MIPS16e
+// Purpose: Shift Word Left Logical (Extended)
+// To execute a left-shift of a word by a fixed number of bits—0 to 31 bits.
+//
+def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
+
+//
+// Format: SLLV ry, rx MIPS16e
+// Purpose: Shift Word Left Logical Variable
+// To execute a left-shift of a word by a variable number of bits.
+//
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+
+
+//
+// Format: SRAV ry, rx MIPS16e
+// Purpose: Shift Word Right Arithmetic Variable
+// To execute an arithmetic right-shift of a word by a variable
+// number of bits.
+//
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+
+
+//
+// Format: SRA rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Arithmetic (Extended)
+// To execute an arithmetic right-shift of a word by a fixed
+// number of bits—1 to 8 bits.
+//
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+
+
+//
+// Format: SRLV ry, rx MIPS16e
+// Purpose: Shift Word Right Logical Variable
+// To execute a logical right-shift of a word by a variable
+// number of bits.
+//
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+
+
+//
+// Format: SRL rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Logical (Extended)
+// To execute a logical right-shift of a word by a fixed
+// number of bits—1 to 31 bits.
+//
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+
+//
+// Format: SUBU rz, rx, ry MIPS16e
+// Purpose: Subtract Unsigned Word
+// To subtract 32-bit integers.
+//
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+
+//
+// Format: SW ry, offset(rx) MIPS16e
+// Purpose: Store Word (Extended)
+// To store a word to memory.
+//
+def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+
+//
+// Format: XOR rx, ry MIPS16e
+// Purpose: Xor
+// To do a bitwise logical XOR.
+//
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
+
+class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [InMips16Mode];
+}
+
+// Unary Arith/Logic
+//
+class ArithLogicU_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r),
+ (I CPU16Regs:$r)>;
+
+def: ArithLogicU_pat<not, NotRxRy16>;
+def: ArithLogicU_pat<ineg, NegRxRy16>;
+
+class ArithLogic16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r),
+ (I CPU16Regs:$l, CPU16Regs:$r)>;
+
+def: ArithLogic16_pat<add, AdduRxRyRz16>;
+def: ArithLogic16_pat<and, AndRxRxRy16>;
+def: ArithLogic16_pat<or, OrRxRxRy16>;
+def: ArithLogic16_pat<sub, SubuRxRyRz16>;
+def: ArithLogic16_pat<xor, XorRxRxRy16>;
+
+// Arithmetic and logical instructions with one register and one immediate operand.
+
+class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
+ (I CPU16Regs:$in, imm_type:$imm)>;
+
+def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
+def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
+def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
+def: ArithLogicI16_pat<sra, immZExt5, SraX16>;
+
+class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra),
+ (I CPU16Regs:$r, CPU16Regs:$ra)>;
+
+def: shift_rotate_reg16_pat<shl, SllvRxRy16>;
+def: shift_rotate_reg16_pat<sra, SravRxRy16>;
+def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
+
+class LoadM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>;
+
+def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
+def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>;
+def: LoadM16_pat<load_a, LwRxRyOffMemX16>;
+
+class StoreM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>;
+
+def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
+def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>;
+def: StoreM16_pat<store_a, SwRxRyOffMemX16>;
+
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1 in
+def JumpLinkReg16:
+ FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
+ "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+
+// Mips16 pseudos
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
+ hasExtraSrcRegAllocReq = 1 in
+def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
+
+// Small immediates
+def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+
+def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
new file mode 100644
index 000000000000..c15d1bf52e85
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16RegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void Mips16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always
+ // referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate the final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ //   following: an outgoing argument, a pointer to dynamically allocated
+ //   stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ //   by adding the size of the stack: an incoming argument, a callee-saved
+ //   register location, or a local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+
+
+}
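
The offset computation in eliminateFI() above reduces to two cases. A minimal C++ sketch of the arithmetic, assuming the same variable roles (the helper name is made up for illustration and is not part of the patch):

    // Frame objects for outgoing arguments are addressed from $sp as-is;
    // everything else is addressed past the allocated stack frame, and the
    // result is folded into the instruction's existing immediate operand.
    static int64_t computeFrameOffset(bool IsOutArgFI, int64_t SPOffset,
                                      uint64_t StackSize, int64_t OperandImm) {
      int64_t Offset = IsOutArgFI ? SPOffset : SPOffset + (int64_t)StackSize;
      return Offset + OperandImm;
    }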
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
new file mode 100644
index 000000000000..3f4b3a762a1e
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -0,0 +1,37 @@
+//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16REGISTERINFO_H
+#define MIPS16REGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class Mips16RegisterInfo : public MipsRegisterInfo {
+public:
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 0382869255f4..20fc17807757 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -49,21 +49,24 @@ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStandardEncoding]> {
let isCodeGenOnly = 1;
}
}
multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+ def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStandardEncoding]>;
def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
- Requires<[IsN64]> {
+ Requires<[IsN64, HasStandardEncoding]> {
let isCodeGenOnly = 1;
}
}
}
-let usesCustomInserter = 1, Predicates = [HasMips64],
+let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding],
DecoderNamespace = "Mips64" in {
defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
@@ -106,9 +109,15 @@ def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
+let Pattern = []<dag> in {
+def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
+def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
+def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
+}
}
// Rotate Instructions
-let Predicates = [HasMips64r2], DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64r2, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
}
@@ -137,18 +146,34 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
defm ULD : LoadM64<0x37, "uld", load_u, 1>;
defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+/// load/store left/right
+let isCodeGenOnly = 1 in {
+ defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>;
+ defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>;
+ defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>;
+ defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>;
+}
+defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>;
+defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>;
+defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>;
+defm SDR : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
+
/// Load-linked, Store-conditional
-def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>;
-def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]> {
+def LLD : LLBase<0x34, "lld", CPU64Regs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
let isCodeGenOnly = 1;
}
-def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>;
-def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]> {
+def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
let isCodeGenOnly = 1;
}
/// Jump and Branch Instructions
-def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>;
+def JR64 : IndirectBranch<CPU64Regs>;
def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
@@ -183,74 +208,75 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
-def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+def LEA_ADDiu64 : EffectiveAddress<0x19, "daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
- Requires<[IsN64]> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc64 : EffectiveAddress<0x19, "daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+ Requires<[IsN64, HasStandardEncoding]>;
let DecoderNamespace = "Mips64" in {
def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
def DEXT : ExtBase<3, "dext", CPU64Regs>;
def DINS : InsBase<7, "dins", CPU64Regs>;
-def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "dsll\t$rd, $rt, 32", [], IIAlu>;
-def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
-let isCodeGenOnly = 1 in
-def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
+let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
+ def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+ def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+}
}
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [NotN64] in {
- def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
- def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
- def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
- def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
- def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
- def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
- def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+let Predicates = [NotN64, HasStandardEncoding] in {
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
+ def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
}
-let Predicates = [IsN64] in {
- def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
- def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+let Predicates = [IsN64, HasStandardEncoding] in {
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+ def : MipsPat<(zextloadi32_u addr:$a),
+ (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
}
// hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
- (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
- (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
- (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+ (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
@@ -270,19 +296,22 @@ defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
// select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
+ Requires<[IsN64, HasStandardEncoding]>;
// truncate
-def : Pat<(i32 (trunc CPU64Regs:$src)),
- (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+def : MipsPat<(i32 (trunc CPU64Regs:$src)),
+ (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
+ Requires<[IsN64, HasStandardEncoding]>;
// 32-to-64-bit extension
-def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
-def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
-def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
// Sign extend in register
-def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
+ (SLL64_64 CPU64Regs:$src)>;
-// bswap pattern
-def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// bswap pattern
+def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8206cfc15704..00ff7545c14a 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,29 +13,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
-#include "MipsAsmPrinter.h"
#include "Mips.h"
+#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
+#include "MipsMCInstLower.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -43,19 +43,6 @@
using namespace llvm;
-void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) {
- MCInst TmpInst;
-
- MCInstLowering.Lower(MI, TmpInst);
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- if (MipsFI->getEmitNOAT())
- OutStreamer.EmitRawText(StringRef("\t.set\tat"));
- OutStreamer.EmitInstruction(TmpInst);
- if (MipsFI->getEmitNOAT())
- OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
-}
-
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
MipsFI = MF.getInfo<MipsFunctionInfo>();
AsmPrinter::runOnMachineFunction(MF);
@@ -71,84 +58,33 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- unsigned Opc = MI->getOpcode();
- MCInst TmpInst0;
- SmallVector<MCInst, 4> MCInsts;
-
- switch (Opc) {
- case Mips::ULW:
- case Mips::ULH:
- case Mips::ULHu:
- case Mips::USW:
- case Mips::USH:
- case Mips::ULW_P8:
- case Mips::ULH_P8:
- case Mips::ULHu_P8:
- case Mips::USW_P8:
- case Mips::USH_P8:
- case Mips::ULD:
- case Mips::ULW64:
- case Mips::ULH64:
- case Mips::ULHu64:
- case Mips::USD:
- case Mips::USW64:
- case Mips::USH64:
- case Mips::ULD_P8:
- case Mips::ULW64_P8:
- case Mips::ULH64_P8:
- case Mips::ULHu64_P8:
- case Mips::USD_P8:
- case Mips::USW64_P8:
- case Mips::USH64_P8: {
- if (OutStreamer.hasRawTextSupport()) {
- EmitInstrWithMacroNoAT(MI);
- return;
- }
-
- MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
- != MCInsts.end(); ++I)
- OutStreamer.EmitInstruction(*I);
-
- return;
- }
- case Mips::CPRESTORE: {
- const MachineOperand &MO = MI->getOperand(0);
- assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
- int64_t Offset = MO.getImm();
-
- if (OutStreamer.hasRawTextSupport()) {
- if (!isInt<16>(Offset)) {
- EmitInstrWithMacroNoAT(MI);
+ // Direct object specific instruction lowering
+ if (!OutStreamer.hasRawTextSupport())
+ switch (MI->getOpcode()) {
+ case Mips::DSLL:
+ case Mips::DSRL:
+ case Mips::DSRA:
+ assert(MI->getNumOperands() == 3 &&
+ "Invalid no. of machine operands for shift!");
+ assert(MI->getOperand(2).isImm());
+ int64_t Shift = MI->getOperand(2).getImm();
+ if (Shift > 31) {
+ MCInst TmpInst0;
+ MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32);
+ OutStreamer.EmitInstruction(TmpInst0);
return;
}
- } else {
- MCInstLowering.LowerCPRESTORE(Offset, MCInsts);
-
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
- I != MCInsts.end(); ++I)
- OutStreamer.EmitInstruction(*I);
-
- return;
+ break;
}
- break;
- }
- case Mips::SETGP01: {
- MCInstLowering.LowerSETGP01(MI, MCInsts);
-
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
- I != MCInsts.end(); ++I)
- OutStreamer.EmitInstruction(*I);
-
- return;
- }
- default:
- break;
- }
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
- MCInstLowering.Lower(MI, TmpInst0);
- OutStreamer.EmitInstruction(TmpInst0);
+ do {
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I++, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+ } while ((I != E) && I->isInsideBundle());
}
//===----------------------------------------------------------------------===//
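
The DSLL/DSRL/DSRA handling above relies on the fact that the 5-bit shamt field cannot encode shift amounts above 31; for those, the printer hands the amount minus 32 to LowerLargeShift, presumably to emit the DSLL32/DSRL32/DSRA32 forms added earlier in this patch. A small sketch of the adjustment, with a hypothetical helper name:

    // E.g. a 64-bit "dsll $1, $2, 35" is emitted as "dsll32 $1, $2, 3".
    static int64_t largeShiftAmount(int64_t Shift) {
      return Shift > 31 ? Shift - 32 : Shift; // paired with the *32 opcode
    }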
@@ -197,9 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
- unsigned CPURegSize = Mips::CPURegsRegisterClass->getSize();
- unsigned FGR32RegSize = Mips::FGR32RegisterClass->getSize();
- unsigned AFGR64RegSize = Mips::AFGR64RegisterClass->getSize();
+ unsigned CPURegSize = Mips::CPURegsRegClass.getSize();
+ unsigned FGR32RegSize = Mips::FGR32RegClass.getSize();
+ unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
bool HasAFGR64Reg = false;
unsigned CSFPRegsSize = 0;
unsigned i, e = CSI.size();
@@ -207,11 +143,11 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Set FPU Bitmask.
for (i = 0; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (Mips::CPURegsRegisterClass->contains(Reg))
+ if (Mips::CPURegsRegClass.contains(Reg))
break;
unsigned RegNum = getMipsRegisterNumbering(Reg);
- if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
HasAFGR64Reg = true;
@@ -283,8 +219,15 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
}
void MipsAsmPrinter::EmitFunctionEntryLabel() {
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ if (Subtarget->inMips16Mode())
+ OutStreamer.EmitRawText(StringRef("\t.set\tmips16"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomips16"));
+ // Leave this out until FSF gas with the micromips changes is available.
+ // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ }
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -295,10 +238,6 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
emitFrameDirective();
- bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
- Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() &&
- MipsFI->globalBaseRegFixed();
-
if (OutStreamer.hasRawTextSupport()) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -306,20 +245,9 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
OutStreamer.EmitRawText(OS.str());
OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
-
- // Emit .cpload directive if needed.
- if (EmitCPLoad)
- OutStreamer.EmitRawText(StringRef("\t.cpload\t$25"));
-
OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
if (MipsFI->getEmitNOAT())
OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
- } else if (EmitCPLoad) {
- SmallVector<MCInst, 4> MCInsts;
- MCInstLowering.LowerCPLOAD(MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
- I != MCInsts.end(); ++I)
- OutStreamer.EmitInstruction(*I);
}
}
@@ -382,14 +310,99 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
}
// Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant,const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
- printOperand(MI, OpNo, O);
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
+ case 'X': // hex const int
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << "0x" << StringRef(utohexstr(MO.getImm())).lower();
+ return false;
+ case 'x': // hex const int (low 16 bits)
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << "0x" << StringRef(utohexstr(MO.getImm() & 0xffff)).lower();
+ return false;
+ case 'd': // decimal const int
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'm': // decimal const int minus 1
+ if ((MO.getType()) != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm() - 1;
+ return false;
+ case 'z': {
+ // $0 if zero, regular printing otherwise
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ int64_t Val = MO.getImm();
+ if (Val)
+ O << Val;
+ else
+ O << "$0";
+ return false;
+ }
+ case 'D': // Second part of a double word register operand
+ case 'L': // Low order register of a double word register operand
+ case 'M': // High order register of a double word register operand
+ {
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ // Number of registers represented by this operand. We are looking for
+ // 2 in 32-bit mode and 1 in 64-bit mode.
+ if (NumVals != 2) {
+ if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ O << '$' << MipsInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ return true;
+ }
+
+ unsigned RegOp = OpNum;
+ if (!Subtarget->isGP64bit()){
+ // Endianness reverses which register holds the high or low value
+ // between M and L.
+ switch(ExtraCode[0]) {
+ case 'M':
+ RegOp = (Subtarget->isLittle()) ? OpNum + 1 : OpNum;
+ break;
+ case 'L':
+ RegOp = (Subtarget->isLittle()) ? OpNum : OpNum + 1;
+ break;
+ case 'D': // Always the second part
+ RegOp = OpNum + 1;
+ }
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << '$' << MipsInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+ }
+ }
+ }
+
+ printOperand(MI, OpNum, O);
return false;
}
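
The operand modifiers handled above are reached through ordinary GCC-style inline assembly. A rough illustration of the 'x' modifier (low 16 bits of an immediate, printed in hex); the constraint letters and register choices are assumptions for the example, not taken from this patch:

    // %x1 below would print as 0x5678, which fits ori's 16-bit immediate.
    int low;
    __asm__("ori %0, $zero, %x1" : "=r"(low) : "i"(0x12345678));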
@@ -398,11 +411,12 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+
return false;
}
@@ -450,7 +464,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_BlockAddress: {
- MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
O << BA->getName();
break;
}
@@ -511,7 +525,7 @@ printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
void MipsAsmPrinter::
printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier) {
- const MachineOperand& MO = MI->getOperand(opNum);
+ const MachineOperand &MO = MI->getOperand(opNum);
O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 4b7e1d37662c..8aadefdcd141 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,58 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips FastCC Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_MipsO32_FastCC : CallingConv<[
+ // f64 arguments are passed in double-precision floating-point registers.
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+
+ // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_MipsN_FastCC : CallingConv<[
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64,
+ T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+ T8_64, V1_64]>>,
+
+ // f64 arguments are passed in double-precision floating-point registers.
+ CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64,
+ D6_64, D7_64, D8_64, D9_64, D10_64, D11_64,
+ D12_64, D13_64, D14_64, D15_64, D16_64, D17_64,
+ D18_64, D19_64]>>,
+
+ // Stack parameter slots for i64 and f64 are 64-bit doublewords and
+ // 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_Mips_FastCC : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers. All scratch registers,
+ // except for AT, V0 and T9, are available to be used as argument registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6,
+ T7, T8, V1]>>,
+
+ // f32 arguments are passed in single-precision floating-point registers.
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+ F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+
+ // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>,
+ CCDelegateTo<CC_MipsN_FastCC>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips Calling Convention Dispatch
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 7d819026da96..cb7022b9e29e 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -145,8 +145,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I)
+ for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+ E = MBB->instr_end(); I != E; ++I)
emitInstruction(*I);
}
} while (MCE.finishFunction(MF));
@@ -258,7 +258,7 @@ void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV,
void MipsCodeEmitter::
emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, 0, 0, false));
+ Reloc, ES, 0, 0));
}
void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index da336804e510..b12b1f2b5ad4 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -61,41 +61,54 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
Instruction MOVZInst, Instruction SLTOp,
Instruction SLTuOp, Instruction SLTiOp,
Instruction SLTiuOp> {
- def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
- def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
- def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
}
multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
Instruction MOVZInst, Instruction XOROp> {
- def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction XORiOp> {
+ def : MipsPat<
+ (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>;
}
multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
Instruction XOROp> {
- def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
- def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+ def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+ def : MipsPat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
}
// Instantiation of instructions.
def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
let isCodeGenOnly = 1;
@@ -106,7 +119,8 @@ let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
}
def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
-let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
let isCodeGenOnly = 1;
@@ -118,21 +132,22 @@ let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
- Requires<[HasMips64]> {
+ Requires<[HasMips64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
- Requires<[HasMips64]> {
+ Requires<[HasMips64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
}
-let Predicates = [IsFP64bit],DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
let isCodeGenOnly = 1;
@@ -145,24 +160,25 @@ let Predicates = [IsFP64bit],DecoderNamespace = "Mips64" in {
def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
- Requires<[HasMips64]> {
+ Requires<[HasMips64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
- Requires<[HasMips64]> {
+ Requires<[HasMips64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
}
-let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
}
@@ -170,7 +186,8 @@ let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
// Instantiation of conditional move patterns.
defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
-let Predicates = [HasMips64] in {
+defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
+let Predicates = [HasMips64, HasStandardEncoding] in {
defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
SLTiu64>;
@@ -179,10 +196,13 @@ let Predicates = [HasMips64] in {
defm : MovzPats1<CPURegs, CPU64Regs, MOVZ_I_I64, XOR>;
defm : MovzPats1<CPU64Regs, CPURegs, MOVZ_I64_I, XOR64>;
defm : MovzPats1<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XOR64>;
+ defm : MovzPats2<CPURegs, CPU64Regs, MOVZ_I_I64, XORi>;
+ defm : MovzPats2<CPU64Regs, CPURegs, MOVZ_I64_I, XORi64>;
+ defm : MovzPats2<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XORi64>;
}
defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
-let Predicates = [HasMips64] in {
+let Predicates = [HasMips64, HasStandardEncoding] in {
defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
@@ -191,19 +211,19 @@ let Predicates = [HasMips64] in {
defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64] in {
+let Predicates = [HasMips64, HasStandardEncoding] in {
defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
SLTiu64>;
defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
}
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
}
-let Predicates = [IsFP64bit] in {
+let Predicates = [IsFP64bit, HasStandardEncoding] in {
defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
SLTiu64>;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index debf2f1b85c1..2bba8a38024d 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -36,12 +36,21 @@ static cl::opt<bool> EnableDelaySlotFiller(
cl::desc("Fill the Mips delay slots useful instructions."),
cl::Hidden);
+// This option can be used to silence complaints from machine verifier passes.
+static cl::opt<bool> SkipDelaySlotFiller(
+ "skip-mips-delay-filler",
+ cl::init(false),
+ cl::desc("Skip MIPS' delay slot filling pass."),
+ cl::Hidden);
+
namespace {
struct Filler : public MachineFunctionPass {
+ typedef MachineBasicBlock::instr_iterator InstrIter;
+ typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
TargetMachine &TM;
const TargetInstrInfo *TII;
- MachineBasicBlock::iterator LastFiller;
+ InstrIter LastFiller;
static char ID;
Filler(TargetMachine &tm)
@@ -53,6 +62,9 @@ namespace {
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) {
+ if (SkipDelaySlotFiller)
+ return false;
+
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -61,27 +73,27 @@ namespace {
}
bool isDelayFiller(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator candidate);
+ InstrIter candidate);
- void insertCallUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses);
+ void insertCallUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
- void insertDefsUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses);
+ void insertDefsUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
- bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
unsigned Reg);
- bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool delayHasHazard(InstrIter candidate,
bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses);
bool
- findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
- MachineBasicBlock::iterator &Filler);
+ findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
+ InstrIter &Filler);
};
@@ -93,14 +105,14 @@ namespace {
bool Filler::
runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- LastFiller = MBB.end();
+ LastFiller = MBB.instr_end();
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
- MachineBasicBlock::iterator D;
+ InstrIter D;
if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
MBB.splice(llvm::next(I), &MBB, D);
@@ -111,6 +123,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Record the filler instruction that filled the delay slot.
// The instruction after it will be visited in the next iteration.
LastFiller = ++I;
+
+ // Set the InsideBundle bit so that the machine verifier doesn't expect
+ // this instruction to be a terminator.
+ LastFiller->setIsInsideBundle();
}
return Changed;
@@ -123,8 +139,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
}
bool Filler::findDelayInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator slot,
- MachineBasicBlock::iterator &Filler) {
+ InstrIter slot,
+ InstrIter &Filler) {
SmallSet<unsigned, 32> RegDefs;
SmallSet<unsigned, 32> RegUses;
@@ -133,13 +149,13 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
bool sawLoad = false;
bool sawStore = false;
- for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+ for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
// skip debug value
if (I->isDebugValue())
continue;
// Convert to forward iterator.
- MachineBasicBlock::iterator FI(llvm::next(I).base());
+ InstrIter FI(llvm::next(I).base());
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
@@ -165,7 +181,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
return false;
}
-bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+bool Filler::delayHasHazard(InstrIter candidate,
bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses) {
@@ -213,9 +229,9 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
}
// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses) {
+void Filler::insertDefsUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
@@ -240,14 +256,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
}
//returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
- if (RegSet.count(Reg))
- return true;
- // check Aliased Registers
- for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
- *Alias; ++Alias)
- if (RegSet.count(*Alias))
+bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
+ AI.isValid(); ++AI)
+ if (RegSet.count(*AI))
return true;
-
return false;
}
diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp
deleted file mode 100644
index 119d1a824688..000000000000
--- a/lib/Target/Mips/MipsEmitGPRestore.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- MipsEmitGPRestore.cpp - Emit GP Restore Instruction ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass emits instructions that restore $gp right
-// after jalr instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "emit-gp-restore"
-
-#include "Mips.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-namespace {
- struct Inserter : public MachineFunctionPass {
-
- TargetMachine &TM;
- const TargetInstrInfo *TII;
-
- static char ID;
- Inserter(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
-
- virtual const char *getPassName() const {
- return "Mips Emit GP Restore";
- }
-
- bool runOnMachineFunction(MachineFunction &F);
- };
- char Inserter::ID = 0;
-} // end of anonymous namespace
-
-bool Inserter::runOnMachineFunction(MachineFunction &F) {
- MipsFunctionInfo *MipsFI = F.getInfo<MipsFunctionInfo>();
-
- if ((TM.getRelocationModel() != Reloc::PIC_) ||
- (!MipsFI->globalBaseRegFixed()))
- return false;
-
- bool Changed = false;
- int FI = MipsFI->getGPFI();
-
- for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
- MFI != MFE; ++MFI) {
- MachineBasicBlock& MBB = *MFI;
- MachineBasicBlock::iterator I = MFI->begin();
-
- // If MBB is a landing pad, insert instruction that restores $gp after
- // EH_LABEL.
- if (MBB.isLandingPad()) {
- // Find EH_LABEL first.
- for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
-
- // Insert lw.
- ++I;
- DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
- .addImm(0);
- Changed = true;
- }
-
- while (I != MFI->end()) {
- if (I->getOpcode() != Mips::JALR) {
- ++I;
- continue;
- }
-
- DebugLoc dl = I->getDebugLoc();
- // emit lw $gp, ($gp save slot on stack) after jalr
- BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
- .addImm(0);
- Changed = true;
- }
- }
-
- return Changed;
-}
-
-/// createMipsEmitGPRestorePass - Returns a pass that emits instructions that
-/// restores $gp clobbered by jalr instructions.
-FunctionPass *llvm::createMipsEmitGPRestorePass(MipsTargetMachine &tm) {
- return new Inserter(tm);
-}
-
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
deleted file mode 100644
index baeae97a4f52..000000000000
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-//===-- MipsExpandPseudo.cpp - Expand Pseudo Instructions ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass expands pseudo instructions into target instructions after register
-// allocation but before post-RA scheduling.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mips-expand-pseudo"
-
-#include "Mips.h"
-#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-namespace {
- struct MipsExpandPseudo : public MachineFunctionPass {
-
- TargetMachine &TM;
- const TargetInstrInfo *TII;
-
- static char ID;
- MipsExpandPseudo(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
-
- virtual const char *getPassName() const {
- return "Mips PseudoInstrs Expansion";
- }
-
- bool runOnMachineFunction(MachineFunction &F);
- bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
-
- private:
- void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
- void ExpandExtractElementF64(MachineBasicBlock&,
- MachineBasicBlock::iterator);
- };
- char MipsExpandPseudo::ID = 0;
-} // end of anonymous namespace
-
-bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
- bool Changed = false;
-
- for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
- Changed |= runOnMachineBasicBlock(*I);
-
- return Changed;
-}
-
-bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
-
- bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
- const MCInstrDesc& MCid = I->getDesc();
-
- switch(MCid.getOpcode()) {
- default:
- ++I;
- continue;
- case Mips::SETGP2:
- // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9"
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu),
- I->getOperand(0).getReg())
- .addReg(Mips::V0).addReg(I->getOperand(1).getReg());
- break;
- case Mips::BuildPairF64:
- ExpandBuildPairF64(MBB, I);
- break;
- case Mips::ExtractElementF64:
- ExpandExtractElementF64(MBB, I);
- break;
- }
-
- // delete original instr
- MBB.erase(I++);
- Changed = true;
- }
-
- return Changed;
-}
-
-void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
- const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
- DebugLoc dl = I->getDebugLoc();
- const uint16_t* SubReg =
- TM.getRegisterInfo()->getSubRegisters(DstReg);
-
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
- BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
-}
-
-void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
- unsigned N = I->getOperand(2).getImm();
- const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
- DebugLoc dl = I->getDebugLoc();
- const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
-
- BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
-}
-
-/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo
-/// instrs into real instrs
-FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
- return new MipsExpandPseudo(tm);
-}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index f8ea3d0321d2..8c0474b0eec7 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,6 +82,14 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
+const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16FrameLowering(ST);
+
+ return llvm::createMipsSEFrameLowering(ST);
+}
+
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -89,238 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
-
-bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
- return true;
-}
-
-// Build an instruction sequence to load an immediate that is too large to fit
-// in 16-bit and add the result to Reg.
-static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64,
- const MipsInstrInfo &TII, MachineBasicBlock& MBB,
- MachineBasicBlock::iterator II, DebugLoc DL) {
- unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
- unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu;
- unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
- MipsAnalyzeImmediate AnalyzeImm;
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq.
- for (++Inst; Inst != Seq.end(); ++Inst)
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
-}
-
-void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- const MipsRegisterInfo *RegInfo =
- static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // First, compute final stack size.
- unsigned RegSize = STI.isGP32bit() ? 4 : 8;
- unsigned StackAlign = getStackAlignment();
- unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
- (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
- MipsFI->getMaxCallFrameSize();
- uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) +
- RoundUpToAlignment(MFI->getStackSize(), StackAlign);
-
- // Update stack size
- MFI->setStackSize(StackSize);
-
- // Emit instructions that set the global base register if the target ABI is
- // O32.
- if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() &&
- !MipsFI->globalBaseRegFixed()) {
- // See MipsInstrInfo.td for explanation.
- MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock();
- MF.insert(&MBB, NewEntry);
- NewEntry->addSuccessor(&MBB);
-
- // Copy live in registers.
- for (MachineBasicBlock::livein_iterator R = MBB.livein_begin();
- R != MBB.livein_end(); ++R)
- NewEntry->addLiveIn(*R);
-
- BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips:: SETGP01),
- Mips::V0);
- }
-
- // No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
-
- MachineModuleInfo &MMI = MF.getMMI();
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- MachineLocation DstML, SrcML;
-
- // Adjust stack.
- if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize)
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
- else { // Expand immediate that doesn't fit in 16-bit.
- MipsFI->setEmitNOAT();
- expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
- }
-
- // emit ".cfi_def_cfa_offset StackSize"
- MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
- DstML = MachineLocation(MachineLocation::VirtualFP);
- SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
- Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
-
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- if (CSI.size()) {
- // Find the instruction past the last instruction that saves a callee-saved
- // register to the stack.
- for (unsigned i = 0; i < CSI.size(); ++i)
- ++MBBI;
-
- // Iterate over list of callee-saved registers and emit .cfi_offset
- // directives.
- MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
-
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
-
- // If Reg is a double precision register, emit two cfa_offsets,
- // one for each of the paired single precision registers.
- if (Mips::AFGR64RegisterClass->contains(Reg)) {
- const uint16_t *SubRegs = RegInfo->getSubRegisters(Reg);
- MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
- MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
- MachineLocation SrcML0(*SubRegs);
- MachineLocation SrcML1(*(SubRegs + 1));
-
- if (!STI.isLittle())
- std::swap(SrcML0, SrcML1);
-
- Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
- Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
- }
- else {
- // Reg is either in CPURegs or FGR32.
- DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
- SrcML = MachineLocation(Reg);
- Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
- }
- }
- }
-
- // if framepointer enabled, set it to point to the stack pointer.
- if (hasFP(MF)) {
- // Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
-
- // emit ".cfi_def_cfa_register $fp"
- MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(FP);
- SrcML = MachineLocation(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
- }
-
- // Restore GP from the saved stack location
- if (MipsFI->needGPSaveRestore()) {
- unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset)
- .addReg(Mips::GP);
- }
-}
-
-void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- DebugLoc dl = MBBI->getDebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // if framepointer enabled, restore the stack pointer.
- if (hasFP(MF)) {
- // Find the first instruction that restores a callee-saved register.
- MachineBasicBlock::iterator I = MBBI;
-
- for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
- --I;
-
- // Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
- }
-
- // Get the number of bytes from FrameInfo
- uint64_t StackSize = MFI->getStackSize();
-
- if (!StackSize)
- return;
-
- // Adjust stack.
- if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize)
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
- else // Expand immediate that doesn't fit in 16-bit.
- expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
-}
-
-void MipsFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineRegisterInfo& MRI = MF.getRegInfo();
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
-
- // FIXME: remove this code if register allocator can correctly mark
- // $fp and $ra used or unused.
-
- // Mark $fp and $ra as used or unused.
- if (hasFP(MF))
- MRI.setPhysRegUsed(FP);
-
- // The register allocator might determine $ra is used after seeing
- // instruction "jr $ra", but we do not want PrologEpilogInserter to insert
- // instructions to save/restore $ra unless there is a function call.
- // To correct this, $ra is explicitly marked unused if there is no
- // function call.
- if (MF.getFrameInfo()->hasCalls())
- MRI.setPhysRegUsed(Mips::RA);
- else {
- MRI.setPhysRegUnused(Mips::RA);
- MRI.setPhysRegUnused(Mips::RA_64);
- }
-}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index bd1d89f04bc1..ed7b7fe76c2b 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -27,23 +27,19 @@ protected:
public:
explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
- STI(sti) {
- }
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
+ sti.hasMips64() ? 16 : 8), STI(sti) {}
- bool targetHandlesStackFrameRounding() const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ static const MipsFrameLowering *create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST);
bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
};
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
+const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
+
} // End llvm namespace
#endif
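
The new MipsFrameLowering::create factory (defined in MipsFrameLowering.cpp above) selects between the MIPS16 and standard-encoding frame lowerings based on the subtarget. A rough caller-side sketch, not part of this patch: the helper below is hypothetical and exists only to show the dispatch; in the real tree the selected object would be owned by the Mips target machine. Only create() and the two creator functions come from the change.

    // Hypothetical sketch: pick the frame lowering once, when the target
    // machine is constructed.
    static const MipsFrameLowering *
    selectMipsFrameLowering(MipsTargetMachine &TM, const MipsSubtarget &ST) {
      // create() returns createMips16FrameLowering(ST) when the subtarget is
      // in MIPS16 mode and createMipsSEFrameLowering(ST) otherwise.
      return MipsFrameLowering::create(TM, ST);
    }
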
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f0651c61311b..5a97c17ec851 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -125,20 +125,19 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- bool FixGlobalBaseReg = MipsFI->globalBaseRegFixed();
-
- if (Subtarget.isABI_O32() && FixGlobalBaseReg)
- // $gp is the global base register.
- V0 = V1 = GlobalBaseReg;
- else {
- const TargetRegisterClass *RC;
- RC = Subtarget.isABI_N64() ?
- Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- }
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else if (Subtarget.inMips16Mode())
+ RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
if (Subtarget.isABI_N64()) {
MF.getRegInfo().addLiveIn(Mips::T9_64);
@@ -150,10 +149,25 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
const GlobalValue *FName = MF.getFunction();
BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- } else if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ return;
+ }
+
+ if (Subtarget.inMips16Mode()) {
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
// Set global register to __gnu_local_gp.
//
// lui $v0, %hi(__gnu_local_gp)
@@ -162,27 +176,48 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
.addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
.addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- } else {
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- } else if (!MipsFI->globalBaseRegFixed()) {
- assert(Subtarget.isABI_O32());
-
- BuildMI(MBB, I, DL, TII.get(Mips::SETGP2), GlobalBaseReg)
- .addReg(Mips::T9);
- }
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
}
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+  // The GNU linker requires that the first two instructions appear at the
+  // beginning of a function and that no instructions be inserted before or
+  // between them. The two instructions are emitted during lowering to the MC
+  // layer in order to avoid any reordering.
+  //
+  // Register $2 (Mips::V0) is added to the list of live-in registers to
+  // ensure the value that instruction 1 (addiu) defines is still valid when
+  // instruction 2 (addu) reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
}
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
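
The sequences above rebuild a 32-bit offset from its %hi/%lo halves: lui (or li plus a later sll by 16 in MIPS16 mode) materializes the %hi half in the upper 16 bits, and an addiu (pc-relative in the MIPS16 sequence) contributes the %lo half. Because addiu sign-extends its 16-bit immediate, %hi is conventionally computed with a +0x8000 rounding so the pair recombines exactly. A standalone check of that convention (editorial illustration of the standard MIPS %hi/%lo split; the helper names are mine, not code from this patch):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // %lo is consumed by addiu, which sign-extends its 16-bit immediate, so
    // %hi rounds up whenever bit 15 of the value is set.
    static uint32_t hiPart(uint32_t V) { return (V + 0x8000) >> 16; }
    static int32_t loPart(uint32_t V) { return (int16_t)(V & 0xffff); }

    int main() {
      for (uint32_t V : {0x00000000u, 0x00007fffu, 0x00008000u, 0x12348000u,
                         0xdeadbeefu, 0xffffffffu}) {
        // lui (or li+sll) materializes hiPart(V) << 16; addiu then adds the
        // sign-extended loPart(V), reconstructing V exactly.
        uint32_t Rebuilt = (hiPart(V) << 16) + (uint32_t)loPart(V);
        assert(Rebuilt == V);
      }
      return 0;
    }
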
@@ -207,12 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Replace uses with ZeroReg.
for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E; ++U) {
+ E = MRI->use_end(); U != E;) {
MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
MachineInstr *MI = MO.getParent();
+ ++U;
// Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
continue;
MO.setReg(ZeroReg);
@@ -253,21 +290,6 @@ bool MipsDAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
EVT ValTy = Addr.getValueType();
- // If Parent is an unaligned f32 load or store, select a (base + index)
- // floating point load/store instruction (luxc1 or suxc1).
- const LSBaseSDNode* LS = 0;
-
- if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
- EVT VT = LS->getMemoryVT();
-
- if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
- assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
- "Unaligned loads/stores not supported for this type.");
- if (VT == MVT::f32)
- return false;
- }
- }
-
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
@@ -316,17 +338,20 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
- SDValue LoVal = Addr.getOperand(1);
- if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
- isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
+ SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
Base = Addr.getOperand(0);
- Offset = LoVal.getOperand(0);
+ Offset = Opnd0;
return true;
}
}
// If an indexed floating point load/store can be emitted, return false.
- if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
Subtarget.hasMips32r2Or64())
return false;
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index ace47ab07910..c5207c67376d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -81,6 +81,14 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
+ case MipsISD::LWL: return "MipsISD::LWL";
+ case MipsISD::LWR: return "MipsISD::LWR";
+ case MipsISD::SWL: return "MipsISD::SWL";
+ case MipsISD::SWR: return "MipsISD::SWR";
+ case MipsISD::LDL: return "MipsISD::LDL";
+ case MipsISD::LDR: return "MipsISD::LDR";
+ case MipsISD::SDL: return "MipsISD::SDL";
+ case MipsISD::SDR: return "MipsISD::SDR";
default: return NULL;
}
}
@@ -98,20 +106,25 @@ MipsTargetLowering(MipsTargetMachine &TM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
- addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
if (HasMips64)
- addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->inMips16Mode()) {
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+ addRegisterClass(MVT::i32, &Mips::CPURARegRegClass);
+ }
if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
// When dealing with single precision only, use libcalls
if (!Subtarget->isSingleFloat()) {
if (HasMips64)
- addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
else
- addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
}
}
@@ -139,15 +152,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -161,7 +177,14 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+ }
+
+ if (!HasMips64) {
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -192,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
if (!Subtarget->hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
@@ -199,9 +224,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->hasMips64r2())
setOperationAction(ISD::ROTR, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -243,9 +265,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setInsertFencesForAtomic(true);
- if (Subtarget->isSingleFloat())
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
if (!Subtarget->hasSEInReg()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -261,6 +280,13 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
}
+ if (HasMips64) {
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
+ setTruncStoreAction(MVT::i64, MVT::i32, Custom);
+ }
+
setTargetDAGCombine(ISD::ADDE);
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
@@ -268,6 +294,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::ADD);
setMinFunctionAlignment(HasMips64 ? 3 : 2);
@@ -276,6 +303,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
+
+ maxStoresPerMemcpy = 16;
}
bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
@@ -284,10 +313,7 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
switch (SVT) {
case MVT::i64:
case MVT::i32:
- case MVT::i16:
return true;
- case MVT::f32:
- return Subtarget->hasMips32r2Or64();
default:
return false;
}
@@ -305,17 +331,17 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
// ADDENode's second operand must be a flag output of an ADDC node in order
// for the matching to be successful.
- SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
if (ADDCNode->getOpcode() != ISD::ADDC)
return false;
SDValue MultHi = ADDENode->getOperand(0);
SDValue MultLo = ADDCNode->getOperand(0);
- SDNode* MultNode = MultHi.getNode();
+ SDNode *MultNode = MultHi.getNode();
unsigned MultOpc = MultHi.getOpcode();
// MultHi and MultLo must be generated by the same node,
@@ -378,17 +404,17 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
// SUBENode's second operand must be a flag output of an SUBC node in order
// for the matching to be successful.
- SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
if (SUBCNode->getOpcode() != ISD::SUBC)
return false;
SDValue MultHi = SUBENode->getOperand(1);
SDValue MultLo = SUBCNode->getOperand(1);
- SDNode* MultNode = MultHi.getNode();
+ SDNode *MultNode = MultHi.getNode();
unsigned MultOpc = MultHi.getOpcode();
// MultHi and MultLo must be generated by the same node,
@@ -443,9 +469,9 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
return true;
}
-static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
@@ -456,9 +482,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
}
-static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
@@ -469,9 +495,9 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
}
-static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -546,7 +572,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
// Creates and returns an FPCmp node from a setcc node.
// Returns Op if setcc is not a floating point comparison.
-static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// must be a SETCC node
if (Op.getOpcode() != ISD::SETCC)
return Op;
@@ -568,7 +594,7 @@ static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
}
// Creates and returns a CMovFPT/F node.
-static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
bool invert = InvertFPCondCode((Mips::CondCode)
cast<ConstantSDNode>(Cond.getOperand(2))
@@ -578,9 +604,9 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -604,16 +630,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
const DebugLoc DL = N->getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
SDValue True = N->getOperand(1);
-
+
SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
-
+
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
// Pattern match EXT.
// $dst = and ((sra or srl) $src , pos), (2**size - 1)
// => ext $dst, $src, size, pos
@@ -651,9 +677,9 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
DAG.getConstant(SMSize, MVT::i32));
}
-static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
// Pattern match INS.
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
// where mask1 = (2**size - 1) << pos, mask0 = ~mask1
@@ -705,6 +731,33 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
+static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Add = N->getOperand(1);
+
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue Lo = Add.getOperand(1);
+
+ if ((Lo.getOpcode() != MipsISD::Lo) ||
+ (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable))
+ return SDValue();
+
+ EVT ValTy = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0),
+ Add.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo);
+}
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -720,11 +773,13 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return PerformORCombine(N, DAG, DCI, Subtarget);
+ case ISD::ADD:
+ return PerformADDCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -737,19 +792,25 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
}
return SDValue();
}
@@ -784,7 +845,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
/*
static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
DebugLoc dl,
- const MipsSubtarget* Subtarget,
+ const MipsSubtarget *Subtarget,
const TargetInstrInfo *TII,
bool isFPCmp, unsigned Opc) {
// There is no need to expand CMov instructions if target has
@@ -1440,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
SDValue MipsTargetLowering::
-LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
-{
- MachineFunction &MF = DAG.getMachineFunction();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP;
-
- assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
- cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
- "Cannot lower if the alignment of the allocated space is larger than \
- that of the stack.");
-
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- // Get a reference from Mips stack pointer
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy());
-
- // Subtract the dynamic size from the actual stack size to
- // obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size);
-
- // The Sub result contains the new stack start address, so it
- // must be placed in the stack pointer register.
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue());
-
- // This node always has two return values: a new stack pointer
- // value and a chain
- SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other);
- SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
- SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
-
- return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
-}
-
-SDValue MipsTargetLowering::
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
@@ -1512,6 +1537,19 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
Op.getDebugLoc());
}
+SDValue MipsTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getOperand(0).getValueType();
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, getSetCCResultType(Ty),
+ Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(4));
+
+ return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2),
+ Op.getOperand(3));
+}
+
SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = CreateFPCmp(DAG, Op);
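
The LowerSELECT_CC hunk above splits a fused select_cc node into an explicit setcc feeding an ordinary select, which the existing SELECT lowering then handles. A scalar C++ analogue of that rewrite (illustration only; the less-than comparison is an arbitrary stand-in for the condition code):

    // select_cc(lhs, rhs, tv, fv, setlt) expressed as an explicit compare
    // followed by a select, mirroring what LowerSELECT_CC does at the
    // SelectionDAG level.
    static double selectCCAsSetCCPlusSelect(double LHS, double RHS, double TV,
                                            double FV) {
      bool Cond = (LHS < RHS); // ISD::SETCC with condition code SETLT
      return Cond ? TV : FV;   // ISD::SELECT on the boolean result
    }
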
@@ -1614,10 +1652,13 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- // General Dynamic TLS Model
- bool LocalDynamic = GV->hasInternalLinkage();
- unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM :MipsII::MO_TLSGD;
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
+ // General Dynamic and Local Dynamic TLS Model.
+ unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM
+ : MipsII::MO_TLSGD;
+
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
GetGlobalReg(DAG, PtrVT), TGA);
@@ -1632,16 +1673,16 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Entry.Ty = PtrTy;
Args.push_back(Entry);
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(), PtrTy,
+ TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false, /*doesNotRet=*/false,
/*isReturnValueUsed=*/true,
TlsGetAddr, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
- if (!LocalDynamic)
+ if (model != TLSModel::LocalDynamic)
return Ret;
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -1655,7 +1696,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
}
SDValue Offset;
- if (GV->isDeclaration()) {
+ if (model == TLSModel::InitialExec) {
// Initial Exec TLS Model
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
@@ -1666,6 +1707,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
false, false, false, 0);
} else {
// Local Exec TLS Model
+ assert(model == TLSModel::LocalExec);
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -1942,9 +1984,26 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Return address can be determined only for current frame.");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT VT = Op.getValueType();
+ unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA;
+ MFI->setReturnAddressIsTaken(true);
+
+ // Return RA, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
+}
+
// TODO: set SType according to the desired memory barrier behavior.
SDValue
-MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
+MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
unsigned SType = 0;
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
@@ -1952,7 +2011,7 @@ MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
}
SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
// FIXME: Set SType for weaker fences where supported/appropriate.
unsigned SType = 0;
@@ -1961,6 +2020,210 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
DAG.getConstant(SType, MVT::i32));
}
+SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+
+ // if shamt < 32:
+ // lo = (shl lo, shamt)
+ // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt))
+ // else:
+ // lo = 0
+ // hi = (shl lo, shamt[4:0])
+ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
+ DAG.getConstant(-1, MVT::i32));
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo,
+ DAG.getConstant(1, MVT::i32));
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo,
+ Not);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt);
+ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
+ DAG.getConstant(0x20, MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
+ DAG.getConstant(0, MVT::i32), ShiftLeftLo);
+ Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+ bool IsSRA) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+
+ // if shamt < 32:
+ // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt))
+ // if isSRA:
+ // hi = (sra hi, shamt)
+ // else:
+ // hi = (srl hi, shamt)
+ // else:
+ // if isSRA:
+ // lo = (sra hi, shamt[4:0])
+ // hi = (sra hi, 31)
+ // else:
+ // lo = (srl hi, shamt[4:0])
+ // hi = 0
+ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
+ DAG.getConstant(-1, MVT::i32));
+ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
+ DAG.getConstant(1, MVT::i32));
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not);
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32,
+ Hi, Shamt);
+ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
+ DAG.getConstant(0x20, MVT::i32));
+ SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi,
+ DAG.getConstant(31, MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or);
+ Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
+ IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32),
+ ShiftRightHi);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
+ SDValue Chain, SDValue Src, unsigned Offset) {
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
+ EVT BasePtrVT = Ptr.getValueType();
+ DebugLoc DL = LD->getDebugLoc();
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+
+ if (Offset)
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
+ DAG.getConstant(Offset, BasePtrVT));
+
+ SDValue Ops[] = { Chain, Ptr, Src };
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ LD->getMemOperand());
+}
+
+// Expand an unaligned 32 or 64-bit integer load node.
+SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT MemVT = LD->getMemoryVT();
+
+ // Return if load is aligned or if MemVT is neither i32 nor i64.
+ if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
+ ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
+ return SDValue();
+
+ bool IsLittle = Subtarget->isLittle();
+ EVT VT = Op.getValueType();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT);
+
+ assert((VT == MVT::i32) || (VT == MVT::i64));
+
+ // Expand
+ // (set dst, (i64 (load baseptr)))
+ // to
+ // (set tmp, (ldl (add baseptr, 7), undef))
+ // (set dst, (ldr baseptr, tmp))
+ if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) {
+ SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
+ IsLittle ? 7 : 0);
+ return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
+ IsLittle ? 0 : 7);
+ }
+
+ SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
+ IsLittle ? 3 : 0);
+ SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
+ IsLittle ? 0 : 3);
+
+ // Expand
+ // (set dst, (i32 (load baseptr))) or
+ // (set dst, (i64 (sextload baseptr))) or
+ // (set dst, (i64 (extload baseptr)))
+ // to
+ // (set tmp, (lwl (add baseptr, 3), undef))
+ // (set dst, (lwr baseptr, tmp))
+ if ((VT == MVT::i32) || (ExtType == ISD::SEXTLOAD) ||
+ (ExtType == ISD::EXTLOAD))
+ return LWR;
+
+ assert((VT == MVT::i64) && (ExtType == ISD::ZEXTLOAD));
+
+ // Expand
+ // (set dst, (i64 (zextload baseptr)))
+ // to
+ // (set tmp0, (lwl (add baseptr, 3), undef))
+ // (set tmp1, (lwr baseptr, tmp0))
+ // (set tmp2, (shl tmp1, 32))
+ // (set dst, (srl tmp2, 32))
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue Const32 = DAG.getConstant(32, MVT::i32);
+ SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32);
+ SDValue Ops[] = { SRL, LWR.getValue(1) };
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
+ SDValue Chain, unsigned Offset) {
+ SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
+ EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
+ DebugLoc DL = SD->getDebugLoc();
+ SDVTList VTList = DAG.getVTList(MVT::Other);
+
+ if (Offset)
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
+ DAG.getConstant(Offset, BasePtrVT));
+
+ SDValue Ops[] = { Chain, Value, Ptr };
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ SD->getMemOperand());
+}
+
+// Expand an unaligned 32 or 64-bit integer store node.
+SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SD = cast<StoreSDNode>(Op);
+ EVT MemVT = SD->getMemoryVT();
+
+ // Return if store is aligned or if MemVT is neither i32 nor i64.
+ if ((SD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
+ ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
+ return SDValue();
+
+ bool IsLittle = Subtarget->isLittle();
+ SDValue Value = SD->getValue(), Chain = SD->getChain();
+ EVT VT = Value.getValueType();
+
+ // Expand
+ // (store val, baseptr) or
+ // (truncstore val, baseptr)
+ // to
+ // (swl val, (add baseptr, 3))
+ // (swr val, baseptr)
+ if ((VT == MVT::i32) || SD->isTruncatingStore()) {
+ SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain,
+ IsLittle ? 3 : 0);
+ return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
+ }
+
+ assert(VT == MVT::i64);
+
+ // Expand
+ // (store val, baseptr)
+ // to
+ // (sdl val, (add baseptr, 7))
+ // (sdr val, baseptr)
+ SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
+ return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
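
The LowerShiftLeftParts/LowerShiftRightParts functions added above implement 64-bit shifts on a 32-bit (Lo, Hi) register pair using the select-based recipe spelled out in their comments. Below is an independent, standalone check of the left-shift decomposition (an editorial re-derivation, not code from the patch); the right-shift variants follow the same pattern with the roles of Lo and Hi swapped and, for SRA, an arithmetic fill of Hi.

    #include <cassert>
    #include <cstdint>

    // SHL_PARTS decomposition: shift a 64-bit value held as (Lo, Hi) left by
    // Shamt using only 32-bit operations. Written with a branch for clarity;
    // the lowering instead uses ISD::SELECT nodes keyed on (Shamt & 0x20).
    static void shlParts(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                         uint32_t &OutLo, uint32_t &OutHi) {
      if ((Shamt & 0x20) == 0) {                       // shamt < 32
        OutLo = Lo << Shamt;
        OutHi = (Hi << Shamt) | ((Lo >> 1) >> (~Shamt & 31));
      } else {                                         // shamt >= 32
        OutLo = 0;
        OutHi = Lo << (Shamt & 31);
      }
    }

    int main() {
      const uint64_t V = 0x0123456789abcdefULL;
      for (unsigned Shamt = 0; Shamt < 64; ++Shamt) {
        uint32_t Lo, Hi;
        shlParts((uint32_t)V, (uint32_t)(V >> 32), Shamt, Lo, Hi);
        const uint64_t Expected = V << Shamt;
        assert(Lo == (uint32_t)Expected && Hi == (uint32_t)(Expected >> 32));
      }
      return 0;
    }
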
@@ -2153,11 +2416,11 @@ static unsigned getNextIntArgReg(unsigned Reg) {
// Write ByVal Arg to arg registers and stack.
static void
-WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
- SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
- SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+WriteByValArg(SDValue Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MVT PtrType, bool isLittle) {
unsigned LocMemOffset = VA.getLocMemOffset();
unsigned Offset = 0;
@@ -2229,26 +2492,26 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
return;
}
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remaining part of byval arg to it using memcpy.
+ // Copy remaining part of byval arg using memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
- LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(RemainingSize, MVT::i32),
- std::min(ByValAlign, (unsigned)4),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
// Copy Mips64 byVal arg to registers and stack.
void static
-PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
- SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
- SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+PassByValArg64(SDValue Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
EVT PtrTy, bool isLittle) {
unsigned ByValSize = Flags.getByValSize();
unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8);
@@ -2318,30 +2581,35 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
assert(MemCpySize && "MemCpySize must not be zero.");
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remainder of byval arg to it with memcpy.
+  // Copy the remainder of the byval arg to the stack with memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
DAG.getConstant(Offset, PtrTy));
- LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isTailCall.
SDValue
-MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
  // MIPS target does not yet support tail call optimization.
isTailCall = false;
@@ -2356,7 +2624,9 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- if (IsO32)
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC);
+ else if (IsO32)
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
else if (HasMips64)
AnalyzeMips64CallOperands(CCInfo, Outs);
@@ -2365,54 +2635,32 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
-
- // Chain is the output chain of the last Load/Store or CopyToReg node.
- // ByValChain is the output chain of the last Memcpy node created for copying
- // byval arguments to the stack.
- SDValue Chain, CallSeqStart, ByValChain;
- SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
- ByValChain = InChain;
-
- // If this is the first call, create a stack frame object that points to
- // a location to which .cprestore saves $gp.
- if (IsO32 && IsPIC && MipsFI->globalBaseRegFixed() && !MipsFI->getGPFI())
- MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
-
- // Get the frame index of the stack frame object that points to the location
- // of dynamically allocated area on the stack.
- int DynAllocFI = MipsFI->getDynAllocFI();
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
- if (IsO32)
+ if (IsO32 && (CallConv != CallingConv::Fast))
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
- unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
-
- if (MaxCallFrameSize < NextStackOffset) {
- MipsFI->setMaxCallFrameSize(NextStackOffset);
-
- // Set the offsets relative to $sp of the $gp restore slot and dynamically
- // allocated stack space. These offsets must be aligned to a boundary
- // determined by the stack alignment of the ABI.
- unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = (NextStackOffset + StackAlignment - 1) /
- StackAlignment * StackAlignment;
+  // Chain is the output chain of the last Load/Store or CopyToReg node.
+  // Memcpy nodes created for copying byval arguments to the stack are
+  // chained through MemOpChains.
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- if (MipsFI->needGPSaveRestore())
- MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ IsN64 ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
- MFI->setObjectOffset(DynAllocFI, NextStackOffset);
- }
+ if (MipsFI->getMaxCallFrameSize() < NextStackOffset)
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
  // With EABI it is possible to have 16 args in registers.
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
-
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue Arg = OutVals[i];
@@ -2425,11 +2673,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
if (IsO32)
- WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
else
- PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
continue;
@@ -2479,29 +2727,14 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Register can't get to this point...
assert(VA.isMemLoc());
- // Create the frame index object for this incoming parameter
- LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true);
- SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
-
      // Emit an ISD::STORE that stores the
      // parameter value to a stack location.
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
}
- // Extend range of indices of frame objects for outgoing arguments that were
- // created during this function call. Skip this step if no such objects were
- // created.
- if (LastFI)
- MipsFI->extendOutArgFIRange(FirstFI, LastFI);
-
- // If a memcpy has been created to copy a byval arg to a stack, replace the
- // chain input of CallSeqStart with ByValChain.
- if (InChain != ByValChain)
- DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
- NextStackOffsetVal);
-
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -2565,6 +2798,9 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
}
}
+ // T9 register operand.
+ SDValue T9;
+
// T9 should contain the address of the callee function if
   // -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
@@ -2572,7 +2808,19 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
InFlag = Chain.getValue(1);
- Callee = DAG.getRegister(T9Reg, getPointerTy());
+
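+    // In MIPS16 mode, keep the original callee operand and pass T9 separately
+    // as an extra operand on the call node instead of jumping through T9.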
+ if (Subtarget->inMips16Mode())
+ T9 = DAG.getRegister(T9Reg, getPointerTy());
+ else
+ Callee = DAG.getRegister(T9Reg, getPointerTy());
+ }
+
+ // Insert node "GP copy globalreg" before call to function.
+ // Lazy-binding stubs require GP to point to the GOT.
+ if (IsPICCall) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty)));
}
// Build a sequence of copy-to-reg nodes chained together with token
@@ -2600,6 +2848,10 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add T9 register operand.
+ if (T9.getNode())
+ Ops.push_back(T9);
+
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
@@ -2613,8 +2865,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NextStackOffset, true),
+ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
@@ -2635,7 +2886,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
@@ -2654,9 +2905,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
- std::vector<SDValue>& OutChains,
+ std::vector<SDValue> &OutChains,
SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
const Argument *FuncArg) {
unsigned LocMem = VA.getLocMemOffset();
unsigned FirstWord = LocMem / 4;
@@ -2668,7 +2919,7 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
break;
unsigned SrcReg = O32IntRegs[CurWord];
- unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass);
+ unsigned Reg = AddLiveIn(MF, SrcReg, &Mips::CPURegsRegClass);
SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
DAG.getConstant(i * 4, MVT::i32));
SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
@@ -2681,8 +2932,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
// Create frame object on stack and copy registers used for byval passing to it.
static unsigned
CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
- std::vector<SDValue>& OutChains, SelectionDAG &DAG,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MachineFrameInfo *MFI, bool IsRegLoc,
SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
EVT PtrTy, const Argument *FuncArg) {
@@ -2705,7 +2956,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
// Copy arg registers.
for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs);
++Reg, ++I) {
- unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass);
+ unsigned VReg = AddLiveIn(MF, *Reg, &Mips::CPU64RegsRegClass);
SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN,
DAG.getConstant(I * 8, PtrTy));
SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64),
@@ -2741,7 +2992,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- if (IsO32)
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC);
+ else if (IsO32)
CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
@@ -2781,13 +3034,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = Mips::CPURegsRegisterClass;
+ RC = &Mips::CPURegsRegClass;
else if (RegVT == MVT::i64)
- RC = Mips::CPU64RegsRegisterClass;
+ RC = &Mips::CPU64RegsRegClass;
else if (RegVT == MVT::f32)
- RC = Mips::FGR32RegisterClass;
+ RC = &Mips::FGR32RegClass;
else if (RegVT == MVT::f64)
- RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass;
+ RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
else
llvm_unreachable("RegVT not supported by FormalArguments Lowering");
@@ -2861,8 +3114,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot.
- const TargetRegisterClass *RC
- = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass;
+ const TargetRegisterClass *RC = IsO32 ?
+ (const TargetRegisterClass*)&Mips::CPURegsRegClass :
+ (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
unsigned RegSize = RC->getSize();
int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize;
@@ -2926,7 +3180,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
   // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
@@ -2972,11 +3226,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// Return on Mips is always a "jr $ra"
if (Flag.getNode())
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
- Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
- else // Return Void
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
- Chain, DAG.getRegister(Mips::RA, MVT::i32));
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
@@ -2995,13 +3248,19 @@ getConstraintType(const std::string &Constraint) const
// unless generating MIPS16 code.
// 'y' : Equivalent to r; retained for
// backwards compatibility.
- // 'f' : Floating Point registers.
+ // 'c' : A register suitable for use in an indirect
+ // jump. This will always be $25 for -mabicalls.
+ // 'l' : The lo register. 1 word storage.
+ // 'x' : The hilo register pair. Double word storage.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default : break;
case 'd':
case 'y':
case 'f':
+ case 'c':
+ case 'l':
+ case 'x':
return C_RegisterClass;
}
}
@@ -3035,6 +3294,22 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (type->isFloatTy())
weight = CW_Register;
break;
+ case 'c': // $25 for indirect jumps
+ case 'l': // lo register
+ case 'x': // hilo register pair
+ if (type->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'I': // signed 16 bit immediate
+ case 'J': // integer zero
+ case 'K': // unsigned 16 bit immediate
+ case 'L': // signed 32 bit immediate where lower 16 bits are 0
+ case 'N': // immediate in the range of -65535 to -1 (inclusive)
+ case 'O': // signed 15 bit immediate (+- 16383)
+  case 'P': // immediate in the range of 1 to 65535 (inclusive)
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
}
return weight;
}
@@ -3050,30 +3325,152 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
- if (VT == MVT::i32)
- return std::make_pair(0U, Mips::CPURegsRegisterClass);
- assert(VT == MVT::i64 && "Unexpected type.");
- return std::make_pair(0U, Mips::CPU64RegsRegisterClass);
+ if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8)
+ return std::make_pair(0U, &Mips::CPURegsRegClass);
+ if (VT == MVT::i64 && !HasMips64)
+ return std::make_pair(0U, &Mips::CPURegsRegClass);
+ if (VT == MVT::i64 && HasMips64)
+ return std::make_pair(0U, &Mips::CPU64RegsRegClass);
+ // This will generate an error message
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, Mips::FGR32RegisterClass);
+ return std::make_pair(0U, &Mips::FGR32RegClass);
if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
if (Subtarget->isFP64bit())
- return std::make_pair(0U, Mips::FGR64RegisterClass);
- else
- return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ return std::make_pair(0U, &Mips::FGR64RegClass);
+ return std::make_pair(0U, &Mips::AFGR64RegClass);
}
+ break;
+ case 'c': // register suitable for indirect jump
+ if (VT == MVT::i32)
+ return std::make_pair((unsigned)Mips::T9, &Mips::CPURegsRegClass);
+ assert(VT == MVT::i64 && "Unexpected type.");
+ return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass);
+    case 'l': // lo register
+ if (VT == MVT::i32)
+ return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass);
+ return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass);
+    case 'x': // hilo register pair
+      // FIXME: Not triggering the use of both hi and lo
+ // This will generate an error message
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break; // This will fall through to the generic implementation
+ case 'I': // Signed 16 bit constant
+ // If this fails, the parent routine will give an error
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if (isInt<16>(Val)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'J': // integer zero
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getZExtValue();
+ if (Val == 0) {
+ Result = DAG.getTargetConstant(0, Type);
+ break;
+ }
+ }
+ return;
+ case 'K': // unsigned 16 bit immediate
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ uint64_t Val = (uint64_t)C->getZExtValue();
+ if (isUInt<16>(Val)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'L': // signed 32 bit immediate where lower 16 bits are 0
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'N': // immediate in the range of -65535 to -1 (inclusive)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((Val >= -65535) && (Val <= -1)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'O': // signed 15 bit immediate
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((isInt<15>(Val))) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ case 'P': // immediate in the range of 1 to 65535 (inclusive)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ EVT Type = Op.getValueType();
+ int64_t Val = C->getSExtValue();
+ if ((Val <= 65535) && (Val >= 1)) {
+ Result = DAG.getTargetConstant(Val, Type);
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
bool
MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
return false;
}
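+// Pick the widest access type to use when lowering memcpy/memset: 64-bit
+// loads and stores when the subtarget has Mips64, 32-bit accesses otherwise.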
+EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (Subtarget->hasMips64())
+ return MVT::i64;
+
+ return MVT::i32;
+}
+
bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index c36f40f639f3..95ea8fa885fb 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -79,7 +79,17 @@ namespace llvm {
Sync,
Ext,
- Ins
+ Ins,
+
+ // Load/Store Left/Right nodes.
+ LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LWR,
+ SWL,
+ SWR,
+ LDL,
+ LDR,
+ SDL,
+ SDR
};
}
@@ -122,19 +132,25 @@ namespace llvm {
// Lower Operand specifics
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ bool IsSRA) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -144,13 +160,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -176,8 +186,22 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
+    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 14d8f1e7ba94..3e78c4564310 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -54,10 +54,14 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">;
-def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">;
-def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">;
-def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">;
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">,
+ AssemblerPredicate<"FeatureFP64Bit">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">,
+ AssemblerPredicate<"!FeatureFP64Bit">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"FeatureSingleFloat">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"!FeatureSingleFloat">;
// FP immediate patterns.
def fpimm0 : PatLeaf<(fpimm), [{
@@ -97,7 +101,7 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
}
// FP indexed load.
class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+ RegisterClass PRC, SDPatternOperator FOp = null_frag>:
FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
!strconcat(opstr, "\t$fd, $index($base)"),
[(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
@@ -106,7 +110,7 @@ class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
// FP indexed store.
class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+ RegisterClass PRC, SDPatternOperator FOp= null_frag>:
FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
!strconcat(opstr, "\t$fs, $index($base)"),
[(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
@@ -117,15 +121,15 @@ class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
multiclass FFR1_W_M<bits<6> funct, string opstr> {
def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
- Requires<[NotFP64bit]>;
+ Requires<[NotFP64bit, HasStandardEncoding]>;
def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
- Requires<[IsFP64bit]> {
+ Requires<[IsFP64bit, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
// Instructions that convert an FP value to 64-bit fixed point.
-let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in
+let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in
multiclass FFR1_L_M<bits<6> funct, string opstr> {
def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
@@ -135,9 +139,9 @@ multiclass FFR1_L_M<bits<6> funct, string opstr> {
multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
- Requires<[NotFP64bit]>;
+ Requires<[NotFP64bit, HasStandardEncoding]>;
def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
- Requires<[IsFP64bit]> {
+ Requires<[IsFP64bit, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
@@ -146,9 +150,9 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
let isCommutable = isComm in {
def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
- Requires<[NotFP64bit]>;
+ Requires<[NotFP64bit, HasStandardEncoding]>;
def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
- Requires<[IsFP64bit]> {
+ Requires<[IsFP64bit, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
@@ -185,13 +189,13 @@ def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>;
def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>;
def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>;
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
}
-let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in {
def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
@@ -199,7 +203,7 @@ let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
}
-let Predicates = [NoNaNsFPMath] in {
+let Predicates = [NoNaNsFPMath, HasStandardEncoding] in {
defm FABS : FFR1P_M<0x5, "abs", fabs>;
defm FNEG : FFR1P_M<0x7, "neg", fneg>;
}
@@ -242,14 +246,14 @@ def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
- Requires<[NotFP64bit]>;
+ Requires<[NotFP64bit, HasStandardEncoding]>;
def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
- Requires<[IsFP64bit]> {
+ Requires<[IsFP64bit, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [IsN64], DecoderNamespace = "Mips64" in {
+let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in {
def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> {
@@ -260,81 +264,91 @@ let Predicates = [IsN64], DecoderNamespace = "Mips64" in {
}
}
-let Predicates = [NotN64] in {
+let Predicates = [NotN64, HasStandardEncoding] in {
def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
}
-let Predicates = [NotN64, HasMips64], DecoderNamespace = "Mips64" in {
+let Predicates = [NotN64, HasMips64, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
}
-let Predicates = [NotN64, NotMips64] in {
+let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
}
// Indexed loads and stores.
-let Predicates = [HasMips32r2Or64] in {
+let Predicates = [HasMips32r2Or64, HasStandardEncoding] in {
def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
- def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
}
-let Predicates = [HasMips32r2, NotMips64] in {
+let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in {
def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>;
def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>;
}
-let Predicates = [HasMips64, NotN64], DecoderNamespace="Mips64" in {
+let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in {
def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>;
def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>;
}
// n64
-let Predicates = [IsN64], isCodeGenOnly=1 in {
+let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in {
def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
- def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
- def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
}
+// Load/store doubleword indexed unaligned.
+let Predicates = [NotMips64, HasStandardEncoding] in {
+ def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
+ def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+}
+
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace="Mips64" in {
+ def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
+ def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+}
+
 /// Floating-point Arithmetic
defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
defm FDIV : FFR2P_M<0x03, "div", fdiv>;
defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
defm FSUB : FFR2P_M<0x01, "sub", fsub>;
-let Predicates = [HasMips32r2] in {
+let Predicates = [HasMips32r2, HasStandardEncoding] in {
def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
}
-let Predicates = [HasMips32r2, NoNaNsFPMath] in {
+let Predicates = [HasMips32r2, NoNaNsFPMath, HasStandardEncoding] in {
def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
}
-let Predicates = [HasMips32r2, NotFP64bit] in {
+let Predicates = [HasMips32r2, NotFP64bit, HasStandardEncoding] in {
def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
}
-let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in {
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStandardEncoding] in {
def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
}
-let Predicates = [HasMips32r2, IsFP64bit], isCodeGenOnly=1 in {
+let Predicates = [HasMips32r2, IsFP64bit, HasStandardEncoding], isCodeGenOnly=1 in {
def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
}
-let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath], isCodeGenOnly=1 in {
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
+ isCodeGenOnly=1 in {
def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
}
@@ -391,8 +405,10 @@ class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
/// Floating Point Compare
let Defs=[FCR31] in {
def FCMP_S32 : FCMP<0x10, FGR32, "s">;
- def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>;
- def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]> {
+ def FCMP_D32 : FCMP<0x11, AFGR64, "d">,
+ Requires<[NotFP64bit, HasStandardEncoding]>;
+ def FCMP_D64 : FCMP<0x11, FGR64, "d">,
+ Requires<[IsFP64bit, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
@@ -400,69 +416,59 @@ let Defs=[FCR31] in {
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
- MipsPseudo<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
- [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+ PseudoSE<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- MipsPseudo<(outs CPURegs:$dst),
- (ins AFGR64:$src, i32imm:$n), "",
- [(set CPURegs:$dst,
- (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
-def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
-
-def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
-def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
-
-let Predicates = [NotFP64bit] in {
- def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
- def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
- def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
-}
-
-let Predicates = [IsFP64bit] in {
- def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>;
- def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
-
- def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>;
- def : Pat<(f32 (sint_to_fp CPU64Regs:$src)),
- (CVT_S_L (DMTC1 CPU64Regs:$src))>;
- def : Pat<(f64 (sint_to_fp CPU64Regs:$src)),
- (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
-
- def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>;
- def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
- def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
-
- def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
-}
-
-// Patterns for unaligned floating point loads and stores.
-let Predicates = [HasMips32r2Or64, NotN64] in {
- def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
- def : Pat<(store_u FGR32:$src, CPURegs:$addr),
- (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
-}
-
-let Predicates = [IsN64] in {
- def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
- def : Pat<(store_u FGR32:$src, CPU64Regs:$addr),
- (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
+def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>;
+def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
+
+def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
+
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D32_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
+ (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+ def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
+}
+
+let Predicates = [IsFP64bit, HasStandardEncoding] in {
+ def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
+ def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
+
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D64_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)),
+ (CVT_S_L (DMTC1 CPU64Regs:$src))>;
+ def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)),
+ (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
+
+ def : MipsPat<(i32 (fp_to_sint FGR64:$src)),
+ (MFC1 (TRUNC_W_D64 FGR64:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR64:$src)),
+ (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
+
+ def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 841eba0ec0a2..8feb853572ff 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -72,20 +72,33 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
field bits<32> SoftFail = 0;
}
+// Mips32/64 Instruction Format
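+// Instructions derived from this class are predicated on HasStandardEncoding,
+// so they are not selected when generating MIPS16 code.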
+class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format f>:
+ MipsInst<outs, ins, asmstr, pattern, itin, f> {
+ let Predicates = [HasStandardEncoding];
+}
+
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
+// Mips32/64 Pseudo Instruction Format
+class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsPseudo<outs, ins, asmstr, pattern> {
+ let Predicates = [HasStandardEncoding];
+}
+
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
//===----------------------------------------------------------------------===//
class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmR>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmR>
{
bits<5> rd;
bits<5> rs;
@@ -108,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rt;
bits<5> rs;
@@ -123,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rs;
bits<5> rt;
@@ -141,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
{
bits<26> addr;
@@ -169,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
{
bits<5> fd;
bits<5> fs;
@@ -193,7 +206,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
//===----------------------------------------------------------------------===//
class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
{
bits<5> ft;
bits<5> base;
@@ -211,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
//===----------------------------------------------------------------------===//
class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fs;
bits<5> ft;
@@ -232,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> rd;
bits<5> rs;
@@ -253,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fd;
bits<5> fs;
@@ -300,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
// Floating point madd/msub/nmadd/nmsub.
class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
list<dag> pattern>
- : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+ : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
@@ -318,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
// FP indexed load/store instructions.
class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> base;
bits<5> index;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index a3a18bff6553..50e3eb534e88 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
@@ -26,67 +27,19 @@
using namespace llvm;
-MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this),
- UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
+ TM(tm), UncondBrOpc(UncondBr) {}
-const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
- return RI;
-}
-
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
-}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned MipsInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
- (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
- (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
- (Opc == Mips::LDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
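+// Factory method: create the MIPS16 variant of MipsInstrInfo when the
+// subtarget is in MIPS16 mode, and the standard-encoding (SE) variant
+// otherwise.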
+const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16InstrInfo(TM);
- return 0;
+ return llvm::createMipsSEInstrInfo(TM);
}
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned MipsInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
- (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
- (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
- (Opc == Mips::SDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
- return 0;
+bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
+ return op.isImm() && op.getImm() == 0;
}
 /// insertNoop - If a data hazard condition is found, insert the target nop
@@ -98,78 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
-void MipsInstrInfo::
-copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc = 0, ZeroReg = 0;
-
- if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
- else if (Mips::CCRRegClass.contains(SrcReg))
- Opc = Mips::CFC1;
- else if (Mips::FGR32RegClass.contains(SrcReg))
- Opc = Mips::MFC1;
- else if (SrcReg == Mips::HI)
- Opc = Mips::MFHI, SrcReg = 0;
- else if (SrcReg == Mips::LO)
- Opc = Mips::MFLO, SrcReg = 0;
- }
- else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
- if (Mips::CCRRegClass.contains(DestReg))
- Opc = Mips::CTC1;
- else if (Mips::FGR32RegClass.contains(DestReg))
- Opc = Mips::MTC1;
- else if (DestReg == Mips::HI)
- Opc = Mips::MTHI, DestReg = 0;
- else if (DestReg == Mips::LO)
- Opc = Mips::MTLO, DestReg = 0;
- }
- else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_S;
- else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D32;
- else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D64;
- else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
- Opc = Mips::MOVCCRToCCR;
- else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
- if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
- else if (SrcReg == Mips::HI64)
- Opc = Mips::MFHI64, SrcReg = 0;
- else if (SrcReg == Mips::LO64)
- Opc = Mips::MFLO64, SrcReg = 0;
- else if (Mips::FGR64RegClass.contains(SrcReg))
- Opc = Mips::DMFC1;
- }
- else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (DestReg == Mips::HI64)
- Opc = Mips::MTHI64, DestReg = 0;
- else if (DestReg == Mips::LO64)
- Opc = Mips::MTLO64, DestReg = 0;
- else if (Mips::FGR64RegClass.contains(DestReg))
- Opc = Mips::DMTC1;
- }
-
- assert(Opc && "Cannot copy registers");
-
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
-
- if (DestReg)
- MIB.addReg(DestReg, RegState::Define);
-
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
- if (SrcReg)
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
- unsigned Flag) {
+MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -178,60 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
MFI.getObjectSize(FI), Align);
}
-void MipsInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
-
- unsigned Opc = 0;
-
- if (RC == Mips::CPURegsRegisterClass)
- Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
- else if (RC == Mips::CPU64RegsRegisterClass)
- Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
- else if (RC == Mips::AFGR64RegisterClass)
- Opc = Mips::SDC1;
- else if (RC == Mips::FGR64RegisterClass)
- Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-}
-
-void MipsInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
- unsigned Opc = 0;
-
- if (RC == Mips::CPURegsRegisterClass)
- Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
- else if (RC == Mips::CPU64RegsRegisterClass)
- Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
- else if (RC == Mips::AFGR64RegisterClass)
- Opc = Mips::LDC1;
- else if (RC == Mips::FGR64RegisterClass)
- Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
- .addMemOperand(MMO);
-}
-
MachineInstr*
MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *MDPtr,
@@ -245,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
// Branch Analysis
//===----------------------------------------------------------------------===//
-static unsigned GetAnalyzableBrOpc(unsigned Opc) {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
- Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
- Opc == Mips::J) ?
- Opc : 0;
-}
-
-/// GetOppositeBranchOpc - Return the inverse of the specified
-/// opcode, e.g. turning BEQ to BNE.
-unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
-{
- switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ: return Mips::BNE;
- case Mips::BNE: return Mips::BEQ;
- case Mips::BGTZ: return Mips::BLEZ;
- case Mips::BGEZ: return Mips::BLTZ;
- case Mips::BLTZ: return Mips::BGEZ;
- case Mips::BLEZ: return Mips::BGTZ;
- case Mips::BEQ64: return Mips::BNE64;
- case Mips::BNE64: return Mips::BEQ64;
- case Mips::BGTZ64: return Mips::BLEZ64;
- case Mips::BGEZ64: return Mips::BLTZ64;
- case Mips::BLTZ64: return Mips::BGEZ64;
- case Mips::BLEZ64: return Mips::BGTZ64;
- case Mips::BC1T: return Mips::BC1F;
- case Mips::BC1F: return Mips::BC1T;
- }
-}
-
-static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
- MachineBasicBlock *&BB,
- SmallVectorImpl<MachineOperand>& Cond) {
+void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
@@ -450,7 +246,62 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
- Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm()));
return false;
}
+/// Return the number of bytes of code the specified instruction may be.
+unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MI->getDesc().getSize();
+ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ }
+}
+
+unsigned
+llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
+ MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ bool LastInstrIsADDiu,
+ MipsAnalyzeImmediate::Inst *LastInst) {
+ MipsAnalyzeImmediate AnalyzeImm;
+ unsigned Size = IsN64 ? 64 : 32;
+ unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
+ unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
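+  // If the immediate can be materialized with a single instruction and the
+  // caller asked for the last instruction to be returned rather than emitted,
+  // hand it back without emitting anything.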
+ if (LastInst && (Seq.size() == 1)) {
+ *LastInst = *Inst;
+ return 0;
+ }
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ // Build the remaining instructions in Seq. Skip the last instruction if
+ // LastInst is not 0.
+ for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ if (LastInst)
+ *LastInst = *Inst;
+
+ return Seq.size() - !!LastInst;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 4be727dd8994..7d5625906248 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,6 +15,7 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -24,87 +25,85 @@
namespace llvm {
-namespace Mips {
- /// GetOppositeBranchOpc - Return the inverse of the specified
- /// opcode, e.g. turning BEQ to BNE.
- unsigned GetOppositeBranchOpc(unsigned Opc);
-}
-
class MipsInstrInfo : public MipsGenInstrInfo {
+protected:
MipsTargetMachine &TM;
- bool IsN64;
- const MipsRegisterInfo RI;
unsigned UncondBrOpc;
+
public:
- explicit MipsInstrInfo(MipsTargetMachine &TM);
+ explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const MipsRegisterInfo &getRegisterInfo() const;
-
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
- /// load from a stack slot, return the virtual or physical register number of
- /// the destination along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- /// isStoreToStackSlot - If the specified machine instruction is a direct
- /// store to a stack slot, return the virtual or physical register number of
- /// the source reg along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ static const MipsInstrInfo *create(MipsTargetMachine &TM);
/// Branch Analysis
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-private:
- void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
- const SmallVectorImpl<MachineOperand>& Cond) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0;
+
+ /// Return the number of bytes of code the specified instruction may be.
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+protected:
+ bool isZeroImm(const MachineOperand &op) const;
+
+ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0;
+
+ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const;
+
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
};
+namespace Mips {
+  /// Emit a series of instructions to load an immediate. If LastInst is not
+  /// 0, the last instruction of the sequence is not emitted; its
+  /// opcode-immediate pair is returned in LastInst instead. The function
+  /// returns the number of MachineInstrs emitted.
+ unsigned
+ loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
+ MachineBasicBlock& MBB, MachineBasicBlock::iterator II,
+ DebugLoc DL, bool LastInstrIsADDiu,
+ MipsAnalyzeImmediate::Inst *LastInst);
+}
+
+/// Create MipsInstrInfo objects.
+const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
+const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
+
}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 873d2bd99aed..da15d4de22e8 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -11,17 +11,11 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "MipsInstrFormats.td"
//===----------------------------------------------------------------------===//
// Mips profiles and nodes
//===----------------------------------------------------------------------===//
-def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
@@ -49,6 +43,10 @@ def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
SDTCisSameAs<0, 4>]>;
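+// Type profile for the load-left/right nodes: the result and the incoming
+// register value share an integer type, and operand 1 is the pointer.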
+def SDTMipsLoadLR : SDTypeProfile<1, 2,
+ [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisSameAs<0, 2>]>;
+
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -72,8 +70,7 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
// Return
-def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
- SDNPOptInGlue]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
@@ -118,6 +115,23 @@ def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsLWR : SDNode<"MipsISD::LWR", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsSWL : SDNode<"MipsISD::SWL", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsSWR : SDNode<"MipsISD::SWR", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsLDL : SDNode<"MipsISD::LDL", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsLDR : SDNode<"MipsISD::LDR", SDTMipsLoadLR,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def MipsSDL : SDNode<"MipsISD::SDL", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -145,12 +159,26 @@ def IsN64 : Predicate<"Subtarget.isABI_N64()">,
AssemblerPredicate<"FeatureN64">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">,
AssemblerPredicate<"!FeatureN64">;
+def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">,
+ AssemblerPredicate<"FeatureMips16">;
def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
AssemblerPredicate<"FeatureMips32">;
def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
AssemblerPredicate<"FeatureMips32">;
+def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">,
+ AssemblerPredicate<"!FeatureMips16">;
+
+class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [HasStandardEncoding];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -190,6 +218,7 @@ def mem : Operand<i32> {
def mem64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
}
def mem_ea : Operand<i32> {
@@ -252,7 +281,8 @@ def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions from the stack use it.
-def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
+def addr :
+ ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// Pattern fragment for load/store
@@ -418,21 +448,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
-// Unaligned Memory Load/Store
-let canFoldAsLoad = 1 in
-class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
-
-class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
-
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64]>;
+ Requires<[NotN64, HasStandardEncoding]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64]> {
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -442,31 +464,21 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64]>;
+ Requires<[NotN64, HasStandardEncoding]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64]> {
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 32-bit load.
-multiclass LoadUnAlign32<bits<6> op> {
- def #NAME# : LoadUnAlign<op, CPURegs, mem>,
- Requires<[NotN64]>;
- def _P8 : LoadUnAlign<op, CPURegs, mem64>,
- Requires<[IsN64]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
// 32-bit store.
multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64]>;
+ Requires<[NotN64, HasStandardEncoding]>;
def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64]> {
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -476,20 +488,69 @@ multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64]>;
+ Requires<[NotN64, HasStandardEncoding]>;
def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64]> {
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 32-bit store.
-multiclass StoreUnAlign32<bits<6> op> {
- def #NAME# : StoreUnAlign<op, CPURegs, mem>,
- Requires<[NotN64]>;
- def _P8 : StoreUnAlign<op, CPURegs, mem64>,
- Requires<[IsN64]> {
+// Load/Store Left/Right
+let canFoldAsLoad = 1 in
+class LoadLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
+ RegisterClass RC, Operand MemOpnd> :
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr, RC:$src))], IILoad> {
+ string Constraints = "$src = $rt";
+}
+
+class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
+ RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"), [(OpNode RC:$rt, addr:$addr)],
+ IIStore>;
+
+// 32-bit load left/right.
+multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
+ def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+// 64-bit load left/right.
+multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
+ def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+// 32-bit store left/right.
+multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
+ def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
+ let DecoderNamespace = "Mips64";
+ let isCodeGenOnly = 1;
+ }
+}
+
+// 64-bit store left/right.
+multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
+ def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -503,6 +564,7 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
+ let Defs = [AT];
}
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
@@ -514,6 +576,7 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
+ let Defs = [AT];
}
// SetCC
@@ -541,8 +604,9 @@ class JumpFJ<bits<6> op, string instr_asm>:
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
- let Predicates = [RelocStatic];
+ let Predicates = [RelocStatic, HasStandardEncoding];
let DecoderMethod = "DecodeJumpTarget";
+ let Defs = [AT];
}
// Unconditional branch
@@ -555,23 +619,37 @@ class UncondBranch<bits<6> op, string instr_asm>:
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
- let Predicates = [RelocPIC];
+ let Predicates = [RelocPIC, HasStandardEncoding];
+ let Defs = [AT];
}
-let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
- isIndirectBranch = 1 in
-class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
- FR<op, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> {
+// Base class for indirect branch and return instruction classes.
+let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFR<RegisterClass RC, list<dag> pattern>:
+ FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", pattern, IIBranch> {
let rt = 0;
let rd = 0;
let shamt = 0;
}
+// Indirect branch
+class IndirectBranch<RegisterClass RC>: JumpFR<RC, [(brind RC:$rs)]> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Return instruction
+class RetBase<RegisterClass RC>: JumpFR<RC, []> {
+ let isReturn = 1;
+ let isCodeGenOnly = 1;
+ let hasCtrlDep = 1;
+ let hasExtraSrcRegAllocReq = 1;
+}
+
// Jump and Link (Call)
-let isCall=1, hasDelaySlot=1 in {
+let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins calltarget:$target, variable_ops),
+ FJ<op, (outs), (ins calltarget:$target),
!strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
IIBranch> {
let DecoderMethod = "DecodeJumpTarget";
@@ -579,7 +657,7 @@ let isCall=1, hasDelaySlot=1 in {
class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
RegisterClass RC>:
- FR<op, func, (outs), (ins RC:$rs, variable_ops),
+ FR<op, func, (outs), (ins RC:$rs),
!strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
let rt = 0;
let rd = 31;
@@ -587,7 +665,7 @@ let isCall=1, hasDelaySlot=1 in {
}
class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
- FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16, variable_ops),
+ FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16),
!strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
let rt = _rt;
}
@@ -644,16 +722,18 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
let neverHasSideEffects = 1;
}
-class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<0x09, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>;
+class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
+ FMem<opc, (outs RC:$rt), (ins Mem:$addr),
+ instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
+ let isCodeGenOnly = 1;
+}
// Count Leading Ones/Zeros in Word
class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
!strconcat(instr_asm, "\t$rd, $rs"),
[(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
- Requires<[HasBitCount]> {
+ Requires<[HasBitCount, HasStandardEncoding]> {
let shamt = 0;
let rt = rd;
}
@@ -662,7 +742,7 @@ class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
!strconcat(instr_asm, "\t$rd, $rs"),
[(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
- Requires<[HasBitCount]> {
+ Requires<[HasBitCount, HasStandardEncoding]> {
let shamt = 0;
let rt = rd;
}
@@ -675,7 +755,7 @@ class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
[(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
let rs = 0;
let shamt = sa;
- let Predicates = [HasSEInReg];
+ let Predicates = [HasSEInReg, HasStandardEncoding];
}
// Subword Swap
@@ -684,7 +764,7 @@ class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
!strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
let rs = 0;
let shamt = sa;
- let Predicates = [HasSwap];
+ let Predicates = [HasSwap, HasStandardEncoding];
let neverHasSideEffects = 1;
}
@@ -705,7 +785,7 @@ class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
bits<5> sz;
let rd = sz;
let shamt = pos;
- let Predicates = [HasMips32r2];
+ let Predicates = [HasMips32r2, HasStandardEncoding];
}
class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
@@ -718,20 +798,22 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
bits<5> sz;
let rd = sz;
let shamt = pos;
- let Predicates = [HasMips32r2];
+ let Predicates = [HasMips32r2, HasStandardEncoding];
let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
@@ -739,13 +821,15 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
// Atomic Compare & Swap.
class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]> {
+ def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStandardEncoding]>;
+ def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
}
@@ -767,12 +851,15 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
// Pseudo instructions
//===----------------------------------------------------------------------===//
-// As stack alignment is always done with addiu, we need a 16-bit immediate
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+// Return RA.
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
+def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
+
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
"!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -782,31 +869,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
// are used, we have the same behavior, but also get a bunch of warnings
// from the assembler.
let neverHasSideEffects = 1 in
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
-
-// For O32 ABI & PIC & non-fixed global base register, the following instruction
-// seqeunce is emitted to set the global base register:
-//
-// 0. lui $2, %hi(_gp_disp)
-// 1. addiu $2, $2, %lo(_gp_disp)
-// 2. addu $globalbasereg, $2, $t9
-//
-// SETGP01 is emitted during Prologue/Epilogue insertion and then converted to
-// instructions 0 and 1 in the sequence above during MC lowering.
-// SETGP2 is emitted just before register allocation and converted to
-// instruction 2 just prior to post-RA scheduling.
-//
-// These pseudo instructions are needed to ensure no instructions are inserted
-// before or between instructions 0 and 1, which is a limitation imposed by
-// GNU linker.
-
-let isTerminator = 1, isBarrier = 1 in
-def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>;
-
-let neverHasSideEffects = 1 in
-def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "",
- []>;
+def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
+ ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
@@ -876,7 +940,7 @@ def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
// Rotate Instructions
-let Predicates = [HasMips32r2] in {
+let Predicates = [HasMips32r2, HasStandardEncoding] in {
def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
}
@@ -899,15 +963,15 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
-/// Primitives for unaligned
-defm LWL : LoadUnAlign32<0x22>;
-defm LWR : LoadUnAlign32<0x26>;
-defm SWL : StoreUnAlign32<0x2A>;
-defm SWR : StoreUnAlign32<0x2E>;
+/// load/store left/right
+defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>;
+defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>;
+defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
+defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
let hasSideEffects = 1 in
-def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
+def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary, FrmOther>
{
bits<5> stype;
let Opcode = 0;
@@ -917,19 +981,23 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
}
/// Load-linked, Store-conditional
-def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>;
-def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]> {
+def LL : LLBase<0x30, "ll", CPURegs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
-def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>;
-def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]> {
+def SC : SCBase<0x38, "sc", CPURegs, mem>,
+ Requires<[NotN64, HasStandardEncoding]>;
+def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
+ Requires<[IsN64, HasStandardEncoding]> {
let DecoderNamespace = "Mips64";
}
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
+def JR : IndirectBranch<CPURegs>;
def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
@@ -938,15 +1006,16 @@ def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
+let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1,
+ hasDelaySlot = 1, Defs = [RA] in
+def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>;
+
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
-let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1,
- isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in
- def RET : FR <0x00, 0x08, (outs), (ins CPURegs:$target),
- "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+def RET : RetBase<CPURegs>;
/// Multiply and Divide Instructions.
def MULT : Mult32<0x18, "mult", IIImul>;
@@ -978,17 +1047,13 @@ let addr=0 in
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -999,7 +1064,7 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
// MUL is an assembly macro in the currently used ISAs. In recent ISAs
// it is a real instruction.
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
- Requires<[HasMips32]>;
+ Requires<[HasMips32, HasStandardEncoding]>;
def RDHWR : ReadHardware<CPURegs, HWRegs>;
@@ -1011,67 +1076,67 @@ def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Small immediates
-def : Pat<(i32 immSExt16:$in),
- (ADDiu ZERO, imm:$in)>;
-def : Pat<(i32 immZExt16:$in),
- (ORi ZERO, imm:$in)>;
-def : Pat<(i32 immLow16Zero:$in),
- (LUi (HI16 imm:$in))>;
+def : MipsPat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : MipsPat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+def : MipsPat<(i32 immLow16Zero:$in),
+ (LUi (HI16 imm:$in))>;
// Arbitrary immediates
-def : Pat<(i32 imm:$imm),
+def : MipsPat<(i32 imm:$imm),
(ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
-// Carry patterns
-def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
- (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
- (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc CPURegs:$src, immSExt16:$imm),
- (ADDiu CPURegs:$src, imm:$imm)>;
+// Carry MipsPatterns
+def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
// Call
-def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
- (JAL tglobaladdr:$dst)>;
-def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
- (JAL texternalsym:$dst)>;
-//def : Pat<(MipsJmpLink CPURegs:$dst),
-// (JALR CPURegs:$dst)>;
+def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : MipsPat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
// hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
- (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
- (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
- (ADDiu CPURegs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
- (ADDiu CPURegs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
-def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
- (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
-def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
- (ADDiu CPURegs:$gp, tconstpool:$in)>;
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
// wrapper_pic
class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
- Pat<(MipsWrapper RC:$gp, node:$in),
- (ADDiuOp RC:$gp, node:$in)>;
+ MipsPat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
def : WrapperPat<tglobaladdr, ADDiu, CPURegs>;
def : WrapperPat<tconstpool, ADDiu, CPURegs>;
@@ -1081,58 +1146,58 @@ def : WrapperPat<tjumptable, ADDiu, CPURegs>;
def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
-def : Pat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+def : MipsPat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
// extended loads
-let Predicates = [NotN64] in {
- def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
- def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
- def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
- def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
+let Predicates = [NotN64, HasStandardEncoding] in {
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
}
-let Predicates = [IsN64] in {
- def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
+let Predicates = [IsN64, HasStandardEncoding] in {
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
}
// peepholes
-let Predicates = [NotN64] in {
- def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
- def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
+let Predicates = [NotN64, HasStandardEncoding] in {
+ def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+ def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
}
-let Predicates = [IsN64] in {
- def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
- def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
+let Predicates = [IsN64, HasStandardEncoding] in {
+ def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
+ def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
}
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
Instruction SLTiuOp, Register ZEROReg> {
-def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
- (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
-def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
- (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
-def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond RC:$cond, bb:$dst),
- (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
}
defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
@@ -1140,39 +1205,39 @@ defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
// setcc patterns
multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
Instruction SLTuOp, Register ZEROReg> {
- def : Pat<(seteq RC:$lhs, RC:$rhs),
- (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
- def : Pat<(setne RC:$lhs, RC:$rhs),
- (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+ def : MipsPat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
}
multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setle RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
- def : Pat<(setule RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+ def : MipsPat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : MipsPat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
}
multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setgt RC:$lhs, RC:$rhs),
- (SLTOp RC:$rhs, RC:$lhs)>;
- def : Pat<(setugt RC:$lhs, RC:$rhs),
- (SLTuOp RC:$rhs, RC:$lhs)>;
+ def : MipsPat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : MipsPat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
}
multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setge RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
- def : Pat<(setuge RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
}
multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
Instruction SLTiuOp> {
- def : Pat<(setge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
- def : Pat<(setuge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : MipsPat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
}
defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
@@ -1182,10 +1247,10 @@ defm : SetgePats<CPURegs, SLT, SLTu>;
defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
// bswap pattern
-def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
//===----------------------------------------------------------------------===//
// Floating Point Support
@@ -1195,3 +1260,8 @@ include "MipsInstrFPU.td"
include "Mips64InstrInfo.td"
include "MipsCondMov.td"
+//
+// Mips16
+
+include "Mips16InstrFormats.td"
+include "Mips16InstrInfo.td"
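
The new HasStandardEncoding and InMips16Mode predicates above gate every standard-encoding pattern (via the MipsPat wrapper) on whether the subtarget is in MIPS16 mode. Below is a minimal sketch of the C++ query those predicate strings refer to, assuming (consistently with the !FeatureMips16 assembler predicate) that standard encoding is simply the absence of MIPS16 mode; the free function is hypothetical, not part of the patch.

#include "MipsSubtarget.h"

using namespace llvm;

// HasStandardEncoding expands to "Subtarget.hasStandardEncoding()"; under the
// assumption above, it is equivalent to the negation of inMips16Mode().
static bool hasStandardEncoding(const MipsSubtarget &Subtarget) {
  return !Subtarget.inMips16Mode();
}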
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 76ca3e176727..052046a8a45d 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -27,7 +27,52 @@ using namespace llvm;
void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)Old;
+ const unsigned NopInstr = 0x0;
+
+ // If the functions are in the same memory segment, insert PC-region branch.
+ if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
+ unsigned *OldInstruction = (unsigned *)Old;
+ *OldInstruction = 0x08000000;
+ unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
+
+ JTargetAddr >>= 2;
+ *OldInstruction |= JTargetAddr;
+
+ // Insert a NOP.
+ OldInstruction++;
+ *OldInstruction = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
+ } else {
+ // We need to clear hint bits from the instruction, in case it is 'jr ra'.
+ const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
+ unsigned* CurrentInstr = (unsigned*)Old;
+ unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
+ unsigned* NextInstr = CurrentInstr + 1;
+ unsigned NextInstrHintClear = (*NextInstr) & HintMask;
+
+ // Do absolute jump if there are 2 or more instructions before return from
+ // the old function.
+ if ((CurrInstrHintClear != ReturnSequence) &&
+ (NextInstrHintClear != ReturnSequence)) {
+ const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
+ const unsigned JrT0Instr = 0x01000008;
+ // lui t0, high 16 bits of the NewAddr
+ (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
+ // addiu t0, t0, low 16 bits of the NewAddr
+ (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
+ // jr t0
+ (*(CurrentInstr++)) = JrT0Instr;
+ (*CurrentInstr) = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
+ } else {
+ // Unsupported case
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ }
+ }
}
/// JITCompilerFunction - This contains the address of the JIT function used to
@@ -154,8 +199,8 @@ TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
return Result;
}
-void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
+void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) {
JCE.emitAlignment(4);
void *Addr = (void*) (JCE.getCurrentPCValue());
if (!sys::Memory::setRangeWritable(Addr, 16))
@@ -193,7 +238,7 @@ void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
+ unsigned NumRelocs, unsigned char *GOTBase) {
for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
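
The same-segment path of replaceMachineCodeForFunction above patches the old function with a MIPS 'j' (PC-region jump) followed by a nop. A minimal sketch restating the word it assembles, with an illustrative helper name:

#include <cstdint>

// Build the 32-bit 'j target' encoding: opcode 0x08000000 in the top six
// bits, plus the word-aligned target address shifted into the 26-bit
// instruction-index field. Valid only when the target lies in the same
// 256 MB region as the instruction following the jump.
static uint32_t encodeJumpTo(uint32_t NewAddr) {
  uint32_t Word = 0x08000000;
  Word |= (NewAddr & 0x0FFFFFFC) >> 2;
  return Word;
}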
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index f4c4ae86d38d..637a31866034 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -45,8 +45,8 @@ class MipsJITInfo : public TargetJITInfo {
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE);
+ virtual void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE);
/// getLazyResolverFunction - Expose the lazy resolver to the JIT.
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
@@ -55,7 +55,7 @@ class MipsJITInfo : public TargetJITInfo {
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase);
+ unsigned NumRelocs, unsigned char *GOTBase);
/// Initialize - Initialize internal stage for the function being JITted.
void Initialize(const MachineFunction &MF, bool isPIC) {
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
new file mode 100644
index 000000000000..f78203f70531
--- /dev/null
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -0,0 +1,419 @@
+//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands a branch or jump instruction into a long branch if its
+// offset is too large to fit into its immediate field.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-long-branch"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+static cl::opt<bool> SkipLongBranch(
+ "skip-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Skip long branch pass."),
+ cl::Hidden);
+
+static cl::opt<bool> ForceLongBranch(
+ "force-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Expand all branches to long format."),
+ cl::Hidden);
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ struct MBBInfo {
+ uint64_t Size;
+ bool HasLongBranch;
+ MachineInstr *Br;
+
+ MBBInfo() : Size(0), HasLongBranch(false), Br(0) {}
+ };
+
+ class MipsLongBranch : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsLongBranch(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Long Branch";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ void splitMBB(MachineBasicBlock *MBB);
+ void initMBBInfo();
+ int64_t computeOffset(const MachineInstr *Br);
+ void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL,
+ MachineBasicBlock *MBBOpnd);
+ void expandToLongBranch(MBBInfo &Info);
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<MBBInfo, 16> MBBInfos;
+ };
+
+ char MipsLongBranch::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+ return new MipsLongBranch(tm);
+}
+
+/// Iterate over the list of Br's operands and search for a MachineBasicBlock
+/// operand.
+static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
+ for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = Br.getOperand(I);
+
+ if (MO.isMBB())
+ return MO.getMBB();
+ }
+
+ assert(false && "This instruction does not have an MBB operand.");
+ return 0;
+}
+
+// Traverse the list of instructions backwards until a non-debug instruction is
+// found or it reaches E.
+static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) {
+ for (; B != E; ++B)
+ if (!B->isDebugValue())
+ return B;
+
+ return E;
+}
+
+// Split MBB if it has two direct jumps/branches.
+void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
+ ReverseIter End = MBB->rend();
+ ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End);
+
+ // Return if MBB has no branch instructions.
+ if ((LastBr == End) ||
+ (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch()))
+ return;
+
+ ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End);
+
+ // MBB has only one branch instruction if FirstBr is not a branch
+ // instruction.
+ if ((FirstBr == End) ||
+ (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch()))
+ return;
+
+ assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found.");
+
+ // Create a new MBB. Move instructions in MBB to the newly created MBB.
+ MachineBasicBlock *NewMBB =
+ MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+
+ // Insert NewMBB and fix control flow.
+ MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
+ NewMBB->transferSuccessors(MBB);
+ NewMBB->removeSuccessor(Tgt);
+ MBB->addSuccessor(NewMBB);
+ MBB->addSuccessor(Tgt);
+ MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
+
+ NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end());
+}
+
+// Fill MBBInfos.
+void MipsLongBranch::initMBBInfo() {
+ // Split the MBBs if they have two branches. Each basic block should have at
+ // most one branch after this loop is executed.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;)
+ splitMBB(I++);
+
+ MF->RenumberBlocks();
+ MBBInfos.clear();
+ MBBInfos.resize(MF->size());
+
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+
+ // Compute size of MBB.
+ for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
+ MI != MBB->instr_end(); ++MI)
+ MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI);
+
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
+
+ if ((Br != End) && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() ||
+ (Br->isUnconditionalBranch() &&
+ TM.getRelocationModel() == Reloc::PIC_)))
+ MBBInfos[I].Br = (++Br).base();
+ }
+}
+
+// Compute offset of branch in number of bytes.
+int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
+ int64_t Offset = 0;
+ int ThisMBB = Br->getParent()->getNumber();
+ int TargetMBB = getTargetMBB(*Br)->getNumber();
+
+ // Compute offset of a forward branch.
+ if (ThisMBB < TargetMBB) {
+ for (int N = ThisMBB + 1; N < TargetMBB; ++N)
+ Offset += MBBInfos[N].Size;
+
+ return Offset + 4;
+ }
+
+ // Compute offset of a backward branch.
+ for (int N = ThisMBB; N >= TargetMBB; --N)
+ Offset += MBBInfos[N].Size;
+
+ return -Offset + 4;
+}
+
+// Replace Br with a branch which has the opposite condition code and a
+// MachineBasicBlock operand MBBOpnd.
+void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
+ DebugLoc DL, MachineBasicBlock *MBBOpnd) {
+ unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode());
+ const MCInstrDesc &NewDesc = TII->get(NewOpc);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc);
+
+ for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = Br->getOperand(I);
+
+ if (!MO.isReg()) {
+ assert(MO.isMBB() && "MBB operand expected.");
+ break;
+ }
+
+ MIB.addReg(MO.getReg());
+ }
+
+ MIB.addMBB(MBBOpnd);
+
+ Br->eraseFromParent();
+}
+
+// Expand branch instructions to long branches.
+void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
+ I.HasLongBranch = true;
+
+ bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI();
+ bool N64 = ABI == MipsSubtarget::N64;
+
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br);
+ DebugLoc DL = I.Br->getDebugLoc();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB);
+ MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB);
+
+ MF->insert(FallThroughMBB, LongBrMBB);
+ MBB->removeSuccessor(TgtMBB);
+ MBB->addSuccessor(LongBrMBB);
+
+ if (IsPIC) {
+ // $longbr:
+ // addiu $sp, $sp, -regsize * 2
+ // sw $ra, 0($sp)
+ // bal $baltgt
+ // sw $a3, regsize($sp)
+ // $baltgt:
+ // lui $a3, %hi($baltgt)
+ // lui $at, %hi($tgt)
+ // addiu $a3, $a3, %lo($baltgt)
+ // addiu $at, $at, %lo($tgt)
+ // subu $at, $at, $a3
+ // addu $at, $ra, $at
+ //
+ // if n64:
+ // lui $a3, %highest($baltgt)
+ // lui $ra, %highest($tgt)
+ // addiu $a3, $a3, %higher($baltgt)
+ // addiu $ra, $ra, %higher($tgt)
+ // dsll $a3, $a3, 32
+ // dsll $ra, $ra, 32
+ // subu $at, $at, $a3
+ // addu $at, $at, $ra
+ //
+ // lw $ra, 0($sp)
+ // lw $a3, regsize($sp)
+ // jr $at
+ // addiu $sp, $sp, regsize * 2
+ // $fallthrough:
+ //
+ MF->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(FallThroughMBB, BalTgtMBB);
+ LongBrMBB->addSuccessor(BalTgtMBB);
+ BalTgtMBB->addSuccessor(TgtMBB);
+
+ int RegSize = N64 ? 8 : 4;
+ unsigned AT = N64 ? Mips::AT_64 : Mips::AT;
+ unsigned A3 = N64 ? Mips::A3_64 : Mips::A3;
+ unsigned SP = N64 ? Mips::SP_64 : Mips::SP;
+ unsigned RA = N64 ? Mips::RA_64 : Mips::RA;
+ unsigned Load = N64 ? Mips::LD_P8 : Mips::LW;
+ unsigned Store = N64 ? Mips::SD_P8 : Mips::SW;
+ unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi;
+ unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu;
+ unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu;
+ unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu;
+ unsigned JR = N64 ? Mips::JR64 : Mips::JR;
+
+ Pos = LongBrMBB->begin();
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP)
+ .addImm(-RegSize * 2);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP)
+ .addImm(0);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP)
+ .addImm(RegSize)->setIsInsideBundle();
+
+ Pos = BalTgtMBB->begin();
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3)
+ .addMBB(BalTgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3)
+ .addMBB(BalTgtMBB, MipsII::MO_ABS_LO);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT);
+
+ if (N64) {
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3)
+ .addMBB(BalTgtMBB, MipsII::MO_HIGHEST);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA)
+ .addMBB(TgtMBB, MipsII::MO_HIGHEST);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3)
+ .addMBB(BalTgtMBB, MipsII::MO_HIGHER);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA)
+ .addMBB(TgtMBB, MipsII::MO_HIGHER);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3)
+ .addImm(32);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA)
+ .addImm(32);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA);
+ I.Size += 4 * 8;
+ }
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP)
+ .addImm(RegSize * 2)->setIsInsideBundle();
+ I.Size += 4 * 14;
+ } else {
+ // $longbr:
+ // j $tgt
+ // nop
+ // $fallthrough:
+ //
+ Pos = LongBrMBB->begin();
+ LongBrMBB->addSuccessor(TgtMBB);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+ I.Size += 4 * 2;
+ }
+
+ if (I.Br->isUnconditionalBranch()) {
+ // Change branch destination.
+ assert(I.Br->getDesc().getNumOperands() == 1);
+ I.Br->RemoveOperand(0);
+ I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB));
+ } else
+ // Change branch destination and reverse condition.
+ replaceBranch(*MBB, I.Br, DL, FallThroughMBB);
+}
+
+static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
+ MachineBasicBlock &MBB = F.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL = MBB.findDebugLoc(MBB.begin());
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0)
+ .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ MBB.removeLiveIn(Mips::V0);
+}
+
+bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+ if ((TM.getRelocationModel() == Reloc::PIC_) &&
+ TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
+ F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
+ emitGPDisp(F, TII);
+
+ if (SkipLongBranch)
+ return true;
+
+ MF = &F;
+ initMBBInfo();
+
+ SmallVector<MBBInfo, 16>::iterator I, E = MBBInfos.end();
+ bool EverMadeChange = false, MadeChange = true;
+
+ while (MadeChange) {
+ MadeChange = false;
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br || I->HasLongBranch)
+ continue;
+
+ if (!ForceLongBranch)
+ // Check if offset fits into 16-bit immediate field of branches.
+ if (isInt<16>(computeOffset(I->Br) / 4))
+ continue;
+
+ expandToLongBranch(*I);
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
+ }
+ }
+
+ if (EverMadeChange)
+ MF->RenumberBlocks();
+
+ return true;
+}
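
The decision in runOnMachineFunction above divides the byte offset by 4 and checks isInt<16>, because a MIPS branch immediate is a signed 16-bit count of 4-byte instructions. A minimal restatement of that test, with an illustrative helper name:

#include "llvm/Support/MathExtras.h"

using namespace llvm;

// A branch can reach roughly -131072 .. +131068 bytes relative to the
// instruction after it; anything outside that needs the long-branch expansion.
static bool offsetFitsInBranchImm(int64_t OffsetInBytes) {
  return isInt<16>(OffsetInBytes / 4);
}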
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 1597b9334450..d4c5e6dd74cf 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
: AsmPrinter(asmprinter) {}
-void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) {
+void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) {
Mang = M;
Ctx = C;
}
@@ -61,6 +61,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break;
case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break;
+ case MipsII::MO_HIGHER: Kind = MCSymbolRefExpr::VK_Mips_HIGHER; break;
+ case MipsII::MO_HIGHEST: Kind = MCSymbolRefExpr::VK_Mips_HIGHEST; break;
}
switch (MOTy) {
@@ -70,14 +72,17 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MachineOperand::MO_GlobalAddress:
Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
break;
case MachineOperand::MO_BlockAddress:
Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
break;
case MachineOperand::MO_ExternalSymbol:
Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
break;
case MachineOperand::MO_JumpTableIndex:
@@ -86,8 +91,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MachineOperand::MO_ConstantPoolIndex:
Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
- if (MO.getOffset())
- Offset += MO.getOffset();
+ Offset += MO.getOffset();
break;
default:
@@ -103,71 +107,23 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
assert(Offset > 0);
const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
- const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
- return MCOperand::CreateExpr(AddExpr);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
}
-static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0,
- const MCOperand& Opnd1,
- const MCOperand& Opnd2 = MCOperand()) {
+/*
+static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0,
+ const MCOperand &Opnd1,
+ const MCOperand &Opnd2 = MCOperand()) {
Inst.setOpcode(Opc);
Inst.addOperand(Opnd0);
Inst.addOperand(Opnd1);
if (Opnd2.isValid())
Inst.addOperand(Opnd2);
}
+*/
-// Lower ".cpload $reg" to
-// "lui $gp, %hi(_gp_disp)"
-// "addiu $gp, $gp, %lo(_gp_disp)"
-// "addu $gp, $gp, $t9"
-void MipsMCInstLower::LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts) {
- MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
- MCOperand T9Reg = MCOperand::CreateReg(Mips::T9);
- StringRef SymName("_gp_disp");
- const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
- const MCSymbolRefExpr *MCSym;
-
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
- MCOperand SymHi = MCOperand::CreateExpr(MCSym);
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
- MCOperand SymLo = MCOperand::CreateExpr(MCSym);
-
- MCInsts.resize(3);
-
- CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi);
- CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo);
- CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg);
-}
-
-// Lower ".cprestore offset" to "sw $gp, offset($sp)".
-void MipsMCInstLower::LowerCPRESTORE(int64_t Offset,
- SmallVector<MCInst, 4>& MCInsts) {
- assert(isInt<32>(Offset) && (Offset >= 0) &&
- "Imm operand of .cprestore must be a non-negative 32-bit value.");
-
- MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg;
- MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
-
- if (!isInt<16>(Offset)) {
- unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff;
- Offset &= 0xffff;
- MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
- BaseReg = ATReg;
-
- // lui at,hi
- // addu at,at,sp
- MCInsts.resize(2);
- CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi));
- CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg);
- }
-
- MCInst Sw;
- CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset));
- MCInsts.push_back(Sw);
-}
-
-MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
unsigned offset) const {
MachineOperandType MOTy = MO.getType();
@@ -205,139 +161,31 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI,
- SmallVector<MCInst,
- 4>& MCInsts) {
- unsigned Opc = MI->getOpcode();
- MCInst Instr1, Instr2, Instr3, Move;
-
- bool TwoInstructions = false;
-
- assert(MI->getNumOperands() == 3);
- assert(MI->getOperand(0).isReg());
- assert(MI->getOperand(1).isReg());
-
- MCOperand Target = LowerOperand(MI->getOperand(0));
- MCOperand Base = LowerOperand(MI->getOperand(1));
- MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
- MCOperand ZeroReg = MCOperand::CreateReg(Mips::ZERO);
-
- MachineOperand UnLoweredName = MI->getOperand(2);
- MCOperand Name = LowerOperand(UnLoweredName);
-
- Move.setOpcode(Mips::ADDu);
- Move.addOperand(Target);
- Move.addOperand(ATReg);
- Move.addOperand(ZeroReg);
-
- switch (Opc) {
- case Mips::ULW: {
- // FIXME: only works for little endian right now
- MCOperand AdjName = LowerOperand(UnLoweredName, 3);
- if (Base.getReg() == (Target.getReg())) {
- Instr1.setOpcode(Mips::LWL);
- Instr1.addOperand(ATReg);
- Instr1.addOperand(Base);
- Instr1.addOperand(AdjName);
- Instr2.setOpcode(Mips::LWR);
- Instr2.addOperand(ATReg);
- Instr2.addOperand(Base);
- Instr2.addOperand(Name);
- Instr3 = Move;
- } else {
- TwoInstructions = true;
- Instr1.setOpcode(Mips::LWL);
- Instr1.addOperand(Target);
- Instr1.addOperand(Base);
- Instr1.addOperand(AdjName);
- Instr2.setOpcode(Mips::LWR);
- Instr2.addOperand(Target);
- Instr2.addOperand(Base);
- Instr2.addOperand(Name);
- }
+// If the D<shift> instruction has a shift amount greater than 31 (checked in
+// the calling routine), lower it to the corresponding D<shift>32 instruction.
+void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI,
+ MCInst& Inst,
+ int64_t Shift) {
+ // rd (destination)
+ Inst.addOperand(LowerOperand(MI->getOperand(0)));
+ // rt (source)
+ Inst.addOperand(LowerOperand(MI->getOperand(1)));
+ // sa minus 32 (the shift amount encoded in the D<shift>32 form)
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+
+ switch (MI->getOpcode()) {
+ default:
+ // The calling routine should have filtered out all other opcodes.
+ llvm_unreachable("Unexpected shift instruction");
break;
- }
- case Mips::ULHu: {
- // FIXME: only works for little endian right now
- MCOperand AdjName = LowerOperand(UnLoweredName, 1);
- Instr1.setOpcode(Mips::LBu);
- Instr1.addOperand(ATReg);
- Instr1.addOperand(Base);
- Instr1.addOperand(AdjName);
- Instr2.setOpcode(Mips::LBu);
- Instr2.addOperand(Target);
- Instr2.addOperand(Base);
- Instr2.addOperand(Name);
- Instr3.setOpcode(Mips::INS);
- Instr3.addOperand(Target);
- Instr3.addOperand(ATReg);
- Instr3.addOperand(MCOperand::CreateImm(0x8));
- Instr3.addOperand(MCOperand::CreateImm(0x18));
+ case Mips::DSLL:
+ Inst.setOpcode(Mips::DSLL32);
break;
- }
-
- case Mips::USW: {
- // FIXME: only works for little endian right now
- assert (Base.getReg() != Target.getReg());
- TwoInstructions = true;
- MCOperand AdjName = LowerOperand(UnLoweredName, 3);
- Instr1.setOpcode(Mips::SWL);
- Instr1.addOperand(Target);
- Instr1.addOperand(Base);
- Instr1.addOperand(AdjName);
- Instr2.setOpcode(Mips::SWR);
- Instr2.addOperand(Target);
- Instr2.addOperand(Base);
- Instr2.addOperand(Name);
+ case Mips::DSRL:
+ Inst.setOpcode(Mips::DSRL32);
break;
- }
- case Mips::USH: {
- MCOperand AdjName = LowerOperand(UnLoweredName, 1);
- Instr1.setOpcode(Mips::SB);
- Instr1.addOperand(Target);
- Instr1.addOperand(Base);
- Instr1.addOperand(Name);
- Instr2.setOpcode(Mips::SRL);
- Instr2.addOperand(ATReg);
- Instr2.addOperand(Target);
- Instr2.addOperand(MCOperand::CreateImm(8));
- Instr3.setOpcode(Mips::SB);
- Instr3.addOperand(ATReg);
- Instr3.addOperand(Base);
- Instr3.addOperand(AdjName);
+ case Mips::DSRA:
+ Inst.setOpcode(Mips::DSRA32);
break;
}
- default:
- // FIXME: need to add others
- llvm_unreachable("unaligned instruction not processed");
- }
-
- MCInsts.push_back(Instr1);
- MCInsts.push_back(Instr2);
- if (!TwoInstructions) MCInsts.push_back(Instr3);
-}
-
-// Convert
-// "setgp01 $reg"
-// to
-// "lui $reg, %hi(_gp_disp)"
-// "addiu $reg, $reg, %lo(_gp_disp)"
-void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI,
- SmallVector<MCInst, 4>& MCInsts) {
- const MachineOperand &MO = MI->getOperand(0);
- assert(MO.isReg());
- MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg());
- StringRef SymName("_gp_disp");
- const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
- const MCSymbolRefExpr *MCSym;
-
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
- MCOperand SymHi = MCOperand::CreateExpr(MCSym);
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
- MCOperand SymLo = MCOperand::CreateExpr(MCSym);
-
- MCInsts.resize(2);
-
- CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi);
- CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo);
}
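The new LowerLargeShift hook assumes its caller has already checked the shift amount and passes the amount minus 32. As a minimal, self-contained sketch of that arithmetic for the DSLL case (an illustration only; lowerDsll and LoweredShift are invented names, not part of the patch):

#include <cassert>
#include <cstdint>

struct LoweredShift { const char *Mnemonic; int64_t Sa; };

// dsll rd, rt, sa with sa in [32, 63] is encoded as dsll32 rd, rt, sa - 32.
LoweredShift lowerDsll(int64_t Sa) {
  assert(Sa >= 0 && Sa < 64 && "shift amount out of range");
  if (Sa > 31)
    return { "dsll32", Sa - 32 };  // e.g. a shift by 40 becomes dsll32 ..., 8
  return { "dsll", Sa };           // small shifts keep the base opcode
}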
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index c1d007d2f539..0abb996a6877 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -31,13 +31,10 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
MipsAsmPrinter &AsmPrinter;
public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
- void Initialize(Mangler *mang, MCContext* C);
+ void Initialize(Mangler *mang, MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts);
- void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts);
- void LowerUnalignedLoadStore(const MachineInstr *MI,
- SmallVector<MCInst, 4>& MCInsts);
- void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
+ void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift);
+
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index b00c62b09f4c..362173eda3a4 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -22,10 +22,6 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
-bool MipsFunctionInfo::globalBaseRegFixed() const {
- return FixGlobalBaseReg;
-}
-
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
}
@@ -37,13 +33,13 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
- if (FixGlobalBaseReg) // $gp is the global base register.
- return GlobalBaseReg = ST.isABI_N64() ? Mips::GP_64 : Mips::GP;
-
const TargetRegisterClass *RC;
- RC = ST.isABI_N64() ?
- Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
-
+ if (ST.inMips16Mode())
+ RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ else
+ RC = ST.isABI_N64() ?
+ (const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
+ (const TargetRegisterClass*)&Mips::CPURegsRegClass;
return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
}
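For illustration only (not part of the patch), the register-class choice made above reduces to a simple priority: Mips16 code first, then the N64 ABI, then the 32-bit GPR class. A hedged, self-contained sketch, with MipsMode and globalBaseRegClassName invented for the example:

#include <string>

enum class MipsMode { Mips16, N64, O32OrN32 };

// Mirrors the selection in MipsFunctionInfo::getGlobalBaseReg: Mips16 code
// must draw the global base register from the restricted CPU16Regs class.
std::string globalBaseRegClassName(MipsMode Mode) {
  if (Mode == MipsMode::Mips16)
    return "CPU16Regs";
  return Mode == MipsMode::N64 ? "CPU64Regs" : "CPURegs";
}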
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 0fde55cb62e8..df3c4c0de0fb 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,8 +14,11 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include <utility>
namespace llvm {
@@ -45,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// OutArgFIRange: Range of indices of all frame objects created during call to
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
- int GPFI; // Index of the frame object for restoring $gp
- mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
bool EmitNOAT;
@@ -55,8 +56,7 @@ public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
- MaxCallFrameSize(0), EmitNOAT(false)
+ OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -74,25 +74,9 @@ public:
OutArgFIRange.second = LastFI;
}
- int getGPFI() const { return GPFI; }
- void setGPFI(int FI) { GPFI = FI; }
- bool needGPSaveRestore() const { return getGPFI(); }
- bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
-
- // The first call to this function creates a frame object for dynamically
- // allocated stack area.
- int getDynAllocFI() const {
- if (!DynAllocFI)
- DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
-
- return DynAllocFI;
- }
- bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
-
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
- bool globalBaseRegFixed() const;
bool globalBaseRegSet() const;
unsigned getGlobalBaseReg();
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index f30de449f6d5..ae6ae3a59005 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -16,9 +16,11 @@
#include "MipsRegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Type.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -35,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/DebugInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
@@ -54,8 +55,7 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
/// Mips Callee Saved Registers
const uint16_t* MipsRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const
-{
+getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
else if (!Subtarget.hasMips64())
@@ -64,12 +64,11 @@ getCalleeSavedRegs(const MachineFunction *MF) const
return CSR_N32_SaveList;
assert(Subtarget.isABI_N64());
- return CSR_N64_SaveList;
+ return CSR_N64_SaveList;
}
const uint32_t*
-MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
-{
+MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
else if (!Subtarget.hasMips64())
@@ -78,23 +77,21 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
return CSR_N32_RegMask;
assert(Subtarget.isABI_N64());
- return CSR_N64_RegMask;
+ return CSR_N64_RegMask;
}
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
static const uint16_t ReservedCPURegs[] = {
- Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
- Mips::SP, Mips::FP, Mips::RA
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, Mips::SP
};
static const uint16_t ReservedCPU64Regs[] = {
- Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
- Mips::SP_64, Mips::FP_64, Mips::RA_64
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64
};
BitVector Reserved(getNumRegs());
- typedef TargetRegisterClass::iterator RegIter;
+ typedef TargetRegisterClass::const_iterator RegIter;
for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
Reserved.set(ReservedCPURegs[I]);
@@ -104,31 +101,36 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ReservedCPU64Regs[I]);
// Reserve all registers in AFGR64.
- for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
- Reg != Mips::AFGR64RegisterClass->end(); ++Reg)
+ for (RegIter Reg = Mips::AFGR64RegClass.begin(),
+ EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
Reserved.set(*Reg);
- }
- else {
+ } else {
// Reserve all registers in CPU64Regs & FGR64.
- for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin();
- Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg)
+ for (RegIter Reg = Mips::CPU64RegsRegClass.begin(),
+ EReg = Mips::CPU64RegsRegClass.end(); Reg != EReg; ++Reg)
Reserved.set(*Reg);
- for (RegIter Reg = Mips::FGR64RegisterClass->begin();
- Reg != Mips::FGR64RegisterClass->end(); ++Reg)
+ for (RegIter Reg = Mips::FGR64RegClass.begin(),
+ EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg)
Reserved.set(*Reg);
}
- // If GP is dedicated as a global base register, reserve it.
- if (MF.getInfo<MipsFunctionInfo>()->globalBaseRegFixed()) {
- Reserved.set(Mips::GP);
- Reserved.set(Mips::GP_64);
+ // Reserve FP if this function should have a dedicated frame pointer register.
+ if (MF.getTarget().getFrameLowering()->hasFP(MF)) {
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::FP_64);
}
// Reserve hardware registers.
Reserved.set(Mips::HWR29);
Reserved.set(Mips::HWR29_64);
+ // Reserve RA if in mips16 mode.
+ if (Subtarget.inMips16Mode()) {
+ Reserved.set(Mips::RA);
+ Reserved.set(Mips::RA_64);
+ }
+
return Reserved;
}
@@ -137,13 +139,9 @@ MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
+bool
+MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
}
// FrameIndex represents objects inside an abstract stack.
@@ -154,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -175,88 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- int MinCSFI = 0;
- int MaxCSFI = -1;
-
- if (CSI.size()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
- }
-
- // The following stack frame objects are always referenced relative to $sp:
- // 1. Outgoing arguments.
- // 2. Pointer to dynamically allocated stack space.
- // 3. Locations for callee-saved registers.
- // Everything else is referenced relative to whatever register
- // getFrameRegister() returns.
- unsigned FrameReg;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
- else
- FrameReg = getFrameRegister(MF);
-
- // Calculate final offset.
- // - There is no need to change the offset if the frame object is one of the
- // following: an outgoing argument, pointer to a dynamically allocated
- // stack space or a $gp restore location,
- // - If the frame object is any of the following, its offset must be adjusted
- // by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- int64_t Offset;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
- MipsFI->isDynAllocFI(FrameIndex))
- Offset = spOffset;
- else
- Offset = spOffset + (int64_t)stackSize;
-
- Offset += MI.getOperand(i+1).getImm();
-
- DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
-
- // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && !isInt<16>(Offset)) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = II->getDebugLoc();
- MipsAnalyzeImmediate AnalyzeImm;
- unsigned Size = Subtarget.isABI_N64() ? 64 : 32;
- unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi;
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
- MipsFI->setEmitNOAT();
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq except for the last one.
- for (++Inst; Inst != Seq.end() - 1; ++Inst)
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
-
- FrameReg = ATReg;
- Offset = SignExtend64<16>(Inst->ImmOpnd);
- }
-
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
}
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 0716d29b2f38..9a05e94be991 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -25,10 +25,12 @@ class MipsSubtarget;
class TargetInstrInfo;
class Type;
-struct MipsRegisterInfo : public MipsGenRegisterInfo {
+class MipsRegisterInfo : public MipsGenRegisterInfo {
+protected:
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
+public:
MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
/// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -42,16 +44,14 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -65,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const = 0;
};
} // end namespace llvm
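The header change above turns frame-index elimination into a template-method split: the shared MipsRegisterInfo code keeps the operand lookup and debug output, while each subtarget supplies eliminateFI. A simplified, self-contained sketch of that shape (RegisterInfoBase is an invented stand-in used only to show the pattern):

#include <cstdint>
#include <cstdio>

class RegisterInfoBase {
public:
  virtual ~RegisterInfoBase() {}
  // Shared driver: locate the frame index and common sizes, then defer the
  // actual operand rewrite to the subtarget-specific hook.
  void eliminateFrameIndex(int FrameIndex, uint64_t StackSize,
                           int64_t SPOffset) const {
    std::printf("FrameIndex: %d\n", FrameIndex);
    eliminateFI(FrameIndex, StackSize, SPOffset);
  }
private:
  virtual void eliminateFI(int FrameIndex, uint64_t StackSize,
                           int64_t SPOffset) const = 0;
};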
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index ce399a031201..b255e4222b7e 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -70,8 +70,8 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
let Namespace = "Mips" in {
// General Purpose Registers
- def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
- def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def ZERO : MipsGPRReg< 0, "zero">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "at">, DwarfRegNum<[1]>;
def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
@@ -98,14 +98,14 @@ let Namespace = "Mips" in {
def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
- def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
- def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
- def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
- def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+ def GP : MipsGPRReg< 28, "gp">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "sp">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "fp">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "ra">, DwarfRegNum<[31]>;
// General Purpose 64-bit Registers
- def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>, DwarfRegNum<[0]>;
- def AT_64 : Mips64GPRReg< 1, "AT", [AT]>, DwarfRegNum<[1]>;
+ def ZERO_64 : Mips64GPRReg< 0, "zero", [ZERO]>, DwarfRegNum<[0]>;
+ def AT_64 : Mips64GPRReg< 1, "at", [AT]>, DwarfRegNum<[1]>;
def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>;
def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>;
def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>;
@@ -132,97 +132,97 @@ let Namespace = "Mips" in {
def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>;
def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>;
def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>;
- def GP_64 : Mips64GPRReg< 28, "GP", [GP]>, DwarfRegNum<[28]>;
- def SP_64 : Mips64GPRReg< 29, "SP", [SP]>, DwarfRegNum<[29]>;
- def FP_64 : Mips64GPRReg< 30, "FP", [FP]>, DwarfRegNum<[30]>;
- def RA_64 : Mips64GPRReg< 31, "RA", [RA]>, DwarfRegNum<[31]>;
+ def GP_64 : Mips64GPRReg< 28, "gp", [GP]>, DwarfRegNum<[28]>;
+ def SP_64 : Mips64GPRReg< 29, "sp", [SP]>, DwarfRegNum<[29]>;
+ def FP_64 : Mips64GPRReg< 30, "fp", [FP]>, DwarfRegNum<[30]>;
+ def RA_64 : Mips64GPRReg< 31, "ra", [RA]>, DwarfRegNum<[31]>;
/// Mips Single point precision FPU Registers
- def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
- def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
- def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
- def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
- def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
- def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
- def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
- def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
- def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
- def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
- def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
- def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
- def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
- def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
- def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
- def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
- def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
- def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
- def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
- def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
- def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
- def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
- def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
- def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
- def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
- def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
- def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
- def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
- def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
- def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
- def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
- def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+ def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "f10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "f11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "f12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "f13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "f14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "f15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "f16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "f17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "f18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "f19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "f20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "f21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "f22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "f23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "f24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "f25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "f26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "f27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "f28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "f29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "f30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "f31">, DwarfRegNum<[63]>;
/// Mips Double point precision FPU Registers (aliased
/// with the single precision to hold 64 bit values)
- def D0 : AFPR< 0, "F0", [F0, F1]>;
- def D1 : AFPR< 2, "F2", [F2, F3]>;
- def D2 : AFPR< 4, "F4", [F4, F5]>;
- def D3 : AFPR< 6, "F6", [F6, F7]>;
- def D4 : AFPR< 8, "F8", [F8, F9]>;
- def D5 : AFPR<10, "F10", [F10, F11]>;
- def D6 : AFPR<12, "F12", [F12, F13]>;
- def D7 : AFPR<14, "F14", [F14, F15]>;
- def D8 : AFPR<16, "F16", [F16, F17]>;
- def D9 : AFPR<18, "F18", [F18, F19]>;
- def D10 : AFPR<20, "F20", [F20, F21]>;
- def D11 : AFPR<22, "F22", [F22, F23]>;
- def D12 : AFPR<24, "F24", [F24, F25]>;
- def D13 : AFPR<26, "F26", [F26, F27]>;
- def D14 : AFPR<28, "F28", [F28, F29]>;
- def D15 : AFPR<30, "F30", [F30, F31]>;
+ def D0 : AFPR< 0, "f0", [F0, F1]>;
+ def D1 : AFPR< 2, "f2", [F2, F3]>;
+ def D2 : AFPR< 4, "f4", [F4, F5]>;
+ def D3 : AFPR< 6, "f6", [F6, F7]>;
+ def D4 : AFPR< 8, "f8", [F8, F9]>;
+ def D5 : AFPR<10, "f10", [F10, F11]>;
+ def D6 : AFPR<12, "f12", [F12, F13]>;
+ def D7 : AFPR<14, "f14", [F14, F15]>;
+ def D8 : AFPR<16, "f16", [F16, F17]>;
+ def D9 : AFPR<18, "f18", [F18, F19]>;
+ def D10 : AFPR<20, "f20", [F20, F21]>;
+ def D11 : AFPR<22, "f22", [F22, F23]>;
+ def D12 : AFPR<24, "f24", [F24, F25]>;
+ def D13 : AFPR<26, "f26", [F26, F27]>;
+ def D14 : AFPR<28, "f28", [F28, F29]>;
+ def D15 : AFPR<30, "f30", [F30, F31]>;
/// Mips Double point precision FPU Registers in MFP64 mode.
- def D0_64 : AFPR64<0, "F0", [F0]>, DwarfRegNum<[32]>;
- def D1_64 : AFPR64<1, "F1", [F1]>, DwarfRegNum<[33]>;
- def D2_64 : AFPR64<2, "F2", [F2]>, DwarfRegNum<[34]>;
- def D3_64 : AFPR64<3, "F3", [F3]>, DwarfRegNum<[35]>;
- def D4_64 : AFPR64<4, "F4", [F4]>, DwarfRegNum<[36]>;
- def D5_64 : AFPR64<5, "F5", [F5]>, DwarfRegNum<[37]>;
- def D6_64 : AFPR64<6, "F6", [F6]>, DwarfRegNum<[38]>;
- def D7_64 : AFPR64<7, "F7", [F7]>, DwarfRegNum<[39]>;
- def D8_64 : AFPR64<8, "F8", [F8]>, DwarfRegNum<[40]>;
- def D9_64 : AFPR64<9, "F9", [F9]>, DwarfRegNum<[41]>;
- def D10_64 : AFPR64<10, "F10", [F10]>, DwarfRegNum<[42]>;
- def D11_64 : AFPR64<11, "F11", [F11]>, DwarfRegNum<[43]>;
- def D12_64 : AFPR64<12, "F12", [F12]>, DwarfRegNum<[44]>;
- def D13_64 : AFPR64<13, "F13", [F13]>, DwarfRegNum<[45]>;
- def D14_64 : AFPR64<14, "F14", [F14]>, DwarfRegNum<[46]>;
- def D15_64 : AFPR64<15, "F15", [F15]>, DwarfRegNum<[47]>;
- def D16_64 : AFPR64<16, "F16", [F16]>, DwarfRegNum<[48]>;
- def D17_64 : AFPR64<17, "F17", [F17]>, DwarfRegNum<[49]>;
- def D18_64 : AFPR64<18, "F18", [F18]>, DwarfRegNum<[50]>;
- def D19_64 : AFPR64<19, "F19", [F19]>, DwarfRegNum<[51]>;
- def D20_64 : AFPR64<20, "F20", [F20]>, DwarfRegNum<[52]>;
- def D21_64 : AFPR64<21, "F21", [F21]>, DwarfRegNum<[53]>;
- def D22_64 : AFPR64<22, "F22", [F22]>, DwarfRegNum<[54]>;
- def D23_64 : AFPR64<23, "F23", [F23]>, DwarfRegNum<[55]>;
- def D24_64 : AFPR64<24, "F24", [F24]>, DwarfRegNum<[56]>;
- def D25_64 : AFPR64<25, "F25", [F25]>, DwarfRegNum<[57]>;
- def D26_64 : AFPR64<26, "F26", [F26]>, DwarfRegNum<[58]>;
- def D27_64 : AFPR64<27, "F27", [F27]>, DwarfRegNum<[59]>;
- def D28_64 : AFPR64<28, "F28", [F28]>, DwarfRegNum<[60]>;
- def D29_64 : AFPR64<29, "F29", [F29]>, DwarfRegNum<[61]>;
- def D30_64 : AFPR64<30, "F30", [F30]>, DwarfRegNum<[62]>;
- def D31_64 : AFPR64<31, "F31", [F31]>, DwarfRegNum<[63]>;
+ def D0_64 : AFPR64<0, "f0", [F0]>, DwarfRegNum<[32]>;
+ def D1_64 : AFPR64<1, "f1", [F1]>, DwarfRegNum<[33]>;
+ def D2_64 : AFPR64<2, "f2", [F2]>, DwarfRegNum<[34]>;
+ def D3_64 : AFPR64<3, "f3", [F3]>, DwarfRegNum<[35]>;
+ def D4_64 : AFPR64<4, "f4", [F4]>, DwarfRegNum<[36]>;
+ def D5_64 : AFPR64<5, "f5", [F5]>, DwarfRegNum<[37]>;
+ def D6_64 : AFPR64<6, "f6", [F6]>, DwarfRegNum<[38]>;
+ def D7_64 : AFPR64<7, "f7", [F7]>, DwarfRegNum<[39]>;
+ def D8_64 : AFPR64<8, "f8", [F8]>, DwarfRegNum<[40]>;
+ def D9_64 : AFPR64<9, "f9", [F9]>, DwarfRegNum<[41]>;
+ def D10_64 : AFPR64<10, "f10", [F10]>, DwarfRegNum<[42]>;
+ def D11_64 : AFPR64<11, "f11", [F11]>, DwarfRegNum<[43]>;
+ def D12_64 : AFPR64<12, "f12", [F12]>, DwarfRegNum<[44]>;
+ def D13_64 : AFPR64<13, "f13", [F13]>, DwarfRegNum<[45]>;
+ def D14_64 : AFPR64<14, "f14", [F14]>, DwarfRegNum<[46]>;
+ def D15_64 : AFPR64<15, "f15", [F15]>, DwarfRegNum<[47]>;
+ def D16_64 : AFPR64<16, "f16", [F16]>, DwarfRegNum<[48]>;
+ def D17_64 : AFPR64<17, "f17", [F17]>, DwarfRegNum<[49]>;
+ def D18_64 : AFPR64<18, "f18", [F18]>, DwarfRegNum<[50]>;
+ def D19_64 : AFPR64<19, "f19", [F19]>, DwarfRegNum<[51]>;
+ def D20_64 : AFPR64<20, "f20", [F20]>, DwarfRegNum<[52]>;
+ def D21_64 : AFPR64<21, "f21", [F21]>, DwarfRegNum<[53]>;
+ def D22_64 : AFPR64<22, "f22", [F22]>, DwarfRegNum<[54]>;
+ def D23_64 : AFPR64<23, "f23", [F23]>, DwarfRegNum<[55]>;
+ def D24_64 : AFPR64<24, "f24", [F24]>, DwarfRegNum<[56]>;
+ def D25_64 : AFPR64<25, "f25", [F25]>, DwarfRegNum<[57]>;
+ def D26_64 : AFPR64<26, "f26", [F26]>, DwarfRegNum<[58]>;
+ def D27_64 : AFPR64<27, "f27", [F27]>, DwarfRegNum<[59]>;
+ def D28_64 : AFPR64<28, "f28", [F28]>, DwarfRegNum<[60]>;
+ def D29_64 : AFPR64<29, "f29", [F29]>, DwarfRegNum<[61]>;
+ def D30_64 : AFPR64<30, "f30", [F30]>, DwarfRegNum<[62]>;
+ def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>;
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
@@ -236,6 +236,9 @@ let Namespace = "Mips" in {
// Status flags register
def FCR31 : Register<"31">;
+ // fcc0 register
+ def FCC0 : Register<"fcc0">;
+
// Hardware register $29
def HWR29 : Register<"29">;
def HWR29_64 : Register<"29">;
@@ -246,26 +249,41 @@ let Namespace = "Mips" in {
//===----------------------------------------------------------------------===//
def CPURegs : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ ZERO, AT,
// Return Values and Arguments
V0, V1, A0, A1, A2, A3,
// Not preserved across procedure calls
- T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ T0, T1, T2, T3, T4, T5, T6, T7,
// Callee save
S0, S1, S2, S3, S4, S5, S6, S7,
+ // Not preserved across procedure calls
+ T8, T9,
// Reserved
- ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+ K0, K1, GP, SP, FP, RA)>;
def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add
+ // Reserved
+ ZERO_64, AT_64,
// Return Values and Arguments
V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
// Not preserved across procedure calls
- T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64,
+ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
// Callee save
S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64,
+ // Not preserved across procedure calls
+ T8_64, T9_64,
// Reserved
- ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> {
- let SubRegClasses = [(CPURegs sub_32)];
-}
+ K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)>;
+
+def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Callee save
+ S0, S1)>;
+
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -278,26 +296,24 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Return Values and Arguments
- D0, D1, D6, D7,
+ D0, D1,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5,
+ // Return Values and Arguments
+ D6, D7,
// Not preserved across procedure calls
- D2, D3, D4, D5, D8, D9,
+ D8, D9,
// Callee save
- D10, D11, D12, D13, D14, D15)> {
- let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
-}
+ D10, D11, D12, D13, D14, D15)>;
-def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
- let SubRegClasses = [(FGR32 sub_32)];
-}
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31, FCC0)>;
// Hi/Lo Registers
def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
- let SubRegClasses = [(HILO sub_32)];
-}
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
new file mode 100644
index 000000000000..1c598471a0aa
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -0,0 +1,210 @@
+//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // First, compute final stack size.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegClass.contains(Reg)) {
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
+ MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ } else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ // If the frame pointer is enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+}
+
+void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // If the frame pointer is enabled, restore the stack pointer from it.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ }
+
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
+}
+
+bool MipsSEFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *EntryBlock = MF->begin();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. Do not add if the register is
+ // RA and return address is taken, because it has already been added in
+ // method MipsTargetLowering::LowerRETURNADDR.
+ // It's killed at the spill, unless the register is RA and return address
+ // is taken.
+ unsigned Reg = CSI[i].getReg();
+ bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
+ && MF->getFrameInfo()->isReturnAddressTaken();
+ if (!IsRAAndRetAddrIsTaken)
+ EntryBlock->addLiveIn(Reg);
+
+ // Insert the spill to the stack frame.
+ bool IsKill = !IsRAAndRetAddrIsTaken;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool
+MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Reserve call frame if the size of the maximum call frame fits into 16-bit
+ // immediate field and there are no variable sized objects on the stack.
+ return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+}
+
+void MipsSEFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+
+ // Mark $fp as used if the function has a dedicated frame pointer.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(FP);
+}
+
+const MipsFrameLowering *
+llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) {
+ return new MipsSEFrameLowering(ST);
+}
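As a rough illustration of the hasReservedCallFrame policy introduced above (a sketch under stated assumptions, not the patch's code), the call frame can only be folded into the static frame when every outgoing-argument offset still fits the 16-bit immediate of the add/load/store instructions and the layout is static. canReserveCallFrame is an invented name:

#include <cstdint>

// Assumed simplification of MipsSEFrameLowering::hasReservedCallFrame.
bool canReserveCallFrame(int64_t MaxCallFrameSize, bool HasVarSizedObjects) {
  bool FitsInImm16 =
      MaxCallFrameSize >= INT16_MIN && MaxCallFrameSize <= INT16_MAX;
  return FitsInImm16 && !HasVarSizedObjects;
}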
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
new file mode 100644
index 000000000000..6481a0ac86d7
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -0,0 +1,44 @@
+//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSE_FRAMEINFO_H
+#define MIPSSE_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+
+class MipsSEFrameLowering : public MipsFrameLowering {
+public:
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
new file mode 100644
index 000000000000..eeb1de36efc6
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -0,0 +1,320 @@
+//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm,
+ tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
+ RI(*tm.getSubtargetImpl(), *this),
+ IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {}
+
+const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsSEInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsSEInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ Opc = Mips::MFC1;
+ else if (SrcReg == Mips::HI)
+ Opc = Mips::MFHI, SrcReg = 0;
+ else if (SrcReg == Mips::LO)
+ Opc = Mips::MFLO, SrcReg = 0;
+ }
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
+ if (Mips::CCRRegClass.contains(DestReg))
+ Opc = Mips::CTC1;
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ Opc = Mips::MTC1;
+ else if (DestReg == Mips::HI)
+ Opc = Mips::MTHI, DestReg = 0;
+ else if (DestReg == Mips::LO)
+ Opc = Mips::MTLO, DestReg = 0;
+ }
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
+ }
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void MipsSEInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+}
+
+void MipsSEInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
+}
+
+bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA:
+ ExpandRetRA(MBB, MI, Mips::RET);
+ break;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, MI);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, MI);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
+ }
+}
+
+/// Adjust SP by Amount bytes.
+void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+
+ if (isInt<16>(Amount)) // addiu sp, sp, amount
+ BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
+ else { // Expand immediate that doesn't fit in 16-bit.
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+ MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0);
+ BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg);
+ }
+}
+
+unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
+ Opc : 0;
+}
+
+void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+}
+
+void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+
+ assert(N < 2 && "Invalid immediate");
+ unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+}
+
+void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
+ .addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
+ .addReg(HiReg);
+}
+
+const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
+ return new MipsSEInstrInfo(TM);
+}
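The adjustStackPtr path above emits a single addiu for small amounts and otherwise materializes the amount into $at via Mips::loadImmediate. Below is a hedged, self-contained sketch of one valid expansion for a 32-bit amount; the real sequence is chosen by MipsAnalyzeImmediate and may differ, and sketchAdjustSP is an invented name:

#include <cstdint>
#include <cstdio>

// Print one possible instruction sequence for "$sp += Amount" on a 32-bit
// target: lui/addiu builds the constant in $at, addu applies it.
void sketchAdjustSP(int32_t Amount) {
  if (Amount >= INT16_MIN && Amount <= INT16_MAX) {
    std::printf("addiu $sp, $sp, %d\n", Amount);
    return;
  }
  uint32_t Hi = ((uint32_t)Amount + 0x8000u) >> 16;    // rounded so the low
  int16_t  Lo = (int16_t)((uint32_t)Amount & 0xffff);  // half sign-extends back
  std::printf("lui   $at, %u\n", (unsigned)Hi);
  std::printf("addiu $at, $at, %d\n", (int)Lo);
  std::printf("addu  $sp, $sp, $at\n");
}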
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
new file mode 100644
index 000000000000..346e74dba485
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -0,0 +1,86 @@
+//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEINSTRUCTIONINFO_H
+#define MIPSSEINSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSERegisterInfo.h"
+
+namespace llvm {
+
+class MipsSEInstrInfo : public MipsInstrInfo {
+ const MipsSERegisterInfo RI;
+ bool IsN64;
+
+public:
+ explicit MipsSEInstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+ /// Adjust SP by Amount bytes.
+ void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+ void ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
new file mode 100644
index 000000000000..043a1ef6833b
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -0,0 +1,138 @@
+//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSERegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates ADJCALLSTACKDOWN and ADJCALLSTACKUP pseudo
+// instructions.
+void MipsSERegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
+ unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+
+ II->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
+void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ //   following: an outgoing argument, a pointer to dynamically allocated
+ //   stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ MipsAnalyzeImmediate::Inst LastInst(0, 0);
+
+ MipsFI->setEmitNOAT();
+ Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
+ &LastInst);
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+
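+ // AT now holds FrameReg plus the upper part of Offset; use it as the base
+ // register and keep only the sign-extended low 16 bits as the immediate.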
+ FrameReg = ATReg;
+ Offset = SignExtend64<16>(LastInst.ImmOpnd);
+ }
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
new file mode 100644
index 000000000000..4b17b33e9a21
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -0,0 +1,39 @@
+//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEREGISTERINFO_H
+#define MIPSSEREGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class MipsSERegisterInfo : public MipsRegisterInfo {
+public:
+ MipsSERegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 00347df9ac84..11ff8092af2e 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -30,7 +30,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false)
+ HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
@@ -58,9 +58,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
bool
MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ RegClassVector &CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
CriticalPathRCs.clear();
CriticalPathRCs.push_back(hasMips64() ?
&Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 7faf77baa650..3215c44be0ef 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -86,6 +86,9 @@ protected:
// HasBitCount - Count leading '1' and '0' bits.
bool HasBitCount;
+ // InMips16Mode - can process Mips16 instructions
+ bool InMips16Mode;
+
InstrItineraryData InstrItins;
public:
@@ -124,8 +127,11 @@ public:
bool isSingleFloat() const { return IsSingleFloat; }
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
+ bool inMips16Mode() const { return InMips16Mode; }
bool isLinux() const { return IsLinux; }
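+ // "Standard encoding" means the normal 32-bit MIPS32/64 instruction set,
+ // as opposed to the 16-bit MIPS16 compressed encoding.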
+ bool hasStandardEncoding() const { return !inMips16Mode(); }
+
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; }
bool hasCondMov() const { return HasCondMov; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 858723bad9d7..03a024a361d5 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -13,6 +13,8 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
@@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
- RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
- RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
+ RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -48,8 +50,8 @@ MipsTargetMachine(const Target &T, StringRef TT,
(Subtarget.isABI_N64() ?
"E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
+ InstrInfo(MipsInstrInfo::create(*this)),
+ FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
TLInfo(*this), TSInfo(*this), JITInfo() {
}
@@ -71,24 +73,6 @@ MipselTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void Mips64ebTargetMachine::anchor() { }
-
-Mips64ebTargetMachine::
-Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void Mips64elTargetMachine::anchor() { }
-
-Mips64elTargetMachine::
-Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
namespace {
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
@@ -105,8 +89,6 @@ public:
}
virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
@@ -118,7 +100,7 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- PM->add(createMipsISelDag(getMipsTargetMachine()));
+ addPass(createMipsISelDag(getMipsTargetMachine()));
return false;
}
@@ -126,20 +108,13 @@ bool MipsPassConfig::addInstSelector() {
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MipsPassConfig::addPreEmitPass() {
- PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine()));
- return true;
-}
+ MipsTargetMachine &TM = getMipsTargetMachine();
+ addPass(createMipsDelaySlotFillerPass(TM));
-bool MipsPassConfig::addPreRegAlloc() {
- // Do not restore $gp if target is Mips64.
- // In N32/64, $gp is a callee-saved register.
- if (!getMipsSubtarget().hasMips64())
- PM->add(createMipsEmitGPRestorePass(getMipsTargetMachine()));
- return true;
-}
+ // NOTE: long branch has not been implemented for mips16.
+ if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+ addPass(createMipsLongBranchPass(TM));
-bool MipsPassConfig::addPreSched2() {
- PM->add(createMipsExpandPseudoPass(getMipsTargetMachine()));
return true;
}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 80c00e80f12c..21b49e673ca9 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -25,56 +25,56 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class formatted_raw_ostream;
+class formatted_raw_ostream;
+class MipsRegisterInfo;
+
+class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ const MipsInstrInfo *InstrInfo;
+ const MipsFrameLowering *FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
- class MipsTargetMachine : public LLVMTargetMachine {
- MipsSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- MipsInstrInfo InstrInfo;
- MipsFrameLowering FrameLowering;
- MipsTargetLowering TLInfo;
- MipsSelectionDAGInfo TSInfo;
- MipsJITInfo JITInfo;
-
- public:
- MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- virtual const MipsInstrInfo *getInstrInfo() const
- { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const
- { return &FrameLowering; }
- virtual const MipsSubtarget *getSubtargetImpl() const
- { return &Subtarget; }
- virtual const TargetData *getTargetData() const
- { return &DataLayout;}
- virtual MipsJITInfo *getJITInfo()
- { return &JITInfo; }
-
-
- virtual const MipsRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE);
-
- };
+public:
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool isLittle);
+
+ virtual ~MipsTargetMachine() { delete InstrInfo; }
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+};
-/// MipsebTargetMachine - Mips32 big endian target machine.
+/// MipsebTargetMachine - Mips32/64 big endian target machine.
///
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -85,7 +85,7 @@ public:
CodeGenOpt::Level OL);
};
-/// MipselTargetMachine - Mips32 little endian target machine.
+/// MipselTargetMachine - Mips32/64 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -96,29 +96,6 @@ public:
CodeGenOpt::Level OL);
};
-/// Mips64ebTargetMachine - Mips64 big endian target machine.
-///
-class Mips64ebTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// Mips64elTargetMachine - Mips64 little endian target machine.
-///
-class Mips64elTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
} // End llvm namespace
#endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
new file mode 100644
index 000000000000..7cb16b4dd810
--- /dev/null
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -0,0 +1,34 @@
+set(LLVM_TARGET_DEFINITIONS NVPTX.td)
+
+
+tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(NVPTXCommonTableGen)
+
+set(NVPTXCodeGen_sources
+ NVPTXFrameLowering.cpp
+ NVPTXInstrInfo.cpp
+ NVPTXISelDAGToDAG.cpp
+ NVPTXISelLowering.cpp
+ NVPTXRegisterInfo.cpp
+ NVPTXSubtarget.cpp
+ NVPTXTargetMachine.cpp
+ NVPTXSplitBBatBar.cpp
+ NVPTXLowerAggrCopies.cpp
+ NVPTXutil.cpp
+ NVPTXAllocaHoisting.cpp
+ NVPTXAsmPrinter.cpp
+ NVPTXUtilities.cpp
+ VectorElementize.cpp
+ )
+
+add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
+
+add_dependencies(LLVMNVPTXCodeGen intrinsics_gen)
+
+add_subdirectory(TargetInfo)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/NVPTX/InstPrinter/CMakeLists.txt b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..ae4c75119687
--- /dev/null
+++ b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMNVPTXAsmPrinter
+ NVPTXInstPrinter.cpp
+ )
+
+add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
index af5d20029ffc..032b57347eb0 100644
--- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = PTXAsmPrinter
-parent = PTX
+name = NVPTXAsmPrinter
+parent = NVPTX
required_libraries = MC Support
-add_to_library_groups = PTX
+add_to_library_groups = NVPTX
diff --git a/lib/Target/PTX/InstPrinter/Makefile b/lib/Target/NVPTX/InstPrinter/Makefile
index 0ccfe4407896..7b7865436bf3 100644
--- a/lib/Target/PTX/InstPrinter/Makefile
+++ b/lib/Target/NVPTX/InstPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/PTX/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+##===- lib/Target/NVPTX/AsmPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,10 +7,9 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXAsmPrinter
+LIBRARYNAME = LLVMNVPTXAsmPrinter
# Hack: we need to include 'main' ptx target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
new file mode 100644
index 000000000000..10051c768058
--- /dev/null
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -0,0 +1 @@
+// Placeholder
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index 15a1eb532837..e2d6ed2b89ed 100644
--- a/lib/Target/PTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -20,13 +20,13 @@ subdirectories = InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
-name = PTX
+name = NVPTX
parent = Target
has_asmprinter = 1
[component_1]
type = Library
-name = PTXCodeGen
-parent = PTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
-add_to_library_groups = PTX
+name = NVPTXCodeGen
+parent = NVPTX
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
+add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..a030d9f8f1f1
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMNVPTXDesc
+ NVPTXMCAsmInfo.cpp
+ NVPTXMCTargetDesc.cpp
+ )
+
+add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
index 19b80c5ce9e3..01a051a11aa6 100644
--- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = PTXDesc
-parent = PTX
-required_libraries = MC PTXAsmPrinter PTXInfo Support
-add_to_library_groups = PTX
+name = NVPTXDesc
+parent = NVPTX
+required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
+add_to_library_groups = NVPTX
diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/NVPTX/MCTargetDesc/Makefile
index 35f5a7b2e6ad..31d06cb5948d 100644
--- a/lib/Target/PTX/MCTargetDesc/Makefile
+++ b/lib/Target/NVPTX/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===##
+##===- lib/Target/NVPTX/TargetDesc/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXDesc
+LIBRARYNAME = LLVMNVPTXDesc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
new file mode 100644
index 000000000000..454583850b71
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -0,0 +1,88 @@
+//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the NVPTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXBASEINFO_H
+#define NVPTXBASEINFO_H
+
+namespace llvm {
+
+enum AddressSpace {
+ ADDRESS_SPACE_GENERIC = 0,
+ ADDRESS_SPACE_GLOBAL = 1,
+ ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
+ ADDRESS_SPACE_SHARED = 3,
+ ADDRESS_SPACE_CONST = 4,
+ ADDRESS_SPACE_LOCAL = 5,
+
+ // NVVM Internal
+ ADDRESS_SPACE_PARAM = 101
+};
+
+enum PropertyAnnotation {
+ PROPERTY_MAXNTID_X = 0,
+ PROPERTY_MAXNTID_Y,
+ PROPERTY_MAXNTID_Z,
+ PROPERTY_REQNTID_X,
+ PROPERTY_REQNTID_Y,
+ PROPERTY_REQNTID_Z,
+ PROPERTY_MINNCTAPERSM,
+ PROPERTY_ISTEXTURE,
+ PROPERTY_ISSURFACE,
+ PROPERTY_ISSAMPLER,
+ PROPERTY_ISREADONLY_IMAGE_PARAM,
+ PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+ PROPERTY_ISKERNEL_FUNCTION,
+ PROPERTY_ALIGN,
+
+ // last property
+ PROPERTY_LAST
+};
+
+const unsigned AnnotationNameLen = 8; // length of each annotation name
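+// Keep these names in the same order as the PropertyAnnotation enum above.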
+const char
+PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+ "maxntidx", // PROPERTY_MAXNTID_X
+ "maxntidy", // PROPERTY_MAXNTID_Y
+ "maxntidz", // PROPERTY_MAXNTID_Z
+ "reqntidx", // PROPERTY_REQNTID_X
+ "reqntidy", // PROPERTY_REQNTID_Y
+ "reqntidz", // PROPERTY_REQNTID_Z
+ "minctasm", // PROPERTY_MINNCTAPERSM
+ "texture", // PROPERTY_ISTEXTURE
+ "surface", // PROPERTY_ISSURFACE
+ "sampler", // PROPERTY_ISSAMPLER
+ "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+ "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "kernel", // PROPERTY_ISKERNEL_FUNCTION
+ "align", // PROPERTY_ALIGN
+
+ // last property
+ "proplast", // PROPERTY_LAST
+};
+
+// name of named metadata used for global annotations
+#if defined(__GNUC__)
+// Because this is declared static and some of the .cpp files that include
+// this header do not use it, gcc warns when compiling those files; hence
+// __attribute__((unused)).
+__attribute__((unused))
+#endif
+static const char* NamedMDForAnnotations = "nvvm.annotations";
+
+}
+
+
+#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
new file mode 100644
index 000000000000..1d4166575da5
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -0,0 +1,63 @@
+//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the NVPTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool CompileForDebugging;
+
+// -debug-compile - Command line option to inform opt and llc passes to
+// compile for debugging
+static cl::opt<bool, true>
+Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
+ cl::location(CompileForDebugging),
+ cl::init(false));
+
+void NVPTXMCAsmInfo::anchor() { }
+
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::nvptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasSingleParameterDotFile = false;
+
+ InlineAsmStart = " inline asm";
+ InlineAsmEnd = " inline asm";
+
+ SupportsDebugInformation = CompileForDebugging;
+ HasDotTypeDotSizeDirective = false;
+
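+ // PTX spells data directives as typeless .b8/.b16/.b32/.b64 rather than the
+ // usual .byte/.short/.word/.quad.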
+ Data8bitsDirective = " .b8 ";
+ Data16bitsDirective = " .b16 ";
+ Data32bitsDirective = " .b32 ";
+ Data64bitsDirective = " .b64 ";
+ PrivateGlobalPrefix = "";
+ ZeroDirective = " .b8";
+ AsciiDirective = " .b8";
+ AscizDirective = " .b8";
+
+ // @TODO: Can we just disable this?
+ GlobalDirective = "\t// .globl\t";
+}
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 32ca0696950e..82097daa4ae5 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
+//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,24 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the PTXMCAsmInfo class.
+// This file contains the declaration of the NVPTXMCAsmInfo class.
//
//===----------------------------------------------------------------------===//
-#ifndef PTX_MCASM_INFO_H
-#define PTX_MCASM_INFO_H
+#ifndef NVPTX_MCASM_INFO_H
+#define NVPTX_MCASM_INFO_H
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
- class Target;
- class StringRef;
+class Target;
+class StringRef;
- class PTXMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
- public:
- explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
- };
+class NVPTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+public:
+ explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
+};
} // namespace llvm
-#endif // PTX_MCASM_INFO_H
+#endif // NVPTX_MCASM_INFO_H
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
new file mode 100644
index 000000000000..44aa01ca6e30
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -0,0 +1,91 @@
+//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides NVPTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXMCTargetDesc.h"
+#include "NVPTXMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "NVPTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "NVPTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "NVPTXGenRegisterInfo.inc"
+
+
+using namespace llvm;
+
+static MCInstrInfo *createNVPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitNVPTXMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitNVPTXMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
+ createNVPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
+ createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
+ createNVPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
+ createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
+ createNVPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
+ createNVPTXMCSubtargetInfo);
+
+}
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 542638ace135..af95c76f92b2 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,30 +7,30 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides PTX specific target descriptions.
+// This file provides NVPTX specific target descriptions.
//
//===----------------------------------------------------------------------===//
-#ifndef PTXMCTARGETDESC_H
-#define PTXMCTARGETDESC_H
+#ifndef NVPTXMCTARGETDESC_H
+#define NVPTXMCTARGETDESC_H
namespace llvm {
class Target;
-extern Target ThePTX32Target;
-extern Target ThePTX64Target;
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
} // End llvm namespace
// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
-#include "PTXGenRegisterInfo.inc"
+#include "NVPTXGenRegisterInfo.inc"
// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
-#include "PTXGenInstrInfo.inc"
+#include "NVPTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
-#include "PTXGenSubtargetInfo.inc"
+#include "NVPTXGenSubtargetInfo.inc"
#endif
diff --git a/lib/Target/PTX/Makefile b/lib/Target/NVPTX/Makefile
index fa09634965ac..8db20ebed2c2 100644
--- a/lib/Target/PTX/Makefile
+++ b/lib/Target/NVPTX/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===##
+##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,15 +8,15 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMPTXCodeGen
-TARGET = PTX
+LIBRARYNAME = LLVMNVPTXCodeGen
+TARGET = NVPTX
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PTXGenAsmWriter.inc \
- PTXGenDAGISel.inc \
- PTXGenInstrInfo.inc \
- PTXGenRegisterInfo.inc \
- PTXGenSubtargetInfo.inc
+BUILT_SOURCES = NVPTXGenAsmWriter.inc \
+ NVPTXGenDAGISel.inc \
+ NVPTXGenInstrInfo.inc \
+ NVPTXGenRegisterInfo.inc \
+ NVPTXGenSubtargetInfo.inc
DIRS = InstPrinter TargetInfo MCTargetDesc
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
new file mode 100644
index 000000000000..b5684883fc95
--- /dev/null
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -0,0 +1,49 @@
+//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The strings allocated from a managed string pool are owned by the string
+// pool and will be deleted together with the managed string pool.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_SUPPORT_MANAGED_STRING_H
+#define LLVM_SUPPORT_MANAGED_STRING_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace llvm {
+
+/// ManagedStringPool - The strings allocated from a managed string pool are
+/// owned by the string pool and will be deleted together with the managed
+/// string pool.
+class ManagedStringPool {
+ SmallVector<std::string *, 8> Pool;
+
+public:
+ ManagedStringPool() {}
+ ~ManagedStringPool() {
+ SmallVector<std::string *, 8>::iterator Current = Pool.begin();
+ while (Current != Pool.end()) {
+ delete *Current;
+ Current++;
+ }
+ }
+
+ std::string *getManagedString(const char *S) {
+ std::string *Str = new std::string(S);
+ Pool.push_back(Str);
+ return Str;
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
new file mode 100644
index 000000000000..a8d082a4d8b0
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -0,0 +1,137 @@
+//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM NVPTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_H
+#define LLVM_TARGET_NVPTX_H
+
+#include "llvm/Value.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include <cassert>
+#include <iosfwd>
+
+namespace llvm {
+class NVPTXTargetMachine;
+class FunctionPass;
+class formatted_raw_ostream;
+
+namespace NVPTXCC {
+enum CondCodes {
+ EQ,
+ NE,
+ LT,
+ LE,
+ GT,
+ GE
+};
+}
+
+inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
+ switch (CC) {
+ case NVPTXCC::NE: return "ne";
+ case NVPTXCC::EQ: return "eq";
+ case NVPTXCC::LT: return "lt";
+ case NVPTXCC::LE: return "le";
+ case NVPTXCC::GT: return "gt";
+ case NVPTXCC::GE: return "ge";
+ }
+ llvm_unreachable("Unknown condition code");
+}
+
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
+FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
+FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+
+bool isImageOrSamplerVal(const Value *, const Module *);
+
+extern Target TheNVPTXTarget32;
+extern Target TheNVPTXTarget64;
+
+namespace NVPTX
+{
+enum DrvInterface {
+ NVCL,
+ CUDA,
+ TEST
+};
+
+// A field inside TSFlags needs a shift and a mask. The usage is
+// always as follows :
+// ((TSFlags & fieldMask) >> fieldShift)
+// The enum keeps the mask, the shift, and all valid values of the
+// field in one place.
+enum VecInstType {
+ VecInstTypeShift = 0,
+ VecInstTypeMask = 0xF,
+
+ VecNOP = 0,
+ VecLoad = 1,
+ VecStore = 2,
+ VecBuild = 3,
+ VecShuffle = 4,
+ VecExtract = 5,
+ VecInsert = 6,
+ VecDest = 7,
+ VecOther = 15
+};
+
+enum SimpleMove {
+ SimpleMoveMask = 0x10,
+ SimpleMoveShift = 4
+};
+enum LoadStore {
+ isLoadMask = 0x20,
+ isLoadShift = 5,
+ isStoreMask = 0x40,
+ isStoreShift = 6
+};
+
+namespace PTXLdStInstCode {
+enum AddressSpace{
+ GENERIC = 0,
+ GLOBAL = 1,
+ CONSTANT = 2,
+ SHARED = 3,
+ PARAM = 4,
+ LOCAL = 5
+};
+enum FromType {
+ Unsigned = 0,
+ Signed,
+ Float
+};
+enum VecType {
+ Scalar = 1,
+ V2 = 2,
+ V4 = 4
+};
+}
+}
+} // end namespace llvm;
+
+// Defines symbolic names for NVPTX registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "NVPTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the NVPTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "NVPTXGenInstrInfo.inc"
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
new file mode 100644
index 000000000000..ae7710e54f08
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -0,0 +1,44 @@
+//===- NVPTX.td - Describe the NVPTX Target Machine ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the NVPTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "NVPTXRegisterInfo.td"
+include "NVPTXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+// - We use the SM version number instead of explicit feature table.
+// - Need at least one feature to avoid generating zero sized array by
+// TableGen in NVPTXGenSubtarget.inc.
+//===----------------------------------------------------------------------===//
+def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;
+
+//===----------------------------------------------------------------------===//
+// NVPTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"sm_10", [FeatureDummy]>;
+
+
+def NVPTXInstrInfo : InstrInfo {
+}
+
+def NVPTX : Target {
+ let InstructionSet = NVPTXInstrInfo;
+}
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
new file mode 100644
index 000000000000..668c39308f71
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -0,0 +1,48 @@
+//===-- AllocaHoisting.cpp - Hoist allocas to the entry block --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "NVPTXAllocaHoisting.h"
+
+namespace llvm {
+
+bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+
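+ // Only allocas with a constant array size are hoisted; dynamically sized
+ // allocas are left where they are.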
+ for (Function::iterator E = function.end(); I != E; ++I) {
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
+ if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
+ allocaInst->moveBefore(firstTerminatorInst);
+ functionModified = true;
+ }
+ }
+ }
+
+ return functionModified;
+}
+
+char NVPTXAllocaHoisting::ID = 1;
+RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
+ "Hoisting alloca instructions in non-entry "
+ "blocks to the entry block");
+
+FunctionPass *createAllocaHoisting() {
+ return new NVPTXAllocaHoisting();
+}
+
+} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
new file mode 100644
index 000000000000..24b3bd589812
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -0,0 +1,49 @@
+//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_ALLOCA_HOISTING_H_
+#define NVPTX_ALLOCA_HOISTING_H_
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class FunctionPass;
+class Function;
+
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+
+ virtual const char *getPassName() const {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ virtual bool runOnFunction(Function &function);
+};
+
+extern FunctionPass *createAllocaHoisting();
+
+} // end namespace llvm
+
+#endif // NVPTX_ALLOCA_HOISTING_H_
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
new file mode 100644
index 000000000000..f2b96163f43d
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -0,0 +1,2064 @@
+//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAsmPrinter.h"
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTXNumRegisters.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Assembly/Writer.h"
+#include "cl_common_defines.h"
+#include <sstream>
+using namespace llvm;
+
+
+#include "NVPTXGenAsmWriter.inc"
+
+bool RegAllocNilUsed = true;
+
+#define DEPOTNAME "__local_depot"
+
+static cl::opt<bool>
+EmitLineNumbers("nvptx-emit-line-numbers",
+ cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
+ cl::init(true));
+
+namespace llvm {
+bool InterleaveSrcInPtx = false;
+}
+
+static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
+
+
+
+
+// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
+// cannot just link to the existing version.
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+using namespace nvptx;
+const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0)
+ llvm_unreachable("Unknown constant value to lower!");
+
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
+{
+ if (!EmitLineNumbers)
+ return;
+ if (ignoreLoc(MI))
+ return;
+
+ DebugLoc curLoc = MI.getDebugLoc();
+
+ if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ return;
+
+ if (prevDebugLoc == curLoc)
+ return;
+
+ prevDebugLoc = curLoc;
+
+ if (curLoc.isUnknown())
+ return;
+
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ //const TargetMachine &TM = MF->getTarget();
+
+ const LLVMContext &ctx = MF->getFunction()->getContext();
+ DIScope Scope(curLoc.getScope(ctx));
+
+ if (!Scope.Verify())
+ return;
+
+ StringRef fileName(Scope.getFilename());
+ StringRef dirName(Scope.getDirectory());
+ SmallString<128> FullPathName = dirName;
+ if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
+ sys::path::append(FullPathName, fileName);
+ fileName = FullPathName.str();
+ }
+
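+ // Only emit .loc for files that already have an index in filenameMap.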
+ if (filenameMap.find(fileName.str()) == filenameMap.end())
+ return;
+
+
+ // Emit the line from the source file.
+ if (llvm::InterleaveSrcInPtx)
+ this->emitSrcInText(fileName.str(), curLoc.getLine());
+
+ std::stringstream temp;
+ temp << "\t.loc " << filenameMap[fileName.str()]
+ << " " << curLoc.getLine() << " " << curLoc.getCol();
+ OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+}
+
+void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ emitLineNumberAsDotLoc(*MI);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const Function *F,
+ raw_ostream &O)
+{
+ const TargetData *TD = TM.getTargetData();
+ const TargetLowering *TLI = TM.getTargetLowering();
+
+ Type *Ty = F->getReturnType();
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return;
+
+ O << " (";
+
+ if (isABI) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ size = ITy->getBitWidth();
+ if (size < 32) size = 32;
+ } else {
+ assert(Ty->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = Ty->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " func_retval0";
+ }
+ else if (isa<PointerType>(Ty)) {
+ O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+ << " func_retval0";
+ } else {
+ if ((Ty->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(Ty)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ totalsz += sz/8;
+ }
+ }
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align "
+ << retAlignment
+ << " .b8 func_retval0["
+ << totalsz << "]";
+ } else
+ assert(false &&
+ "Unknown return type");
+ }
+ } else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, Ty, vtparts);
+ unsigned idx = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " func_retval" << idx;
+ if (j<je-1) O << ", ";
+ ++idx;
+ }
+ if (i < e-1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ return;
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ printReturnValStr(F, O);
+}
+
+void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Set up
+ MRI = &MF->getRegInfo();
+ F = MF->getFunction();
+ emitLinkageDirective(F,O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else {
+ O << ".func ";
+ printReturnValStr(*MF, O);
+ }
+
+ O << *CurrentFnSym;
+
+ emitFunctionParamList(*MF, O);
+
+ if (llvm::isKernelFunction(*F))
+ emitKernelFunctionDirectives(*F, O);
+
+ OutStreamer.EmitRawText(O.str());
+
+ prevDebugLoc = DebugLoc();
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ unsigned numRegClasses = TRI.getNumRegClasses();
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+ OutStreamer.EmitRawText(StringRef("{\n"));
+ setAndEmitFunctionVirtualRegisters(*MF);
+
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+ emitDemotedVars(MF->getFunction(), O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(StringRef("}\n"));
+ delete []VRidGlobal2LocalMap;
+}
+
+
+void
+NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
+ raw_ostream &O) const {
+ // If any of the reqntid* annotations is specified in the NVVM IR, emit the
+ // reqntid directive and default the unspecified dimensions to 1.
+ // If none of reqntid* is specified, do not emit the reqntid directive.
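+ // Illustrative example: if the IR carries only reqntidx = 16 and
+ // reqntidy = 8, this emits ".reqntid 16, 8, 1".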
+ unsigned reqntidx, reqntidy, reqntidz;
+ bool specified = false;
+ if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
+ else specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
+ else specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
+ else specified = true;
+
+ if (specified)
+ O << ".reqntid " << reqntidx << ", "
+ << reqntidy << ", " << reqntidz << "\n";
+
+ // If any of the maxntid* annotations is specified in the NVVM IR, emit the
+ // maxntid directive and default the unspecified dimensions to 1.
+ // If none of maxntid* is specified, do not emit the maxntid directive.
+ unsigned maxntidx, maxntidy, maxntidz;
+ specified = false;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
+ else specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
+ else specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
+ else specified = true;
+
+ if (specified)
+ O << ".maxntid " << maxntidx << ", "
+ << maxntidy << ", " << maxntidz << "\n";
+
+ unsigned mincta;
+ if (llvm::getMinCTASm(F, mincta))
+ O << ".minnctapersm " << mincta << "\n";
+}
+
+void
+NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass * RC = MRI->getRegClass(vr);
+ unsigned id = RC->getID();
+
+ std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
+ unsigned mapped_vr = regmap[vr];
+
+ if (!isVec) {
+ O << getNVPTXRegClassStr(RC) << mapped_vr;
+ return;
+ }
+ // Vector virtual register
+ if (getNVPTXVectorSize(RC) == 4)
+ O << "{"
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
+ << "}";
+ else if (getNVPTXVectorSize(RC) == 2)
+ O << "{"
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
+ << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
+ << "}";
+ else
+ llvm_unreachable("Unsupported vector size");
+}
+
+void
+NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
+ getVirtualRegisterName(vr, isVec, O);
+}
+
+void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
+ const char *Modifier,
+ raw_ostream &O) {
+ static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
+ int Imm = (int)MO.getImm();
+ if(0 == strcmp(Modifier, "vecelem"))
+ O << "_" << vecelem[Imm];
+ else if(0 == strcmp(Modifier, "vecv4comm1")) {
+ if((Imm < 0) || (Imm > 3))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv4comm2")) {
+ if((Imm < 4) || (Imm > 7))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv4pos")) {
+ if(Imm < 0) Imm = 0;
+ O << "_" << vecelem[Imm%4];
+ }
+ else if(0 == strcmp(Modifier, "vecv2comm1")) {
+ if((Imm < 0) || (Imm > 1))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv2comm2")) {
+ if((Imm < 2) || (Imm > 3))
+ O << "//";
+ }
+ else if(0 == strcmp(Modifier, "vecv2pos")) {
+ if(Imm < 0) Imm = 0;
+ O << "_" << vecelem[Imm%2];
+ }
+ else
+ llvm_unreachable("Unknown Modifier on immediate operand");
+}
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg() == NVPTX::VRDepot)
+ O << DEPOTNAME << getFunctionNumber();
+ else
+ O << getRegisterName(MO.getReg());
+ } else {
+ if (!Modifier)
+ emitVirtualRegister(MO.getReg(), false, O);
+ else {
+ if (strcmp(Modifier, "vecfull") == 0)
+ emitVirtualRegister(MO.getReg(), true, O);
+ else
+ llvm_unreachable(
+ "Don't know how to handle the modifier on virtual register.");
+ }
+ }
+ return;
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier)
+ O << MO.getImm();
+ else if (strstr(Modifier, "vec") == Modifier)
+ printVecModifiedImmediate(MO, Modifier, O);
+ else
+ llvm_unreachable("Don't know how to handle modifier on immediate operand");
+ return;
+
+ case MachineOperand::MO_FPImmediate:
+ printFPConstant(MO.getFPImm(), O);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol: {
+ const char * symbname = MO.getSymbolName();
+ if (strstr(symbname, ".PARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname+6, "%u[];", &index);
+ printParamName(index, O);
+ }
+ else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname+9, "%u[];", &index);
+ O << *CurrentFnSym << "_param_" << index << "_offset";
+ }
+ else
+ O << symbname;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ default:
+ llvm_unreachable("Operand type not supported.");
+ }
+}
+
+void NVPTXAsmPrinter::
+printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+#ifndef __OPTIMIZE__
+ O << "\t// Implicit def :";
+ //printOperand(MI, 0);
+ O << "\n";
+#endif
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ if (Modifier && !strcmp(Modifier, "add")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ } else {
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print ',0' or '+0'
+ O << "+";
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier)
+{
+ if (Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ int Imm = (int)MO.getImm();
+ if (!strcmp(Modifier, "volatile")) {
+ if (Imm)
+ O << ".volatile";
+ } else if (!strcmp(Modifier, "addsp")) {
+ switch (Imm) {
+ case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
+ case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
+ case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
+ case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
+ case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+ case NVPTX::PTXLdStInstCode::GENERIC:
+ if (!nvptxSubtarget.hasGenericLdSt())
+ O << ".global";
+ break;
+ default:
+ llvm_unreachable("wrong value");
+ }
+ }
+ else if (!strcmp(Modifier, "sign")) {
+ if (Imm==NVPTX::PTXLdStInstCode::Signed)
+ O << "s";
+ else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+ O << "u";
+ else
+ O << "f";
+ }
+ else if (!strcmp(Modifier, "vec")) {
+ if (Imm==NVPTX::PTXLdStInstCode::V2)
+ O << ".v2";
+ else if (Imm==NVPTX::PTXLdStInstCode::V4)
+ O << ".v4";
+ }
+ else
+ llvm_unreachable("unknown modifier");
+ }
+ else
+ llvm_unreachable("unknown modifier");
+}
+
+void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+
+ emitLinkageDirective(F,O);
+ if (llvm::isKernelFunction(*F))
+ O << ".entry ";
+ else
+ O << ".func ";
+ printReturnValStr(F, O);
+ O << *CurrentFnSym << "\n";
+ emitFunctionParamList(F, O);
+ O << ";\n";
+}
+
+static bool usedInGlobalVarDef(const Constant *C)
+{
+ if (!C)
+ return false;
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->getName().str() == "llvm.used")
+ return false;
+ return true;
+ }
+
+ for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+ ui!=ue; ++ui) {
+ const Constant *C = dyn_cast<Constant>(*ui);
+ if (usedInGlobalVarDef(C))
+ return true;
+ }
+ return false;
+}
+
+static bool usedInOneFunc(const User *U, Function const *&oneFunc)
+{
+ if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
+ if (othergv->getName().str() == "llvm.used")
+ return true;
+ }
+
+ if (const Instruction *instr = dyn_cast<Instruction>(U)) {
+ if (instr->getParent() && instr->getParent()->getParent()) {
+ const Function *curFunc = instr->getParent()->getParent();
+ if (oneFunc && (curFunc != oneFunc))
+ return false;
+ oneFunc = curFunc;
+ return true;
+ }
+ else
+ return false;
+ }
+
+ if (const MDNode *md = dyn_cast<MDNode>(U))
+ if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
+ (md->getName().str() == "llvm.dbg.sp")))
+ return true;
+
+
+ for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
+ ui!=ue; ++ui) {
+ if (usedInOneFunc(*ui, oneFunc) == false)
+ return false;
+ }
+ return true;
+}
+
+/* Find out if a global variable can be demoted to local scope.
+ * Currently, this is valid for CUDA shared variables, which have local
+ * scope and global lifetime. So the conditions to check are:
+ * 1. Is the global variable in shared address space?
+ * 2. Does it have internal linkage?
+ * 3. Is the global variable referenced only in one function?
+ */
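+// For example, a CUDA __shared__ array with internal linkage that is only
+// referenced from a single kernel satisfies all three conditions and is
+// printed inside that function (see emitDemotedVars) instead of at module
+// scope.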
+static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
+ if (gv->hasInternalLinkage() == false)
+ return false;
+ const PointerType *Pty = gv->getType();
+ if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
+ return false;
+
+ const Function *oneFunc = 0;
+
+ bool flag = usedInOneFunc(gv, oneFunc);
+ if (flag == false)
+ return false;
+ if (!oneFunc)
+ return false;
+ f = oneFunc;
+ return true;
+}
+
+static bool useFuncSeen(const Constant *C,
+ llvm::DenseMap<const Function *, bool> &seenMap) {
+ for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+ ui!=ue; ++ui) {
+ if (const Constant *cu = dyn_cast<Constant>(*ui)) {
+ if (useFuncSeen(cu, seenMap))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
+ const BasicBlock *bb = I->getParent();
+ if (!bb) continue;
+ const Function *caller = bb->getParent();
+ if (!caller) continue;
+ if (seenMap.find(caller) != seenMap.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+ llvm::DenseMap<const Function *, bool> seenMap;
+ for (Module::const_iterator FI=M.begin(), FE=M.end();
+ FI!=FE; ++FI) {
+ const Function *F = FI;
+
+ if (F->isDeclaration()) {
+ if (F->use_empty())
+ continue;
+ if (F->getIntrinsicID())
+ continue;
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ continue;
+ }
+ for (Value::const_use_iterator iter=F->use_begin(),
+ iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+ if (const Constant *C = dyn_cast<Constant>(*iter)) {
+ if (usedInGlobalVarDef(C)) {
+ // The use is in the initialization of a global variable
+ // that is a function pointer, so print a declaration
+ // for the original function
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ // Emit a declaration of this function if the function that
+ // uses this constant expr has already been seen.
+ if (useFuncSeen(C, seenMap)) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+
+ if (!isa<Instruction>(*iter)) continue;
+ const Instruction *instr = cast<Instruction>(*iter);
+ const BasicBlock *bb = instr->getParent();
+ if (!bb) continue;
+ const Function *caller = bb->getParent();
+ if (!caller) continue;
+
+ // If a caller has already been seen, then the caller appears in the
+ // module before the callee, so print out a declaration for the callee.
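+ // For example, if bar() appears before foo() in the module and bar()
+ // calls foo(), a forward declaration of foo (".func ... foo(...);") is
+ // emitted before bar's body so the PTX assembler sees foo before its use.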
+ if (seenMap.find(caller) != seenMap.end()) {
+ CurrentFnSym = Mang->getSymbol(F);
+ emitDeclaration(F, O);
+ break;
+ }
+ }
+ seenMap[F] = true;
+ }
+}
+
+void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ unsigned i=1;
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef Filename(DIUnit.getFilename());
+ StringRef Dirname(DIUnit.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ ++i;
+ }
+
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I) {
+ DISubprogram SP(*I);
+ StringRef Filename(SP.getFilename());
+ StringRef Dirname(SP.getDirectory());
+ SmallString<128> FullPathName = Dirname;
+ if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+ sys::path::append(FullPathName, Filename);
+ Filename = FullPathName.str();
+ }
+ if (filenameMap.find(Filename.str()) != filenameMap.end())
+ continue;
+ filenameMap[Filename.str()] = i;
+ ++i;
+ }
+}
+
+bool NVPTXAsmPrinter::doInitialization (Module &M) {
+
+ SmallString<128> Str1;
+ raw_svector_ostream OS1(Str1);
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // We need to call the parent's one explicitly.
+ //bool Result = AsmPrinter::doInitialization(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // Emit header before any dwarf directives are emitted below.
+ emitHeader(M, OS1);
+ OutStreamer.EmitRawText(OS1.str());
+
+
+ // Already commented out
+ //bool Result = AsmPrinter::doInitialization(M);
+
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ recordAndEmitFilenames(M);
+
+ SmallString<128> Str2;
+ raw_svector_ostream OS2(Str2);
+
+ emitDeclarations(M, OS2);
+
+ // Print out module-level global variables here.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I, OS2);
+
+ OS2 << '\n';
+
+ OutStreamer.EmitRawText(OS2.str());
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+ O << "//\n";
+ O << "// Generated by LLVM NVPTX Back-End\n";
+ O << "//\n";
+ O << "\n";
+
+ O << ".version 3.0\n";
+
+ O << ".target ";
+ O << nvptxSubtarget.getTargetName();
+
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ O << ", texmode_independent";
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (!nvptxSubtarget.hasDouble())
+ O << ", map_f64_to_f32";
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ O << ", debug";
+
+ O << "\n";
+
+ O << ".address_size ";
+ if (nvptxSubtarget.is64Bit())
+ O << "64";
+ else
+ O << "32";
+ O << "\n";
+
+ O << "\n";
+}
+
+bool NVPTXAsmPrinter::doFinalization(Module &M) {
+ // XXX Temporarily remove global variables so that doFinalization() will
+ // not emit them again (global variables are emitted at the beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+
+
+ //bool Result = AsmPrinter::doFinalization(M);
+ // Instead of calling the parent's doFinalization, we could clone the
+ // parent's doFinalization and customize it here. Currently, we would have
+ // to conditionally compile out (NVISA) the EmitGlobals() in the parent's
+ // doFinalization, which is too intrusive.
+ //
+ // Same for the doInitialization.
+ //return Result;
+}
+
+// This function emits appropriate linkage directives for
+// functions and global variables.
+//
+// extern function declaration -> .extern
+// extern function definition -> .visible
+// external global variable with init -> .visible
+// external without init -> .extern
+// appending -> not allowed, assert.
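+//
+// For example, under the CUDA interface a defined, externally visible kernel
+// is printed as ".visible .entry foo", while a non-kernel declaration ends up
+// as ".extern .func foo".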
+
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
+{
+ if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (V->hasExternalLinkage()) {
+ if (isa<GlobalVariable>(V)) {
+ const GlobalVariable *GVar = cast<GlobalVariable>(V);
+ if (GVar) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+ } else if (V->isDeclaration())
+ O << ".extern ";
+ else
+ O << ".visible ";
+ } else if (V->hasAppendingLinkage()) {
+ std::string msg;
+ msg.append("Error: ");
+ msg.append("Symbol ");
+ if (V->hasName())
+ msg.append(V->getName().str());
+ msg.append(" has unsupported appending linkage type");
+ llvm_unreachable(msg.c_str());
+ }
+ }
+}
+
+
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+ bool processDemoted) {
+
+ // Skip meta data
+ if (GVar->hasSection()) {
+ if (GVar->getSection() == "llvm.metadata")
+ return;
+ }
+
+ const TargetData *TD = TM.getTargetData();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ if (GVar->hasExternalLinkage()) {
+ if (GVar->hasInitializer())
+ O << ".visible ";
+ else
+ O << ".extern ";
+ }
+
+ if (llvm::isTexture(*GVar)) {
+ O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
+ return;
+ }
+
+ if (llvm::isSurface(*GVar)) {
+ O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
+ return;
+ }
+
+ if (GVar->isDeclaration()) {
+ // (extern) declarations, no definition or initializer
+ // Currently the only known declaration is for an automatic __local
+ // (.shared) promoted to global.
+ emitPTXGlobalVariable(GVar, O);
+ O << ";\n";
+ return;
+ }
+
+ if (llvm::isSampler(*GVar)) {
+ O << ".global .samplerref " << llvm::getSamplerName(*GVar);
+
+ Constant *Initializer = NULL;
+ if (GVar->hasInitializer())
+ Initializer = GVar->getInitializer();
+ ConstantInt *CI = NULL;
+ if (Initializer)
+ CI = dyn_cast<ConstantInt>(Initializer);
+ if (CI) {
+ unsigned sample=CI->getZExtValue();
+
+ O << " = { ";
+
+ for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
+ __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+ O << "addr_mode_" << i << " = ";
+ switch (addr) {
+ case 0: O << "wrap"; break;
+ case 1: O << "clamp_to_border"; break;
+ case 2: O << "clamp_to_edge"; break;
+ case 3: O << "wrap"; break;
+ case 4: O << "mirror"; break;
+ }
+ O <<", ";
+ }
+ O << "filter_mode = ";
+ switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
+ case 0: O << "nearest"; break;
+ case 1: O << "linear"; break;
+ case 2: assert ( 0 && "Anisotropic filtering is not supported");
+ default: O << "nearest"; break;
+ }
+ if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+ O << ", force_unnormalized_coords = 1";
+ }
+ O << " }";
+ }
+
+ O << ";\n";
+ return;
+ }
+
+ if (GVar->hasPrivateLinkage()) {
+
+ if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
+ return;
+
+ // FIXME - need better way (e.g. Metadata) to avoid generating this global
+ if (!strncmp(GVar->getName().data(), "filename", 8))
+ return;
+ if (GVar->use_empty())
+ return;
+ }
+
+ const Function *demotedFunc = 0;
+ if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
+ O << "// " << GVar->getName().str() << " has been demoted\n";
+ if (localDecls.find(demotedFunc) != localDecls.end())
+ localDecls[demotedFunc].push_back(GVar);
+ else {
+ std::vector<GlobalVariable *> temp;
+ temp.push_back(GVar);
+ localDecls[demotedFunc] = temp;
+ }
+ return;
+ }
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy, false);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+
+ // PTX allows variable initialization only for constant and global state
+ // spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
+ && GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!Initializer->isNullValue()) {
+ O << " = " ;
+ printScalarConstant(Initializer, O);
+ }
+ }
+ } else {
+ unsigned int ElementSize =0;
+
+ // Although PTX has direct support for struct and array types, and LLVM IR
+ // is very similar to PTX, LLVM CodeGen does not directly emit these
+ // high-level field accesses. Structs, arrays and vectors are therefore
+ // lowered into arrays of bytes.
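+ // For example, a global "<4 x i32>" in the global address space is emitted
+ // roughly as ".global .align 16 .b8 name[16]" and initialized byte by byte
+ // (the exact alignment comes from the preferred type alignment).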
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ // PTX allows variable initialization only for constant and
+ // global state spaces.
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
+ && GVar->hasInitializer()) {
+ Constant *Initializer = GVar->getInitializer();
+ if (!isa<UndefValue>(Initializer) &&
+ !Initializer->isNullValue()) {
+ AggBuffer aggBuffer(ElementSize, O, *this);
+ bufferAggregateConstant(Initializer, &aggBuffer);
+ if (aggBuffer.numSymbols) {
+ if (nvptxSubtarget.is64Bit()) {
+ O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize/8;
+ }
+ else {
+ O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize/4;
+ }
+ O << "]";
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ O << " = {" ;
+ aggBuffer.print();
+ O << "}";
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar) ;
+ if (ElementSize) {
+ O <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ }
+ else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
+ if (ElementSize) {
+ O <<"[" ;
+ O << ElementSize;
+ O << "]";
+ }
+ }
+ break;
+ default:
+ assert( 0 && "type not supported yet");
+ }
+
+ }
+ O << ";\n";
+}
+
+void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
+ if (localDecls.find(f) == localDecls.end())
+ return;
+
+ std::vector<GlobalVariable *> &gvars = localDecls[f];
+
+ for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+ O << "\t// demoted variable\n\t";
+ printModuleLevelGV(gvars[i], O, true);
+ }
+}
+
+void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
+ raw_ostream &O) const {
+ switch (AddressSpace) {
+ case llvm::ADDRESS_SPACE_LOCAL:
+ O << "local" ;
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ O << "global" ;
+ break;
+ case llvm::ADDRESS_SPACE_CONST:
+ // This logic should be consistent with that in
+ // getCodeAddrSpace() (NVPTXISelDAGToDAG.cpp).
+ if (nvptxSubtarget.hasGenericLdSt())
+ O << "global" ;
+ else
+ O << "const" ;
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << "const" ;
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << "shared" ;
+ break;
+ default:
+ llvm_unreachable("unexpected address space");
+ }
+}
+
+std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
+ bool useB4PTR) const {
+ switch (Ty->getTypeID()) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return "pred";
+ else if (NumBits <= 64) {
+ std::string name = "u";
+ return name + utostr(NumBits);
+ } else {
+ llvm_unreachable("Integer too large");
+ break;
+ }
+ break;
+ }
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ if (nvptxSubtarget.is64Bit()) {
+ if (useB4PTR) return "b64";
+ else return "u64";
+ } else {
+ if (useB4PTR) return "b32";
+ else return "u32";
+ }
+ }
+ llvm_unreachable("unexpected type");
+ return "";
+}
+
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+ raw_ostream &O) {
+
+ const TargetData *TD = TM.getTargetData();
+
+ // GlobalVariables are always constant pointers themselves.
+ const PointerType *PTy = GVar->getType();
+ Type *ETy = PTy->getElementType();
+
+ O << ".";
+ emitPTXAddressSpace(PTy->getAddressSpace(), O);
+ if (GVar->getAlignment() == 0)
+ O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ else
+ O << " .align " << GVar->getAlignment();
+
+ if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+ O << " .";
+ O << getPTXFundamentalTypeStr(ETy);
+ O << " ";
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+
+ int64_t ElementSize =0;
+
+ // Although PTX has direct support for struct and array types, and LLVM IR
+ // is very similar to PTX, LLVM CodeGen does not directly emit these
+ // high-level field accesses. Structs and arrays are therefore lowered into
+ // arrays of bytes.
+ switch (ETy->getTypeID()) {
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ ElementSize = TD->getTypeStoreSize(ETy);
+ O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ if (ElementSize) {
+ O << itostr(ElementSize) ;
+ }
+ O << "]";
+ break;
+ default:
+ assert( 0 && "type not supported yet");
+ }
+ return ;
+}
+
+
+static unsigned int
+getOpenCLAlignment(const TargetData *TD,
+ Type *Ty) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
+ return TD->getPrefTypeAlignment(Ty);
+
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (ATy)
+ return getOpenCLAlignment(TD, ATy->getElementType());
+
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ Type *ETy = VTy->getElementType();
+ unsigned int numE = VTy->getNumElements();
+ unsigned int alignE = TD->getPrefTypeAlignment(ETy);
+ if (numE == 3)
+ return 4*alignE;
+ else
+ return numE*alignE;
+ }
+
+ const StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy) {
+ unsigned int alignStruct = 1;
+ // Go through each element of the struct and find the
+ // largest alignment.
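+ // For example, a struct containing an i8 and a <4 x float> member would
+ // report the alignment of its most-aligned member, typically 16 with the
+ // usual preferred alignments (this is only an illustration).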
+ for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+ Type *ETy = STy->getElementType(i);
+ unsigned int align = getOpenCLAlignment(TD, ETy);
+ if (align > alignStruct)
+ alignStruct = align;
+ }
+ return alignStruct;
+ }
+
+ const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ if (FTy)
+ return TD->getPointerPrefAlignment();
+ return TD->getPrefTypeAlignment(Ty);
+}
+
+void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
+ int paramIndex, raw_ostream &O) {
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ else {
+ std::string argName = I->getName();
+ const char *p = argName.c_str();
+ while (*p) {
+ if (*p == '.')
+ O << "_";
+ else
+ O << *p;
+ p++;
+ }
+ }
+}
+
+void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
+ Function::const_arg_iterator I, E;
+ int i = 0;
+
+ if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
+ (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ return;
+ }
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
+ if (i==paramIndex) {
+ printParamName(I, paramIndex, O);
+ return;
+ }
+ }
+ llvm_unreachable("paramIndex out of bound");
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
+ raw_ostream &O) {
+ const TargetData *TD = TM.getTargetData();
+ const AttrListPtr &PAL = F->getAttributes();
+ const TargetLowering *TLI = TM.getTargetLowering();
+ Function::const_arg_iterator I, E;
+ unsigned paramIndex = 0;
+ bool first = true;
+ bool isKernelFunc = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ MVT thePointerTy = TLI->getPointerTy();
+
+ O << "(\n";
+
+ for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
+ const Type *Ty = I->getType();
+
+ if (!first)
+ O << ",\n";
+
+ first = false;
+
+ // Handle image/sampler parameters
+ if (llvm::isSampler(*I) || llvm::isImage(*I)) {
+ if (llvm::isImage(*I)) {
+ std::string sname = I->getName();
+ if (llvm::isImageWriteOnly(*I))
+ O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+ else // Default image is read_only
+ O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ else // Should be llvm::isSampler(*I)
+ O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+ << paramIndex;
+ continue;
+ }
+
+ if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) {
+ // Just a scalar
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (isKernelFunc) {
+ if (PTy) {
+ // Special handling for pointer arguments to kernel
+ O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
+
+ if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ Type *ETy = PTy->getElementType();
+ int addrSpace = PTy->getAddressSpace();
+ switch(addrSpace) {
+ default:
+ O << ".ptr ";
+ break;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ O << ".ptr .const ";
+ break;
+ case llvm::ADDRESS_SPACE_SHARED:
+ O << ".ptr .shared ";
+ break;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ case llvm::ADDRESS_SPACE_CONST:
+ O << ".ptr .global ";
+ break;
+ }
+ O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+ }
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // non-pointer scalar to kernel func
+ O << "\t.param ."
+ << getPTXFundamentalTypeStr(Ty) << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+ // Non-kernel function, just print .param .b<size> for ABI
+ // and .reg .b<size> for non-ABI
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32) sz = 32;
+ }
+ else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << "\t.param .b" << sz << " ";
+ else
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ continue;
+ }
+
+ // param has byVal attribute. So should be a pointer
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy &&
+ "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI || isKernelFunc) {
+ // Just print .param .align <a> .b8 <name>[<size>];
+ // <a>    = PAL.getParamAlignment
+ // <size> = typeallocsize of element type
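+ // For example, a 24-byte byval struct with 8-byte alignment becomes
+ // ".param .align 8 .b8 foo_param_2[24]" (names here are illustrative only).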
+ unsigned align = PAL.getParamAlignment(paramIndex+1);
+ unsigned sz = TD->getTypeAllocSize(ETy);
+ O << "\t.param .align " << align
+ << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+ continue;
+ } else {
+ // Split the ETy into constituent parts and
+ // print .param .b<size> <name> for each part.
+ // Further, if a part is vector, print the above for
+ // each vector element.
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*TLI, ETy, vtparts);
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << "\t.reg .b" << sz << " ";
+ printParamName(I, paramIndex, O);
+ if (j<je-1) O << ",\n";
+ ++paramIndex;
+ }
+ if (i<e-1)
+ O << ",\n";
+ }
+ --paramIndex;
+ continue;
+ }
+ }
+
+ O << "\n)\n";
+}
+
+void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
+ raw_ostream &O) {
+ const Function *F = MF.getFunction();
+ emitFunctionParamList(F, O);
+}
+
+
+void NVPTXAsmPrinter::
+setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Map the global virtual register number to a register-class-specific
+ // virtual register number starting from 1 within that class.
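+ // For example, the first Int32Regs vreg and the first Float32Regs vreg both
+ // receive local number 1 and would typically be printed as %r1 and %f1
+ // (the actual prefix comes from getNVPTXRegClassStr).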
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ //unsigned numRegClasses = TRI->getNumRegClasses();
+
+ // Emit the Fake Stack Object
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+ if (NumBytes) {
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
+ << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
+ if (nvptxSubtarget.is64Bit()) {
+ O << "\t.reg .b64 \t%SP;\n";
+ O << "\t.reg .b64 \t%SPL;\n";
+ }
+ else {
+ O << "\t.reg .b32 \t%SP;\n";
+ O << "\t.reg .b32 \t%SPL;\n";
+ }
+ }
+
+ // Go through all virtual registers to establish the mapping between the
+ // global virtual register number and the per-class virtual register number.
+ // We use the per-class virtual register number in the ptx output.
+ unsigned int numVRs = MRI->getNumVirtRegs();
+ for (unsigned i=0; i< numVRs; i++) {
+ unsigned int vr = TRI->index2VirtReg(i);
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
+ std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
+ int n = regmap.size();
+ regmap.insert(std::make_pair(vr, n+1));
+ }
+
+ // Emit register declarations
+ // @TODO: Extract out the real register usage
+ O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
+ O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+
+ // Emit declaration of the virtual registers or 'physical' registers for
+ // each register class
+ //for (unsigned i=0; i< numRegClasses; i++) {
+ // std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
+ // const TargetRegisterClass *RC = TRI->getRegClass(i);
+ // std::string rcname = getNVPTXRegClassName(RC);
+ // std::string rcStr = getNVPTXRegClassStr(RC);
+ // //int n = regmap.size();
+ // if (!isNVPTXVectorRegClass(RC)) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<"
+ // << NVPTXNumRegisters << ">;\n";
+ // }
+
+ // Only declare those registers that may be used. And do not emit vector
+ // registers as they are all elementized to scalar registers.
+ //if (n && !isNVPTXVectorRegClass(RC)) {
+ // if (RegAllocNilUsed) {
+ // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
+ // << ">;\n";
+ // }
+ // else {
+ // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
+ // << "<" << 32 << ">;\n";
+ // }
+ //}
+ //}
+
+ OutStreamer.EmitRawText(O.str());
+}
+
+
+void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ bool ignored;
+ unsigned int numHex;
+ const char *lead;
+
+ if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+ numHex = 8;
+ lead = "0f";
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &ignored);
+ } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
+ numHex = 16;
+ lead = "0d";
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ } else
+ llvm_unreachable("unsupported fp type");
+
+ APInt API = APF.bitcastToAPInt();
+ std::string hexstr(utohexstr(API.getZExtValue()));
+ O << lead;
+ if (hexstr.length() < numHex)
+ O << std::string(numHex - hexstr.length(), '0');
+ O << utohexstr(API.getZExtValue());
+}
+
+void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ O << CI->getValue();
+ return;
+ }
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ printFPConstant(CFP, O);
+ return;
+ }
+ if (isa<ConstantPointerNull>(CPV)) {
+ O << "0";
+ return;
+ }
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ }
+ if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ O << *Mang->getSymbol(GVar);
+ return;
+ } else {
+ O << *LowerConstant(CPV, *this);
+ return;
+ }
+ }
+ llvm_unreachable("Not scalar type found in printScalarConstant()");
+}
+
+
+void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+ AggBuffer *aggBuffer) {
+
+ const TargetData *TD = TM.getTargetData();
+
+ if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
+ int s = TD->getTypeAllocSize(CPV->getType());
+ if (s<Bytes)
+ s = Bytes;
+ aggBuffer->addZeros(s);
+ return;
+ }
+
+ unsigned char *ptr;
+ switch (CPV->getType()->getTypeID()) {
+
+ case Type::IntegerTyID: {
+ const Type *ETy = CPV->getType();
+ if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+ unsigned char c =
+ (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = &c;
+ aggBuffer->addBytes(ptr, 1, Bytes);
+ } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
+ short int16 =
+ (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char*)&int16;
+ aggBuffer->addBytes(ptr, 2, Bytes);
+ } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ int int32 =(int)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt =
+ dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
+ Cexpr, TD))) {
+ int int32 =(int)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(4);
+ break;
+ }
+ }
+ llvm_unreachable("unsupported integer const type");
+ } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ long long int64 =(long long)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 =(long long)(constInt->getZExtValue());
+ ptr = (unsigned char*)&int64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ break;
+ }
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ aggBuffer->addZeros(8);
+ break;
+ }
+ }
+ llvm_unreachable("unsupported integer const type");
+ } else
+ llvm_unreachable("unsupported integer const type");
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const Type* Ty = CFP->getType();
+ if (Ty == Type::getFloatTy(CPV->getContext())) {
+ float float32 = (float)CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char*)&float32;
+ aggBuffer->addBytes(ptr, 4, Bytes);
+ } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
+ double float64 = CFP->getValueAPF().convertToDouble();
+ ptr = (unsigned char*)&float64;
+ aggBuffer->addBytes(ptr, 8, Bytes);
+ }
+ else {
+ llvm_unreachable("unsupported fp const type");
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ aggBuffer->addSymbol(GVar);
+ }
+ else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ Value *v = Cexpr->stripPointerCasts();
+ aggBuffer->addSymbol(v);
+ }
+ unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ aggBuffer->addZeros(s);
+ break;
+ }
+
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID: {
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
+ isa<ConstantStruct>(CPV)) {
+ int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ bufferAggregateConstant(CPV, aggBuffer);
+ if ( Bytes > ElementSize )
+ aggBuffer->addZeros(Bytes-ElementSize);
+ }
+ else if (isa<ConstantAggregateZero>(CPV))
+ aggBuffer->addZeros(Bytes);
+ else
+ llvm_unreachable("Unexpected Constant type");
+ break;
+ }
+
+ default:
+ llvm_unreachable("unsupported type");
+ }
+}
+
+void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+ AggBuffer *aggBuffer) {
+ const TargetData *TD = TM.getTargetData();
+ int Bytes;
+
+ // Old constants
+ if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
+ if (CPV->getNumOperands())
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CPV)) {
+ if (CDS->getNumElements())
+ for (unsigned i = 0; i < CDS->getNumElements(); ++i)
+ bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
+ aggBuffer);
+ return;
+ }
+
+
+ if (isa<ConstantStruct>(CPV)) {
+ if (CPV->getNumOperands()) {
+ StructType *ST = cast<StructType>(CPV->getType());
+ for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
+ if ( i == (e - 1))
+ Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
+ TD->getTypeAllocSize(ST)
+ - TD->getStructLayout(ST)->getElementOffset(i);
+ else
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
+ aggBuffer);
+ }
+ }
+ return;
+ }
+ llvm_unreachable("unsupported constant type in printAggregateConstant()");
+}
+
+// buildTypeNameMap - Run through symbol table looking for type names.
+//
+
+
+bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
+
+ std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
+
+ if (PI != TypeNameMap.end() &&
+ (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
+ return true;
+
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
+{
+ switch(MI.getOpcode()) {
+ default:
+ return false;
+ case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64: case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32: case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64: case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam: case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End:
+ case NVPTX::CallVoidInstReg64:
+ case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32: case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16: case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
+ case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
+ case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
+ case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
+ case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
+ case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
+ case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
+ case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
+ case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
+ case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
+ case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
+ case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
+ case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
+ case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
+ case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
+ case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
+ case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
+ case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
+ case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
+ case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
+ case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
+ case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
+ case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
+ case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
+ case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
+ case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
+ case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
+ case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
+ case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
+ case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
+ case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
+ case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
+ case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
+ return true;
+ }
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
+
+
+void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
+ std::stringstream temp;
+ LineReader * reader = this->getReader(filename.str());
+ temp << "\n//";
+ temp << filename.str();
+ temp << ":";
+ temp << line;
+ temp << " ";
+ temp << reader->readLine(line);
+ temp << "\n";
+ this->OutStreamer.EmitRawText(Twine(temp.str()));
+}
+
+
+LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
+ if (reader == NULL) {
+ reader = new LineReader(filename);
+ }
+
+ if (reader->fileName() != filename) {
+ delete reader;
+ reader = new LineReader(filename);
+ }
+
+ return reader;
+}
+
+
+std::string
+LineReader::readLine(unsigned lineNum) {
+ if (lineNum < theCurLine) {
+ theCurLine = 0;
+ fstr.seekg(0,std::ios::beg);
+ }
+ while (theCurLine < lineNum) {
+ fstr.getline(buff,500);
+ theCurLine++;
+ }
+ return buff;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeNVPTXAsmPrinter() {
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+}
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
new file mode 100644
index 000000000000..6488b1442580
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -0,0 +1,315 @@
+//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXASMPRINTER_H
+#define NVPTXASMPRINTER_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <fstream>
+
+// The PTX syntax and format is very different from what is usually seen in a
+// .s file, therefore we are not able to use the MCAsmStreamer interface here.
+//
+// We are handcrafting the output method here.
+//
+// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
+// (subclass of MCStreamer).
+
+// This is defined in AsmPrinter.cpp.
+// Used to process the constant expressions in initializers.
+namespace nvptx {
+const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
+ llvm::AsmPrinter &AP) ;
+}
+
+namespace llvm {
+
+class LineReader {
+private:
+ unsigned theCurLine ;
+ std::ifstream fstr;
+ char buff[512];
+ std::string theFileName;
+ SmallVector<unsigned, 32> lineOffset;
+public:
+ LineReader(std::string filename) {
+ theCurLine = 0;
+ fstr.open(filename.c_str());
+ theFileName = filename;
+ }
+ std::string fileName() { return theFileName; }
+ ~LineReader() {
+ fstr.close();
+ }
+ std::string readLine(unsigned line);
+};
+
+
+
+class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
+
+
+ class AggBuffer {
+ // Used to buffer the emitted string for initializing global
+ // aggregates.
+ //
+ // Normally an aggregate (array, vector or structure) is emitted
+ // as a u8[]. However, if one element/field of the aggregate
+ // is a non-NULL address, then the aggregate is emitted as u32[]
+ // or u64[].
+ //
+ // We first lay out the aggregate in 'buffer' in bytes, except for
+ // those symbol addresses. For the i-th symbol address in the
+ // aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
+ // are filled with 0s. symbolPosInBuffer[i-1] records its position
+ // in 'buffer', and Symbols[i-1] records the Value*.
+ //
+ // Once we have this AggBuffer setup, we can choose how to print
+ // it out.
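+ //
+ // For example, an aggregate {i32 1, i32 2} with no symbol members is
+ // printed byte by byte, e.g. "1, 0, 0, 0, 2, 0, 0, 0" on a little-endian
+ // host; if one member is the address of a global, the whole aggregate is
+ // instead emitted as .u32/.u64 elements and that slot prints the symbol.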
+ public:
+ unsigned size; // size of the buffer in bytes
+ unsigned char *buffer; // the buffer
+ unsigned numSymbols; // number of symbol addresses
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<Value *, 4> Symbols;
+
+ private:
+ unsigned curpos;
+ raw_ostream &O;
+ NVPTXAsmPrinter &AP;
+
+ public:
+ AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
+ :O(_O),AP(_AP) {
+ buffer = new unsigned char[_size];
+ size = _size;
+ curpos = 0;
+ numSymbols = 0;
+ }
+ ~AggBuffer() {
+ delete [] buffer;
+ }
+ unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
+ assert((curpos+Num) <= size);
+ assert((curpos+Bytes) <= size);
+ for ( int i= 0; i < Num; ++i) {
+ buffer[curpos] = Ptr[i];
+ curpos ++;
+ }
+ for ( int i=Num; i < Bytes ; ++i) {
+ buffer[curpos] = 0;
+ curpos ++;
+ }
+ return curpos;
+ }
+ unsigned addZeros(int Num) {
+ assert((curpos+Num) <= size);
+ for ( int i= 0; i < Num; ++i) {
+ buffer[curpos] = 0;
+ curpos ++;
+ }
+ return curpos;
+ }
+ void addSymbol(Value *GVar) {
+ symbolPosInBuffer.push_back(curpos);
+ Symbols.push_back(GVar);
+ numSymbols++;
+ }
+ void print() {
+ if (numSymbols == 0) {
+ // print out in bytes
+ for (unsigned i=0; i<size; i++) {
+ if (i)
+ O << ", ";
+ O << (unsigned int)buffer[i];
+ }
+ } else {
+ // print out in 4-bytes or 8-bytes
+ unsigned int pos = 0;
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ unsigned int nBytes = 4;
+ if (AP.nvptxSubtarget.is64Bit())
+ nBytes = 8;
+ for (pos=0; pos<size; pos+=nBytes) {
+ if (pos)
+ O << ", ";
+ if (pos == nextSymbolPos) {
+ Value *v = Symbols[nSym];
+ if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ MCSymbol *Name = AP.Mang->getSymbol(GVar);
+ O << *Name;
+ }
+ else if (ConstantExpr *Cexpr =
+ dyn_cast<ConstantExpr>(v)) {
+ O << *nvptx::LowerConstant(Cexpr, AP);
+ } else
+ llvm_unreachable("symbol type unknown");
+ nSym++;
+ if (nSym >= numSymbols)
+ nextSymbolPos = size+1;
+ else
+ nextSymbolPos = symbolPosInBuffer[nSym];
+ } else
+ if (nBytes == 4)
+ O << *(unsigned int*)(buffer+pos);
+ else
+ O << *(unsigned long long*)(buffer+pos);
+ }
+ }
+ }
+ };
+
+ friend class AggBuffer;
+
+ virtual void emitSrcInText(StringRef filename, unsigned line);
+
+private :
+ virtual const char *getPassName() const {
+ return "NVPTX Assembly Printer";
+ }
+
+ const Function *F;
+ std::string CurrentFnName;
+
+ void EmitFunctionEntryLabel();
+ void EmitFunctionBodyStart();
+ void EmitFunctionBodyEnd();
+
+ void EmitInstruction(const MachineInstr *);
+
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+
+ void printGlobalVariable(const GlobalVariable *GVar);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printVecModifiedImmediate(const MachineOperand &MO,
+ const char *Modifier, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier=0);
+ void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
+ // definition autogenerated.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+ bool=false);
+ void printParamName(int paramIndex, raw_ostream &O);
+ void printParamName(Function::const_arg_iterator I, int paramIndex,
+ raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O);
+ void emitKernelFunctionDirectives(const Function& F,
+ raw_ostream &O) const;
+ void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+ void emitFunctionExternParamList(const MachineFunction &MF);
+ void emitFunctionParamList(const Function *, raw_ostream &O);
+ void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
+ void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
+ void emitFunctionTempData(const MachineFunction &MF,
+ unsigned &FrameSize);
+ bool isImageType(const Type *Ty);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ void printReturnValStr(const Function *, raw_ostream &O);
+ void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
+
+protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+private:
+ std::string CurrentBankselLabelInBasicBlock;
+
+ // This is specific per MachineFunction.
+ const MachineRegisterInfo *MRI;
+ // The contents are specific for each
+ // MachineFunction. But the size of the
+ // array is not.
+ std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
+ // cache the subtarget here.
+ const NVPTXSubtarget &nvptxSubtarget;
+ // Build the map between type name and ID based on module's type
+ // symbol table.
+ std::map<const Type *, std::string> TypeNameMap;
+
+ // List of variables demoted to a function scope.
+ std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+
+ // To record filename to ID mapping
+ std::map<std::string, unsigned> filenameMap;
+ void recordAndEmitFilenames(Module &);
+
+ void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
+ void emitPTXAddressSpace(unsigned int AddressSpace,
+ raw_ostream &O) const;
+  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const;
+  void printScalarConstant(Constant *CPV, raw_ostream &O);
+  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+
+ void printOperandProper(const MachineOperand &MO);
+
+ void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+ void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclaration(const Function *, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+ void emitDemotedVars(const Function *, raw_ostream &);
+
+ LineReader *reader;
+ LineReader *getReader(std::string);
+public:
+ NVPTXAsmPrinter(TargetMachine &TM,
+ MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ CurrentBankselLabelInBasicBlock = "";
+ VRidGlobal2LocalMap = NULL;
+ reader = NULL;
+ }
+
+ ~NVPTXAsmPrinter() {
+    if (reader)
+      delete reader;
+ }
+
+ bool ignoreLoc(const MachineInstr &);
+
+ virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+
+ DebugLoc prevDebugLoc;
+ void emitLineNumberAsDotLoc(const MachineInstr &);
+};
+} // end of namespace
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
new file mode 100644
index 000000000000..a9abc00bf3f6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -0,0 +1,76 @@
+//===-- NVPTXFrameLowering.cpp - NVPTX Frame Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXFrameLowering.h"
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
+ return true;
+}
+
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+ if (MF.getFrameInfo()->hasStackObjects()) {
+ MachineBasicBlock &MBB = MF.front();
+ // Insert "mov.u32 %SP, %Depot"
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+    // This instruction really occurs before the first instruction
+    // in the BB, so give it no debug location.
+ DebugLoc dl = DebugLoc();
+
+ if (tm.getSubtargetImpl()->hasGenericLdSt()) {
+ // mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
+ if (is64bit) {
+ MachineInstr *MI = BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
+ .addReg(NVPTX::VRDepot);
+ } else {
+ MachineInstr *MI = BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
+ .addReg(NVPTX::VRDepot);
+ }
+ }
+ else {
+ // mov %SP, %depot;
+ if (is64bit)
+ BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
+ .addReg(NVPTX::VRDepot);
+ else
+ BuildMI(MBB, MBBI, dl,
+ tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
+ .addReg(NVPTX::VRDepot);
+ }
+ }
+}
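+
+// Illustrative note (assumed expansion, following the comments above): on a
+// 64-bit target with generic ld/st support, the prologue built here prints
+// roughly as
+//   mov        %SPL, %depot;
+//   cvta.local %SP, %SPL;
+// and otherwise as a single
+//   mov        %SP, %depot;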
+
+void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
new file mode 100644
index 000000000000..ee87b3997e78
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -0,0 +1,40 @@
+//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_FRAMELOWERING_H
+#define NVPTX_FRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+
+namespace llvm {
+class NVPTXTargetMachine;
+
+class NVPTXFrameLowering : public TargetFrameLowering {
+ NVPTXTargetMachine &tm;
+ bool is64bit;
+
+public:
+ explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
+ tm(_tm), is64bit(_is64bit) {}
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
new file mode 100644
index 000000000000..4e92f0e785fe
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -0,0 +1,683 @@
+//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "NVPTXISelDAGToDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/GlobalValue.h"
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-isel"
+
+using namespace llvm;
+
+
+static cl::opt<bool>
+UseFMADInstruction("nvptx-mad-enable",
+ cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
+
+static cl::opt<int>
+FMAContractLevel("nvptx-fma-level",
+                 cl::ZeroOrMore,
+                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
+                          " 1: do it, 2: do it aggressively)"),
+                 cl::init(2));
+
+
+static cl::opt<int>
+UsePrecDivF32("nvptx-prec-divf32",
+              cl::ZeroOrMore,
+              cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
+                  " IEEE-compliant F32 div.rnd if available."),
+              cl::init(2));
+
+/// createNVPTXISelDag - This pass converts a legalized DAG into a
+/// NVPTX-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new NVPTXDAGToDAGISel(TM, OptLevel);
+}
+
+
+NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+: SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>())
+{
+ // Always do fma.f32 fpcontract if the target supports the instruction.
+ // Always do fma.f64 fpcontract if the target supports the instruction.
+  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
+  // support fma.f32.
+
+ doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
+ (FMAContractLevel>=1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
+ (FMAContractLevel>=1);
+ doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
+ (FMAContractLevel==2);
+ doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
+ (FMAContractLevel==2);
+
+ allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
+
+ UseF32FTZ = false;
+
+ doMulWide = (OptLevel > 0);
+
+ // Decide how to translate f32 div
+ do_DIVF32_PREC = UsePrecDivF32;
+ // sm less than sm_20 does not support div.rnd. Use div.full.
+ if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
+ do_DIVF32_PREC = 1;
+
+}
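+
+// Illustrative defaults (derived from the flags set above): with OptLevel > 0,
+// -nvptx-fma-level=2 (the default) and a target that has fma.f32, doFMAF32,
+// doFMAF32AGG and allowFMA are all true; with -nvptx-mad-enable on a target
+// without fma.f32, doFMADF32 is set instead.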
+
+/// Select - Select instructions not customized! Used for
+/// expanded, promoted and normal instructions.
+SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ SDNode *ResNode = NULL;
+ switch (N->getOpcode()) {
+ case ISD::LOAD:
+ ResNode = SelectLoad(N);
+ break;
+ case ISD::STORE:
+ ResNode = SelectStore(N);
+ break;
+ }
+ if (ResNode)
+ return ResNode;
+ return SelectCode(N);
+}
+
+
+static unsigned int
+getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
+{
+ const Value *Src = N->getSrcValue();
+ if (!Src)
+ return NVPTX::PTXLdStInstCode::LOCAL;
+
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ switch (PT->getAddressSpace()) {
+ case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_CONST:
+ // If the arch supports generic address space, translate it to GLOBAL
+ // for correctness.
+      // If the arch does not support generic address space, then the arch
+      // does not really support ADDRESS_SPACE_CONST; translate it to
+      // CONSTANT for better performance.
+ if (Subtarget.hasGenericLdSt())
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ else
+ return NVPTX::PTXLdStInstCode::CONSTANT;
+ default: break;
+ }
+ }
+ return NVPTX::PTXLdStInstCode::LOCAL;
+}
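+
+// Illustrative example (assuming the usual NVPTX numbering, where
+// addrspace(3) is the shared space): for IR roughly like
+//   %v = load i32 addrspace(3)* %p
+// getCodeAddrSpace returns SHARED, so the load selected below is printed
+// with the ".shared" state-space qualifier.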
+
+
+SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDNode *NVPTXLD= NULL;
+
+ // do not support pre/post inc/dec
+ if (LD->isIndexed())
+ return NULL;
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+
+ // Volatile Setting
+  // - .volatile is only available for .global and .shared
+ bool isVolatile = LD->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int fromType;
+ if ((LD->getExtensionType() == ISD::SEXTLOAD))
+ fromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ fromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ fromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N1, Addr)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N1.getNode(), N1, Base, Offset):
+ SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N1.getNode(), N1, Base, Offset):
+ SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 8);
+ }
+ else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+ case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
+ case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
+ case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
+ case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
+ case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
+ case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
+ case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
+ case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
+ case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
+ case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(fromType),
+ getI32Imm(fromTypeWidth),
+ N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+ MVT::Other, Ops, 7);
+ }
+
+ if (NVPTXLD != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXLD;
+}
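+
+// Illustrative mapping (assumed from the opcode suffixes used above):
+//   _avar : direct symbol address, e.g.  ld.global.u32 %r1, [sym];
+//   _asi  : symbol + immediate,    e.g.  ld.global.u32 %r1, [sym+4];
+//   _ari  : register + immediate,  e.g.  ld.global.u32 %r1, [%rd1+4];
+//   _areg : plain register,        e.g.  ld.global.u32 %r1, [%rd1];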
+
+SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ EVT StoreVT = ST->getMemoryVT();
+ SDNode *NVPTXST = NULL;
+
+ // do not support pre/post inc/dec
+ if (ST->isIndexed())
+ return NULL;
+
+ if (!StoreVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+
+ // Volatile Setting
+  // - .volatile is only available for .global and .shared
+ bool isVolatile = ST->isVolatile();
+ if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = StoreVT.getSimpleVT();
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ unsigned num = SimpleVT.getVectorNumElements();
+ if (num == 2)
+ vecType = NVPTX::PTXLdStInstCode::V2;
+ else if (num == 4)
+ vecType = NVPTX::PTXLdStInstCode::V4;
+ else
+ return NULL;
+ }
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ //
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int toType;
+ if (ScalarVT.isFloatingPoint())
+ toType = NVPTX::PTXLdStInstCode::Float;
+ else
+ toType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ // Create the machine instruction DAG
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue Addr;
+ SDValue Offset, Base;
+ unsigned Opcode;
+ MVT::SimpleValueType SourceVT =
+ N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_avar; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset):
+ SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_asi; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N2.getNode(), N2, Base, Offset):
+ SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 9);
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
+ case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
+ case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
+ case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
+ case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
+ case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
+ case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
+ case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
+ case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
+ case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
+ case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
+ default: return NULL;
+ }
+ SDValue Ops[] = { N1,
+ getI32Imm(isVolatile),
+ getI32Imm(codeAddrSpace),
+ getI32Imm(vecType),
+ getI32Imm(toType),
+ getI32Imm(toTypeWidth),
+ N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 8);
+ }
+
+ if (NVPTXST != NULL) {
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ }
+
+ return NVPTXST;
+}
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+ // Return true if TGA or ES.
+ if (N.getOpcode() == ISD::TargetGlobalAddress
+ || N.getOpcode() == ISD::TargetExternalSymbol) {
+ Address = N;
+ return true;
+ }
+ if (N.getOpcode() == NVPTXISD::Wrapper) {
+ Address = N.getOperand(0);
+ return true;
+ }
+ if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
+ if (IID == Intrinsic::nvvm_ptr_gen_to_param)
+ if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
+ return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset,
+ MVT mvt) {
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ SDValue base=Addr.getOperand(0);
+ if (SelectDirectAddr(base, Base)) {
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset,
+ MVT mvt) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ Offset = CurDAG->getTargetConstant(0, mvt);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
+ return false;
+ }
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+ else
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ return true;
+ }
+ }
+ return false;
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
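+
+// Illustrative example (assumed): an address of the form
+//   (add FrameIndex:i64<0>, Constant:i64<8>)
+// matches SelectADDRri64 with Base = TargetFrameIndex<0> and Offset = 8,
+// which selects one of the _ari load/store forms above.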
+
+bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
+ unsigned int spN) const {
+ const Value *Src = NULL;
+  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
+ // the classof() for MemSDNode does not include MemIntrinsicSDNode
+ // (See SelectionDAGNodes.h). So we need to check for both.
+ if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
+ Src = mN->getSrcValue();
+ }
+ else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ Src = mN->getSrcValue();
+ }
+ if (!Src)
+ return false;
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return (PT->getAddressSpace() == spN);
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (SelectDirectAddr(Op, Op0)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ }
+ if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+ break;
+ }
+ return true;
+}
+
+// Return true if N is an undef or a constant.
+// If N was undef, return an (i8imm 0) in Retval.
+// If N was an immediate, convert it to i8imm and return it in Retval.
+// Note: The conversion to i8imm is required; otherwise the
+// pattern matcher inserts a bunch of IMOVi8rr to convert
+// the imm to i8imm, and this causes instruction selection
+// to fail.
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
+ SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) &&
+ !(N.getOpcode() == ISD::Constant))
+ return false;
+
+ if (N.getOpcode() == ISD::UNDEF)
+ Retval = CurDAG->getTargetConstant(0, MVT::i8);
+ else {
+ ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
+ unsigned retval = cn->getZExtValue();
+ Retval = CurDAG->getTargetConstant(retval, MVT::i8);
+ }
+ return true;
+}
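+
+// Illustrative example (assumed): an ISD::UNDEF operand is rewritten to
+// (i8imm 0) and a Constant with value 3 to (i8imm 3), so the autogenerated
+// patterns that expect i8imm operands still match.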
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
new file mode 100644
index 000000000000..ccd69b29dd42
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -0,0 +1,105 @@
+//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-isel"
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+
+ // If true, generate corresponding FPCONTRACT. This is
+  // language dependent (i.e. CUDA and OpenCL work differently).
+ bool doFMADF32;
+ bool doFMAF64;
+ bool doFMAF32;
+ bool doFMAF64AGG;
+ bool doFMAF32AGG;
+ bool allowFMA;
+
+ // 0: use div.approx
+ // 1: use div.full
+  // 2: For sm_20 and later, IEEE-compliant div.rnd.f32 can be generated;
+ // Otherwise, use div.full
+ int do_DIVF32_PREC;
+
+ // If true, add .ftz to f32 instructions.
+ // This is only meaningful for sm_20 and later, as the default
+ // is not ftz.
+  // For SM versions earlier than sm_20, f32 denormals are always
+  // flushed to zero by the hardware.
+  // We always add the .ftz modifier regardless of the SM version
+  // when UseF32FTZ is true.
+ bool UseF32FTZ;
+
+ // If true, generate mul.wide from sext and mul
+ bool doMulWide;
+
+public:
+ explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+ CodeGenOpt::Level OptLevel);
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "NVPTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ const NVPTXSubtarget &Subtarget;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+private:
+ // Include the pieces autogenerated from the target description.
+#include "NVPTXGenDAGISel.inc"
+
+ SDNode *Select(SDNode *N);
+ SDNode* SelectLoad(SDNode *N);
+ SDNode* SelectStore(SDNode *N);
+
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Match direct address complex pattern.
+ bool SelectDirectAddr(SDValue N, SDValue &Address);
+
+ bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset, MVT mvt);
+ bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+
+ bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+
+ bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
+
+};
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
new file mode 100644
index 000000000000..6ea10ea14ad6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -0,0 +1,1291 @@
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetObjectFile.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+#include <sstream>
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-lower"
+
+using namespace llvm;
+
+static unsigned int uniqueCallSite = 0;
+
+static cl::opt<bool>
+RetainVectorOperands("nvptx-codegen-vectors",
+ cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
+ cl::init(true));
+
+static cl::opt<bool>
+sched4reg("nvptx-sched4reg",
+          cl::desc("NVPTX Specific: schedule for register pressure"),
+          cl::init(false));
+
+// NVPTXTargetLowering Constructor.
+NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
+: TargetLowering(TM, new NVPTXTargetObjectFile()),
+ nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+
+  // Always lower memset, memcpy, and memmove intrinsics to load/store
+  // instructions, rather than generating calls to memset, memcpy or memmove.
+ maxStoresPerMemset = (unsigned)0xFFFFFFFF;
+ maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
+ maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ // Jump is Expensive. Don't create extra control flow for 'and', 'or'
+ // condition branches.
+ setJumpIsExpensive(true);
+
+ // By default, use the Source scheduling
+ if (sched4reg)
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
+ addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
+ addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
+ addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
+ addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+
+ if (RetainVectorOperands) {
+ addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
+ addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
+ addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
+ addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
+ addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
+ addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
+ addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
+ addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
+ addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
+ addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
+ }
+
+ // Operations not directly supported by NVPTX.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ if (nvptxSubtarget.hasROT64()) {
+ setOperationAction(ISD::ROTL , MVT::i64, Legal);
+ setOperationAction(ISD::ROTR , MVT::i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ }
+ if (nvptxSubtarget.hasROT32()) {
+ setOperationAction(ISD::ROTL , MVT::i32, Legal);
+ setOperationAction(ISD::ROTR , MVT::i32, Legal);
+ }
+ else {
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::ROTL , MVT::i16, Expand);
+ setOperationAction(ISD::ROTR , MVT::i16, Expand);
+ setOperationAction(ISD::ROTL , MVT::i8, Expand);
+ setOperationAction(ISD::ROTR , MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+
+ // Indirect branch is not supported.
+ // This also disables Jump Table creation.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+
+  // We want to legalize constant-related memmove and memcpy
+  // intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PTX does not support load / store predicate registers
+ setOperationAction(ISD::LOAD, MVT::i1, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setOperationAction(ISD::STORE, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ // This is legal in NVPTX
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+
+ // TRAP can be lowered to PTX trap
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // By default, CONCAT_VECTORS is implemented via store/load
+  // through the stack. It is slow and uses local memory. We need
+  // to custom-lower it.
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
+
+ // Expand vector int to float and float to int conversions
+ // - For SINT_TO_FP and UINT_TO_FP, the src type
+ // (Node->getOperand(0).getValueType())
+ // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
+ // the dest type (Node->getValueType(0)) is used.
+ //
+ // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
+ // case, and
+ // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
+ //
+ // That is why v4i32 or v2i32 are used here.
+ //
+ // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
+ // (LegalizeVectorOps.cpp).
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+
+ // Now deduce the information based on the above mentioned
+ // actions
+ computeRegisterProperties();
+}
+
+
+const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case NVPTXISD::CALL: return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
+ case NVPTXISD::DeclareScalarParam:
+ return "NVPTXISD::DeclareScalarParam";
+ case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ }
+}
+
+
+SDValue
+NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+}
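+
+// Illustrative example (assumed): a GlobalAddressSDNode for @gvar becomes
+// (NVPTXISD::Wrapper (TargetGlobalAddress @gvar)), which SelectDirectAddr in
+// the instruction selector then matches as a direct (_avar) address.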
+
+std::string NVPTXTargetLowering::getPrototype(Type *retTy,
+ const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ unsigned retAlignment) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::stringstream O;
+ O << "prototype_" << uniqueCallSite << " : .callprototype ";
+
+ if (retTy->getTypeID() == Type::VoidTyID)
+ O << "()";
+ else {
+ O << "(";
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ size = ITy->getBitWidth();
+ if (size < 32) size = 32;
+ }
+ else {
+ assert(retTy->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = retTy->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " _";
+ }
+ else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits()
+ << " _";
+ else {
+ if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ totalsz += sz/8;
+ }
+ }
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << totalsz << "]";
+ }
+ else {
+ assert(false &&
+ "Unknown return type");
+ }
+ }
+ }
+ else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned idx = 0;
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0, je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " _";
+ if (j<je-1) O << ", ";
+ ++idx;
+ }
+ if (i < e-1)
+ O << ", ";
+ }
+ }
+ O << ") ";
+ }
+ O << "_ (";
+
+ bool first = true;
+ MVT thePointerTy = getPointerTy();
+
+ for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+ const Type *Ty = Args[i].Ty;
+ if (!first) {
+ O << ", ";
+ }
+ first = false;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32) sz = 32;
+ }
+ else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ if (isABI)
+ O << ".param .b" << sz << " ";
+ else
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ continue;
+ }
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy &&
+ "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ if (isABI) {
+ unsigned align = Outs[i].Flags.getByValAlign();
+ unsigned sz = getTargetData()->getTypeAllocSize(ETy);
+ O << ".param .align " << align
+ << " .b8 ";
+ O << "_";
+ O << "[" << sz << "]";
+ continue;
+ }
+ else {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, ETy, vtparts);
+ for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ O << ".reg .b" << sz << " ";
+ O << "_";
+ if (j<je-1) O << ", ";
+ }
+ if (i<e-1)
+ O << ", ";
+ }
+ continue;
+ }
+ }
+ O << ");";
+ return O.str();
+}
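+
+// Illustrative example (assumed, sm_20 ABI): for an indirect call of type
+// i32 (i32) at call site 0, getPrototype returns roughly
+//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);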
+
+
+SDValue
+NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ ArgListTy &Args = CLI.Args;
+ Type *retTy = CLI.RetTy;
+ ImmutableCallSite *CS = CLI.CS;
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ SDValue tempChain = Chain;
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, true));
+ SDValue InFlag = Chain.getValue(1);
+
+ assert((Outs.size() == Args.size()) &&
+ "Unexpected number of arguments to function call");
+ unsigned paramCount = 0;
+  // Declare the .params or .reg needed to pass values
+  // to the function.
+ for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ EVT VT = Outs[i].VT;
+
+ if (Outs[i].Flags.isByVal() == false) {
+ // Plain scalar
+ // for ABI, declare .param .b<size> .param<n>;
+ // for nonABI, declare .reg .b<size> .param<n>;
+ unsigned isReg = 1;
+ if (isABI)
+ isReg = 0;
+ unsigned sz = VT.getSizeInBits();
+ if (VT.isInteger() && (sz < 32)) sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(isReg, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+
+ unsigned opcode = NVPTXISD::StoreParam;
+ if (isReg)
+ opcode = NVPTXISD::MoveToParam;
+ else {
+ if (Outs[i].Flags.isZExt())
+ opcode = NVPTXISD::StoreParamU32;
+ else if (Outs[i].Flags.isSExt())
+ opcode = NVPTXISD::StoreParamS32;
+ }
+ Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
+
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ continue;
+ }
+ // struct or vector
+ SmallVector<EVT, 16> vtparts;
+ const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ assert(PTy &&
+ "Type of a byval parameter should be pointer");
+ ComputeValueVTs(*this, PTy->getElementType(), vtparts);
+
+ if (isABI) {
+ // declare .param .align 16 .b8 .param<n>[<size>];
+ unsigned sz = Outs[i].Flags.getByValSize();
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+      // The ByValAlign in the Outs[i].Flags is always set at this point, so we
+      // don't need to worry about natural alignment or not.
+      // See TargetLowering::LowerCallTo().
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ unsigned curOffset = 0;
+ for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ OutVals[i],
+ DAG.getConstant(curOffset,
+ getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
+ MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
+ Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ curOffset += sz/8;
+ }
+ }
+ ++paramCount;
+ continue;
+ }
+    // Non-ABI, struct or vector.
+    // Declare a bunch of .reg .b<size> .param<n>.
+ unsigned curOffset = 0;
+ for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
+ MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset,
+ getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), theVal,
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
+ CopyParamOps, 5);
+ InFlag = Chain.getValue(1);
+ ++paramCount;
+ }
+ }
+ }
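+
+  // Illustrative note (assumed, ABI path): for a plain i32 argument the loop
+  // above emits nodes that later print roughly as
+  //   .param .b32 param<n>;
+  //   st.param.b32 [param<n>], <reg>;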
+
+ GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+ unsigned retAlignment = 0;
+
+ // Handle Result
+ unsigned retCount = 0;
+ if (Ins.size() > 0) {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
+ // individual .reg .b<size> func_retval<0..> for non ABI
+ unsigned resultsz = 0;
+ for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = resvtparts[i];
+ if (resvtparts[i].isVector()) {
+ elems = resvtparts[i].getVectorNumElements();
+ elemtype = resvtparts[i].getVectorElementType();
+ }
+ for (unsigned j=0,je=elems; j!=je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (isABI == false) {
+ if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ }
+ else {
+ if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ }
+ if (isABI == false) {
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(retCount, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ ++retCount;
+ }
+ resultsz += sz;
+ }
+ }
+ if (isABI) {
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
+ retTy->isPointerTy() ) {
+        // Scalar needs to be at least 32 bits wide
+ if (resultsz < 32)
+ resultsz = 32;
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resultsz, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ else {
+ if (Func) { // direct call
+ if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
+ retAlignment = getTargetData()->getABITypeAlignment(retTy);
+ } else { // indirect call
+ const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
+ if (!llvm::getAlign(*CallI, 0, retAlignment))
+ retAlignment = getTargetData()->getABITypeAlignment(retTy);
+ }
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
+ MVT::i32),
+ DAG.getConstant(resultsz/8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ }
+ }
+
+ if (!Func) {
+    // This is the indirect function call case: PTX requires a prototype of
+    // the form
+    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
+    // to be emitted, and the label has to be used as the last arg of the call
+    // instruction.
+ // The prototype is embedded in a string and put as the operand for an
+ // INLINEASM SDNode.
+ SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
+ const char *asmstr = nvTM->getManagedStrPool()->
+ getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = { Chain,
+ DAG.getTargetExternalSymbol(asmstr,
+ getPointerTy()),
+ DAG.getMDNode(0),
+ DAG.getTargetConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
+ InFlag = Chain.getValue(1);
+ }
+ // Op to just print "call"
+ SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrintCallOps[] = { Chain,
+ DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
+ : retCount, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
+ PrintCallVTs, PrintCallOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the function name
+ SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallVoidOps[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
+ InFlag = Chain.getValue(1);
+
+ // Ops to print out the param list
+ SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgBeginOps[] = { Chain, InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
+ CallArgBeginOps, 2);
+ InFlag = Chain.getValue(1);
+
+ for (unsigned i=0, e=paramCount; i!=e; ++i) {
+ unsigned opcode;
+ if (i==(e-1))
+ opcode = NVPTXISD::LastCallArg;
+ else
+ opcode = NVPTXISD::CallArg;
+ SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(i, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
+ InFlag = Chain.getValue(1);
+ }
+ SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CallArgEndOps[] = { Chain,
+ DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
+ 3);
+ InFlag = Chain.getValue(1);
+
+ if (!Func) {
+ SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue PrototypeOps[] = { Chain,
+ DAG.getConstant(uniqueCallSite, MVT::i32),
+ InFlag };
+ Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Generate loads from param memory/moves from registers for result
+ if (Ins.size() > 0) {
+ if (isABI) {
+ unsigned resoffset = 0;
+ for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ unsigned sz = Ins[i].VT.getSizeInBits();
+ if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
+ std::vector<EVT> LoadRetVTs;
+ LoadRetVTs.push_back(Ins[i].VT);
+ LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
+ &LoadRetOps[0], LoadRetOps.size());
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ InVals.push_back(retval);
+ resoffset += sz/8;
+ }
+ }
+ else {
+ SmallVector<EVT, 16> resvtparts;
+ ComputeValueVTs(*this, retTy, resvtparts);
+
+ assert(Ins.size() == resvtparts.size() &&
+ "Unexpected number of return values in non-ABI case");
+ unsigned paramNum = 0;
+ for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ assert(EVT(Ins[i].VT) == resvtparts[i] &&
+ "Unexpected EVT type in non-ABI case");
+ unsigned numelems = 1;
+ EVT elemtype = Ins[i].VT;
+ if (Ins[i].VT.isVector()) {
+ numelems = Ins[i].VT.getVectorNumElements();
+ elemtype = Ins[i].VT.getVectorElementType();
+ }
+ std::vector<SDValue> tempRetVals;
+ for (unsigned j=0; j<numelems; ++j) {
+ std::vector<EVT> MoveRetVTs;
+ MoveRetVTs.push_back(elemtype);
+ MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> MoveRetOps;
+ MoveRetOps.push_back(Chain);
+ MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
+ MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
+ MoveRetOps.push_back(InFlag);
+ SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
+ &MoveRetOps[0], MoveRetOps.size());
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ tempRetVals.push_back(retval);
+ ++paramNum;
+ }
+ if (Ins[i].VT.isVector())
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
+ &tempRetVals[0], tempRetVals.size()));
+ else
+ InVals.push_back(tempRetVals[0]);
+ }
+ }
+ }
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite+1, true),
+ InFlag);
+ uniqueCallSite++;
+
+ // set isTailCall to false for now, until we figure out how to express
+ // tail call optimization in PTX
+ isTailCall = false;
+ return Chain;
+}
+
+// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
+// (see LegalizeDAG.cpp). This is slow and uses local memory.
+// We use extract/insert/build vector, just as LegalizeOp() did in LLVM 2.5.
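+//
+// For illustration: concat_vectors(<2 x f32> %A, <2 x f32> %B) becomes
+//   build_vector(extract(%A,0), extract(%A,1), extract(%B,0), extract(%B,1)),
+// keeping the values in registers instead of going through the stack.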
+SDValue NVPTXTargetLowering::
+LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumOperands = Node->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue SubOp = Node->getOperand(i);
+ EVT VVT = SubOp.getNode()->getValueType(0);
+ EVT EltVT = VVT.getVectorElementType();
+ unsigned NumSubElem = VVT.getVectorNumElements();
+ for (unsigned j=0; j < NumSubElem; ++j) {
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+ DAG.getIntPtrConstant(j)));
+ }
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+SDValue NVPTXTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return Op;
+ case ISD::BUILD_VECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ return Op;
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ default:
+ llvm_unreachable("Custom lowering not defined for operation");
+ }
+}
+
+SDValue
+NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
+ EVT v) const {
+ std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
+ std::stringstream suffix;
+ suffix << idx;
+ *name += suffix.str();
+ return DAG.getTargetExternalSymbol(name->c_str(), v);
+}
+
+SDValue
+NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
+ return getExtSymb(DAG, ".PARAM", idx, v);
+}
+
+SDValue
+NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+ return getExtSymb(DAG, ".HLPPARAM", idx);
+}
+
+// Check to see if the kernel argument is image*_t or sampler_t
+
+bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
+ static const char *const specialTypes[] = {
+ "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t"
+ };
+
+ const Type *Ty = arg->getType();
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+
+ if (!PTy)
+ return false;
+
+ if (!context)
+ return false;
+
+ const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ const std::string TypeName = STy ? STy->getName() : "";
+
+ for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
+ if (TypeName == specialTypes[i])
+ return true;
+
+ return false;
+}
+
+SDValue
+NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetData *TD = getTargetData();
+
+ const Function *F = MF.getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
+
+ SDValue Root = DAG.getRoot();
+ std::vector<SDValue> OutChains;
+
+ bool isKernel = llvm::isKernelFunction(*F);
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ std::vector<Type *> argTypes;
+ std::vector<const Argument *> theArgs;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ theArgs.push_back(I);
+ argTypes.push_back(I->getType());
+ }
+ assert(argTypes.size() == Ins.size() &&
+ "Ins types and function types did not match");
+
+ int idx = 0;
+ for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+ Type *Ty = argTypes[i];
+ EVT ObjectVT = getValueType(Ty);
+ assert(ObjectVT == Ins[i].VT &&
+ "Ins type did not match function type");
+
+    // If the kernel argument is image*_t or sampler_t, convert it to
+    // an i32 constant holding the parameter position. This can later be
+    // matched in the AsmPrinter to output the correct mangled name.
+ if (isImageOrSamplerVal(theArgs[i],
+ (theArgs[i]->getParent() ?
+ theArgs[i]->getParent()->getParent() : 0))) {
+ assert(isKernel && "Only kernels can have image/sampler params");
+ InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+ continue;
+ }
+
+ if (theArgs[i]->use_empty()) {
+ // argument is dead
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ continue;
+ }
+
+ // In the following cases, assign a node order of "idx+1"
+    // to newly created nodes. The SDNodes for params have to
+ // appear in the same order as their order of appearance
+ // in the original function. "idx+1" holds that order.
+ if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) {
+ // A plain scalar.
+ if (isABI || isKernel) {
+ // If ABI, load from the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx);
+ Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
+ F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
+ MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
+ F->getContext())));
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ InVals.push_back(p);
+ }
+ else {
+ // If no ABI, just move the param symbol
+ SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ InVals.push_back(p);
+ }
+ continue;
+ }
+
+ // Param has ByVal attribute
+ if (isABI || isKernel) {
+ // Return MoveParam(param symbol).
+      // Ideally, the param symbol could be returned directly,
+      // but when the SDNode builder decides to use it in a CopyToReg(),
+      // machine instruction selection fails because TargetExternalSymbol
+      // (not lowered) is target dependent, and CopyToReg assumes
+      // the source is lowered.
+ SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
+ if (p.getNode())
+ DAG.AssignOrdering(p.getNode(), idx+1);
+ if (isKernel)
+ InVals.push_back(p);
+ else {
+ SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
+ p);
+ InVals.push_back(p2);
+ }
+ } else {
+ // Have to move a set of param symbols to registers and
+ // store them locally and return the local pointer in InVals
+ const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
+ assert(elemPtrType &&
+ "Byval parameter should be a pointer type");
+ Type *elemType = elemPtrType->getElementType();
+ // Compute the constituent parts
+ SmallVector<EVT, 16> vtparts;
+ SmallVector<uint64_t, 16> offsets;
+ ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
+ unsigned totalsize = 0;
+ for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+ totalsize += vtparts[j].getStoreSizeInBits();
+ SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
+ CreateStackObject(totalsize/8, 16, false),
+ getPointerTy());
+ unsigned sizesofar = 0;
+ std::vector<SDValue> theChains;
+ for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+ unsigned numElems = 1;
+ if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+ EVT tmpvt = vtparts[j];
+ if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+ SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
+ getParamSymbol(DAG, idx, tmpvt));
+ SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
+ MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits()/8;
+ ++idx;
+ }
+ }
+ --idx;
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
+ theChains.size());
+ InVals.push_back(localcopy);
+ }
+ }
+
+  // Clang will check for explicit varargs and issue an error if any are used.
+  // However, Clang will let code with an implicit vararg prototype, like f(),
+  // pass. We treat this case as if the arg list is empty.
+ //if (F.isVarArg()) {
+ // assert(0 && "VarArg not supported yet!");
+ //}
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size()));
+
+ return Chain;
+}
+
+SDValue
+NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+ unsigned sizesofar = 0;
+ unsigned idx = 0;
+ for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ SDValue theVal = OutVals[i];
+ EVT theValType = theVal.getValueType();
+ unsigned numElems = 1;
+ if (theValType.isVector()) numElems = theValType.getVectorNumElements();
+ for (unsigned j=0,je=numElems; j!=je; ++j) {
+ SDValue tmpval = theVal;
+ if (theValType.isVector())
+ tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ theValType.getVectorElementType(),
+ tmpval, DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
+ dl, MVT::Other,
+ Chain,
+ DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ tmpval);
+ if (theValType.isVector())
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+ else
+ sizesofar += theValType.getStoreSizeInBits()/8;
+ ++idx;
+ }
+ }
+
+ return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+void
+NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const
+{
+ if (Constraint.length() > 1)
+ return;
+ else
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// NVPTX supports vectors of legal types of any length in intrinsics because
+// the NVPTX-specific type legalizer will legalize them to a PTX-supported
+// length.
+bool
+NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+ if (isTypeLegal(VT))
+ return true;
+ if (VT.isVector()) {
+ MVT eVT = VT.getVectorElementType();
+ if (isTypeLegal(eVT))
+ return true;
+ }
+ return false;
+}
+
+
+// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
+// TgtMemIntrinsic because we need the information that is only available in
+// the "Value" type of the destination pointer. In particular, the address
+// space information.
+bool
+NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = true;
+ Info.align = 0;
+ return true;
+
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
+ Info.memVT = MVT::i32;
+ else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
+ Info.memVT = MVT::f32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 0;
+ return true;
+
+ }
+ return false;
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+/// Used to guide target specific optimizations, like loop strength reduction
+/// (LoopStrengthReduce.cpp) and memory optimization for address mode
+/// (CodeGenPrepare.cpp)
+bool
+NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+
+ // AddrMode - This represents an addressing mode of:
+ // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+ //
+ // The legal address modes are
+ // - [avar]
+ // - [areg]
+ // - [areg+immoff]
+ // - [immAddr]
+
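+  // For example, "ld.f32 %f0, [%r1+8];" uses the [areg+immoff] form and is
+  // legal, while a two-register form such as [%r1+%r2] cannot be expressed
+  // in PTX and is rejected below (the Scale/HasBaseReg checks).
+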
+ if (AM.BaseGV) {
+ if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
+ return false;
+ return true;
+ }
+
+ switch (AM.Scale) {
+ case 0: // "r", "r+i" or "i" is allowed
+ break;
+ case 1:
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ return false;
+ // Otherwise we have r+i.
+ break;
+ default:
+ // No scale > 1 is allowed
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+NVPTXTargetLowering::ConstraintType
+NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'r':
+ case 'h':
+ case 'c':
+ case 'l':
+ case 'f':
+ case 'd':
+ case '0':
+ case 'N':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*>
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'c':
+ return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
+ case 'h':
+ return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
+ case 'r':
+ return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
+ case 'l':
+ case 'N':
+ return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
+ case 'f':
+ return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
+ case 'd':
+ return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
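+  // Log2 value: 2^4 = 16, i.e. functions are treated as 16-byte aligned.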
+ return 4;
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
new file mode 100644
index 000000000000..86246e6449d2
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -0,0 +1,144 @@
+//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXISELLOWERING_H
+#define NVPTXISELLOWERING_H
+
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace NVPTXISD {
+enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ Wrapper,
+ CALL,
+ RET_FLAG,
+ LOAD_PARAM,
+ NVBuiltin,
+ DeclareParam,
+ DeclareScalarParam,
+ DeclareRetParam,
+ DeclareRet,
+ DeclareScalarRet,
+ LoadParam,
+ StoreParam,
+ StoreParamS32, // to sext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
+ MoveToParam,
+ PrintCall,
+ PrintCallUni,
+ CallArgBegin,
+ CallArg,
+ LastCallArg,
+ CallArgEnd,
+ CallVoid,
+ CallVal,
+ CallSymbol,
+ Prototype,
+ MoveParam,
+ MoveRetval,
+ MoveToRetval,
+ StoreRetval,
+ PseudoUseParam,
+ RETURN,
+ CallSeqBegin,
+ CallSeqEnd,
+ Dummy
+};
+}
+
+//===--------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===--------------------------------------------------------------------===//
+class NVPTXTargetLowering : public TargetLowering {
+public:
+ explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ bool isTypeSupportedInIntrinsic(MVT VT) const;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ unsigned Intrinsic) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type
+ /// Used to guide target specific optimizations, like loop strength
+ /// reduction (LoopStrengthReduce.cpp) and memory optimization for
+ /// address mode (CodeGenPrepare.cpp)
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const {
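+    // PTX comparisons produce predicate (.pred) values, modeled here as i1.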
+ return MVT::i1;
+ }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
+
+ std::string getPrototype(Type *, const ArgListTy &,
+ const SmallVectorImpl<ISD::OutputArg> &,
+ unsigned retAlignment) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ NVPTXTargetMachine *nvTM;
+
+ // PTX always uses 32-bit shift amounts
+ virtual MVT getShiftAmountTy(EVT LHSTy) const {
+ return MVT::i32;
+ }
+
+private:
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
+ MVT::i32) const;
+ SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
+ SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
+
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+};
+} // namespace llvm
+
+#endif // NVPTXISELLOWERING_H
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
new file mode 100644
index 000000000000..f11f1b8f96fc
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -0,0 +1,43 @@
+//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe NVPTX instruction formats
+//
+//===----------------------------------------------------------------------===//
+
+// Vector instruction type enum
+class VecInstTypeEnum<bits<4> val> {
+ bits<4> Value=val;
+}
+def VecNOP : VecInstTypeEnum<0>;
+
+// Generic NVPTX Format
+
+class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<14> Inst;
+
+ let Namespace = "NVPTX";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+
+ // TSFlagFields
+ bits<4> VecInstType = VecNOP.Value;
+ bit IsSimpleMove = 0;
+ bit IsLoad = 0;
+ bit IsStore = 0;
+
+ let TSFlags{3-0} = VecInstType;
+ let TSFlags{4-4} = IsSimpleMove;
+ let TSFlags{5-5} = IsLoad;
+ let TSFlags{6-6} = IsStore;
+}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
new file mode 100644
index 000000000000..cd50deb26a23
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -0,0 +1,326 @@
+//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#define GET_INSTRINFO_CTOR
+#include "NVPTXGenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support on this constructor.
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
+: NVPTXGenInstrInfo(),
+ TM(tm),
+ RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+
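+// Register-to-register copy: pick the mov instruction whose register class
+// matches both operands; scalar and vector classes each use their own opcode.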
+void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
+ NVPTX::Int32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
+ NVPTX::V4F32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
+ NVPTX::V2F32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I32RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I8RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
+ NVPTX::V4I16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I16RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
+ NVPTX::V2I64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
+ NVPTX::V2F64RegsRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else {
+ llvm_unreachable("Don't know how to copy a register");
+ }
+}
+
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg,
+ unsigned &DestReg) const {
+ // Look for the appropriate part of TSFlags
+ bool isMove = false;
+
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
+ NVPTX::SimpleMoveShift;
+ isMove = (TSFlags == 1);
+
+ if (isMove) {
+ MachineOperand dest = MI.getOperand(0);
+ MachineOperand src = MI.getOperand(1);
+ assert(dest.isReg() && "dest of a movrr is not a reg");
+ assert(src.isReg() && "src of a movrr is not a reg");
+
+ SrcReg = src.getReg();
+ DestReg = dest.getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
+{
+ switch (MI.getOpcode()) {
+ default: return false;
+ case NVPTX::INT_PTX_SREG_NTID_X:
+ case NVPTX::INT_PTX_SREG_NTID_Y:
+ case NVPTX::INT_PTX_SREG_NTID_Z:
+ case NVPTX::INT_PTX_SREG_TID_X:
+ case NVPTX::INT_PTX_SREG_TID_Y:
+ case NVPTX::INT_PTX_SREG_TID_Z:
+ case NVPTX::INT_PTX_SREG_CTAID_X:
+ case NVPTX::INT_PTX_SREG_CTAID_Y:
+ case NVPTX::INT_PTX_SREG_CTAID_Z:
+ case NVPTX::INT_PTX_SREG_NCTAID_X:
+ case NVPTX::INT_PTX_SREG_NCTAID_Y:
+ case NVPTX::INT_PTX_SREG_NCTAID_Z:
+ case NVPTX::INT_PTX_SREG_WARPSIZE:
+ return true;
+ }
+}
+
+
+bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isLoad = false;
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
+ NVPTX::isLoadShift;
+ isLoad = (TSFlags == 1);
+ if (isLoad)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isLoad;
+}
+
+bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
+ unsigned &AddrSpace) const {
+ bool isStore = false;
+ unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
+ NVPTX::isStoreShift;
+ isStore = (TSFlags == 1);
+ if (isStore)
+ AddrSpace = getLdStCodeAddrSpace(MI);
+ return isStore;
+}
+
+
+bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
+ unsigned addrspace = 0;
+ if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
+ return false;
+ if (isLoadInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ if (isStoreInstr(*MI, addrspace))
+ if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
+ return false;
+ return true;
+}
+
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+///    a successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    branch, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == NVPTX::CBranch) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+  // If the block ends with NVPTX::GOTO and NVPTX::CBranch, handle it.
+ if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with two NVPTX::GOTOs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
+ LastInst->getOpcode() == NVPTX::GOTO) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != NVPTX::CBranch)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "NVPTX branch conditions have at most one component!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch))
+ .addReg(Cond[0].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(NVPTX::CBranch))
+ .addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
+ return 2;
+}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
new file mode 100644
index 000000000000..7b8e218b05b6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -0,0 +1,83 @@
+//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXINSTRUCTIONINFO_H
+#define NVPTXINSTRUCTIONINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "NVPTXGenInstrInfo.inc"
+
+namespace llvm {
+
+class NVPTXInstrInfo : public NVPTXGenInstrInfo
+{
+ NVPTXTargetMachine &TM;
+ const NVPTXRegisterInfo RegInfo;
+public:
+ explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+
+ virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+ /* The following virtual functions are used in register allocation.
+ * They are not implemented because the existing interface and the logic
+ * at the caller side do not work for the elementized vector load and store.
+ *
+ * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ * int &FrameIndex) const;
+ * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned SrcReg, bool isKill, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ * MachineBasicBlock::iterator MBBI,
+ * unsigned DestReg, int FrameIndex,
+ * const TargetRegisterClass *RC) const;
+ */
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const ;
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg,
+ unsigned &DestReg) const;
+ bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+ bool isReadSpecialReg(MachineInstr &MI) const;
+
+ virtual bool CanTailMerge(const MachineInstr *MI) const ;
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
+ return MI.getOperand(2).getImm();
+ }
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
new file mode 100644
index 000000000000..8a410b872925
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -0,0 +1,2837 @@
+//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "NVPTXInstrFormats.td"
+
+// A NOP instruction
+def NOP : NVPTXInst<(outs), (ins), "", []>;
+
+// List of vector specific properties
+def isVecLD : VecInstTypeEnum<1>;
+def isVecST : VecInstTypeEnum<2>;
+def isVecBuild : VecInstTypeEnum<3>;
+def isVecShuffle : VecInstTypeEnum<4>;
+def isVecExtract : VecInstTypeEnum<5>;
+def isVecInsert : VecInstTypeEnum<6>;
+def isVecDest : VecInstTypeEnum<7>;
+def isVecOther : VecInstTypeEnum<15>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instruction Predicate Definitions
+//===----------------------------------------------------------------------===//
+
+
+def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def useAtomRedG32forGen32 :
+ Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+def useAtomRedG64forGen64 :
+ Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget.hasVote()">;
+def hasDouble : Predicate<"Subtarget.hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDU : Predicate<"Subtarget.hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+
+def doF32FTZ : Predicate<"UseF32FTZ">;
+
+def doFMAF32 : Predicate<"doFMAF32">;
+def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
+def doFMAF32AGG : Predicate<"doFMAF32AGG">;
+def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
+def doFMAF64 : Predicate<"doFMAF64">;
+def doFMAF64AGG : Predicate<"doFMAF64AGG">;
+def doFMADF32 : Predicate<"doFMADF32">;
+def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
+
+def doMulWide : Predicate<"doMulWide">;
+
+def allowFMA : Predicate<"allowFMA">;
+def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
+
+def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
+def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+
+def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+
+def true : Predicate<"1">;
+
+//===----------------------------------------------------------------------===//
+// Special Handling for 8-bit Operands and Operations
+//
+// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
+// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
+// 8-bit registers.
+//
+// PTX ld, st and cvt instructions permit source and destination data operands
+// to be wider than the instruction-type size, so that narrow values may be
+// loaded, stored, and converted using regular-width registers.
+//
+// So in PTX generation, we
+// - always use 16-bit registers in place of 8-bit registers.
+// (8-bit variables should stay as 8-bit as they represent memory layout.)
+// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
+//   before the operation
+// . div
+// . rem
+// . neg (sign)
+// . set, setp
+// . shr
+//
+// We are patching the operations by inserting the cvt instructions in the
+// asm strings of the affected instructions.
+//
+// Since vector operations, except for ld/st, are eventually elementized, we
+// do not need to special-handle the vector 8-bit operations.
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt.s16.s8 %temp2, %b;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// mov.b16 %temp2, %b;
+// cvt.s16.s8 %temp2, %temp2;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .",
+ !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+ !strconcat("mov.b16 \t%temp2, $b;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+// Generate string block like
+// {
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// mov.b16 %temp1, %b;
+// cvt.s16.s8 %temp1, %temp1;
+// cvt.s16.s8 %temp2, %a;
+// opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp1;\n\t",
+ !strconcat(".reg .", !strconcat(TypeStr,
+ !strconcat(" \t%temp2;\n\t",
+ !strconcat("mov.b16 \t%temp1, $a;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
+ !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Some Common Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass I3<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_noi8<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+}
+
+multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+}
+
+multiclass F3<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA_ftz]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[allowFMA]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[allowFMA]>;
+}
+
+multiclass F3_rn<string OpcStr, SDNode OpNode> {
+ def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst,
+ (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst,
+ (OpNode Float32Regs:$a, fpimm:$b))]>;
+}
+
+multiclass F2<string OpcStr, SDNode OpNode> {
+ def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
+ !strconcat(OpcStr, ".f64 \t$dst, $a;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
+ def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
+ Requires<[doF32FTZ]>;
+ def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ !strconcat(OpcStr, ".f32 \t$dst, $a;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instructions.
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Integer Arithmetic
+//-----------------------------------
+
+multiclass ADD_SUB_i1<SDNode OpNode> {
+ def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
+}
+
+defm ADD_i1 : ADD_SUB_i1<add>;
+defm SUB_i1 : ADD_SUB_i1<sub>;
+
+
+defm ADD : I3<"add.s", add>;
+defm SUB : I3<"sub.s", sub>;
+
+defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
+defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
+
+defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
+defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
+
+//mul.wide PTX instruction
+def SInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(32))
+ return true;
+ return false;
+}]>;
+
+def UInt32Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(32))
+ return true;
+ return false;
+}]>;
+
+def SInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isSignedIntN(16))
+ return true;
+ return false;
+}]>;
+
+def UInt16Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ if (v.isIntN(16))
+ return true;
+ return false;
+}]>;
+
+def Int5Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 32
+ // Only then the result from (x << v) will be i32
+ if (v.sge(0) && v.slt(32))
+ return true;
+ return false;
+}]>;
+
+def Int4Const : PatLeaf<(imm), [{
+ const APInt &v = N->getAPIntValue();
+ // Check if 0 <= v < 16
+ // Only then the result from (x << v) will be i16
+ if (v.sge(0) && v.slt(16))
+ return true;
+ return false;
+}]>;
+
+def SHL2MUL32 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(32, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
+}]>;
+
+def SHL2MUL16 : SDNodeXForm<imm, [{
+ const APInt &v = N->getAPIntValue();
+ APInt temp(16, 1);
+ return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
+}]>;
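+// A left shift by a small constant is the same as a multiply by 2^v, so the
+// patterns below use SHL2MUL32/SHL2MUL16 to rewrite the shift amount as a
+// power-of-two multiplier; e.g. (shl (sext i32 x), 3) becomes
+// mul.wide.s32 x, 8 (and likewise for the 16-bit and unsigned forms).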
+
+def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+
+def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int32Regs:$a, i64imm:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+
+def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+
+def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+
+def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
+ (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
+ (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+defm MULT : I3<"mul.lo.s", mul>;
+
+defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
+defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
+def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s16 temp1; \n\t",
+ !strconcat(".reg \t.s16 temp2; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
+def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", "")))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
+def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.u16 temp1; \n\t",
+ !strconcat(".reg \t.u16 temp2; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
+ !strconcat("mov.b16 \ttemp2, $b; \n\t",
+ !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
+ !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
+ !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
+
+
+defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
+defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
+
+defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
+// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
+defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
+// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
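+// (visitSREM/visitUREM rewrite x % C as x - (x / C) * C whenever the division
+// by the constant can itself be optimized, so the immediate forms never reach
+// instruction selection.)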
+
+def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ Int8Regs:$c))]>;
+def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
+ imm:$c))]>;
+def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ Int8Regs:$c))]>;
+def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
+def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
+def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add
+ (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
+def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
+ "mad.lo.s16 \t$dst, $a, $b, $c;",
+ [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
+ imm:$c))]>;
+
+def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
+def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
+def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
+def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
+ "mad.lo.s32 \t$dst, $a, $b, $c;",
+ [(set Int32Regs:$dst, (add
+ (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+
+def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
+ "mad.lo.s64 \t$dst, $a, $b, $c;",
+ [(set Int64Regs:$dst, (add
+ (mul Int64Regs:$a, imm:$b), imm:$c))]>;
+
+
+def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
+ "neg.s16 \t$dst, $dst;"),
+ [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
+def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "neg.s16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
+def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "neg.s32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "neg.s64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
+
+//-----------------------------------
+// Floating Point Arithmetic
+//-----------------------------------
+
+// Constant 1.0f
+def FloatConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
+ return false;
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==1.0f);
+}]>;
+// Constant (double)1.0
+def DoubleConst1 : PatLeaf<(fpimm), [{
+ if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
+ return false;
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==1.0);
+}]>;
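+// These 1.0 leaves let the reciprocal instructions below match
+// fdiv 1.0, x directly as rcp rather than as a full division.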
+
+defm FADD : F3<"add", fadd>;
+defm FSUB : F3<"sub", fsub>;
+defm FMUL : F3<"mul", fmul>;
+
+defm FADD_rn : F3_rn<"add", fadd>;
+defm FSUB_rn : F3_rn<"sub", fsub>;
+defm FMUL_rn : F3_rn<"mul", fmul>;
+
+defm FABS : F2<"abs", fabs>;
+defm FNEG : F2<"neg", fneg>;
+defm FSQRT : F2<"sqrt.rn", fsqrt>;
+
+//
+// F64 division
+//
+def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ "rcp.rn.f64 \t$dst, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
+def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, Float64Regs:$b))]>;
+def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ "div.rn.f64 \t$dst, $a, $b;",
+ [(set Float64Regs:$dst,
+ (fdiv Float64Regs:$a, fpimm:$b))]>;
+
+//
+// F32 Approximate reciprocal
+//
+def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Approximate division
+//
+def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.approx.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Semi-accurate reciprocal
+//
+// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
+//
+def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.approx.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Semi-accurate division
+//
+def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.full.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[do_DIVF32_FULL]>;
+//
+// F32 Accurate reciprocal
+//
+def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.ftz.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20, doF32FTZ]>;
+def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ "rcp.rn.f32 \t$dst, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+//
+// F32 Accurate division
+//
+def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.ftz.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[reqPTX20]>;
+def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ "div.rn.f32 \t$dst, $a, $b;",
+ [(set Float32Regs:$dst,
+ (fdiv Float32Regs:$a, fpimm:$b))]>,
+ Requires<[reqPTX20]>;
+
+
+multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b),
+ Float32Regs:$c))]>, Requires<[Pred]>;
+ // This works around a quirk in TableGen that does not automatically
+ // generate the following permuted rule rrr2 from the above rrr,
+ // so we explicitly add it here. This happens for FMA32 only.
+ // See the comments at FMAD32 and FMA32 for more information.
+ def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd Float32Regs:$c,
+ (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+ Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float32Regs:$dst, (fadd
+ (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
+ def rrr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, Float64Regs:$b),
+ Float64Regs:$c))]>, Requires<[Pred]>;
+ def rri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
+ Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+ def rir : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set Float64Regs:$dst, (fadd
+ (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+ Requires<[Pred]>;
+}
+
+// For an unknown reason (most likely a bug in TableGen), TableGen does not
+// automatically generate the rrr2 rule from
+// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
+// If we reverse the order of the following two lines, the rrr2 rule is
+// generated for FMA32, but the rrr rule is not.
+// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
+defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
+defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
+
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
+// b*c-a => fmad(b, c, -a)
+// - legal because b*c-a <=> b*c+(-a)
+multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
+ (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
+ Requires<[Pred]>;
+ def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+ (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
+ (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
+ (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
+ Requires<[Pred]>;
+}
+
+defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
+defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
+defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
+defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
+defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
+
+def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "sin.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "cos.approx.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+
+//-----------------------------------
+// Logical Arithmetic
+//-----------------------------------
+
+multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
+ def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+ def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+ !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
+ def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int16Regs:$b))]>;
+ def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int64Regs:$b))]>;
+ def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+}
+
+defm OR : LOG_FORMAT<"or", or>;
+defm AND : LOG_FORMAT<"and", and>;
+defm XOR : LOG_FORMAT<"xor", xor>;
+
+def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+ "not.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
+def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
+def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "not.b16 \t$dst, $src;",
+ [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
+def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ "not.b32 \t$dst, $src;",
+ [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ "not.b64 \t$dst, $src;",
+ [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
+
+// For shifts, the second src operand must be a 32-bit value.
+multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
+defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
+
+// For shifts, the second src operand must be a 32-bit value.
+// A cvt is needed to widen the 8-bit case first.
+multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
+ def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ Int32Regs:$b))]>;
+ def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ Int32Regs:$b))]>;
+ def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+ (i32 imm:$b)))]>;
+ def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+ (i32 imm:$b)))]>;
+ def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ Int32Regs:$b))]>;
+ def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+ (i32 imm:$b)))]>;
+ def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ Int32Regs:$b))]>;
+ def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+ !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
+ !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
+ [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+ (i32 imm:$b)))]>;
+}
+
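+// For the 8-bit forms the source is widened first with CVTStr; for example
+// SRAi8rr expands to "cvt.s16.s8 $dst, $a; shr.s16 $dst, $dst, $b;".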
+defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
+defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
+
+// 32bit
+def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
+def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
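+// rotl x, amt == (x << amt) | (x >> (32 - amt)); the two shifted halves have
+// no overlapping bits, so add.u32 in ROT32imm_sw acts as the bitwise or.
+// The register-amount and 64-bit versions below use the same scheme.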
+
+def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %lhs;\n\t",
+ !strconcat(".reg .b32 %rhs;\n\t",
+ !strconcat(".reg .b32 %amt2;\n\t",
+ !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
+ !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
+
+// 64bit
+def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ i32imm:$amt1, i32imm:$amt2),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))),
+ []>;
+
+def SUB_FRM_64 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
+def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
+
+def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+
+def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
+ Int32Regs:$amt),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b64 %lhs;\n\t",
+ !strconcat(".reg .b64 %rhs;\n\t",
+ !strconcat(".reg .u32 %amt2;\n\t",
+ !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
+ !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
+ !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
+ !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
+ !strconcat("}}", ""))))))))),
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+
+
+//-----------------------------------
+// Data Movement (Load / Store, Move)
+//-----------------------------------
+
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
+ [SDNPWantRoot]>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
+ [SDNPWantRoot]>;
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int32Regs, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops Int64Regs, i64imm);
+}
+
+def imem : Operand<iPTR> {
+ let PrintMethod = "printOperand";
+}
+
+def imemAny : Operand<iPTRAny> {
+ let PrintMethod = "printOperand";
+}
+
+def LdStCode : Operand<i32> {
+ let PrintMethod = "printLdStCode";
+}
+
+def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
+
+def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
+ "mov.u32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
+ "mov.u64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
+
+// copyPhysReg is hard-coded in NVPTXInstrInfo.cpp
+let IsSimpleMove=1 in {
+def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
+ "mov.pred \t$dst, $sss;", []>;
+def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.u16 \t$dst, $sss;", []>;
+def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.u32 \t$dst, $sss;", []>;
+def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.u64 \t$dst, $sss;", []>;
+
+def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+ "mov.f32 \t$dst, $src;", []>;
+def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
+ "mov.f64 \t$dst, $src;", []>;
+}
+def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
+ "mov.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, imm:$src)]>;
+def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int8Regs:$dst, imm:$src)]>;
+def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int16Regs:$dst, imm:$src)]>;
+def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.u32 \t$dst, $src;",
+ [(set Int32Regs:$dst, imm:$src)]>;
+def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.u64 \t$dst, $src;",
+ [(set Int64Regs:$dst, imm:$src)]>;
+
+def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
+ "mov.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, fpimm:$src)]>;
+def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
+ "mov.f64 \t$dst, $src;",
+ [(set Float64Regs:$dst, fpimm:$src)]>;
+
+def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
+
+//---- Copy Frame Index ----
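+// LEA_ADDRi materializes a frame-index address matched by ADDRri/ADDRri64 as
+// base register plus immediate offset, using a plain add.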
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
+ "add.u32 \t$dst, ${addr:add};",
+ [(set Int32Regs:$dst, ADDRri:$addr)]>;
+def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
+ "add.u64 \t$dst, ${addr:add};",
+ [(set Int64Regs:$dst, ADDRri64:$addr)]>;
+
+//-----------------------------------
+// Comparison and Selection
+//-----------------------------------
+
+// Generate string block like
+// {
+// .reg .pred p;
+// setp.gt.s16 p, %a, %b;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
+class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
+ string b> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1 , OpcStr);
+ string t3 = !strconcat(t2 , sz1);
+ string t4 = !strconcat(t3 , " \tp, ");
+ string t5 = !strconcat(t4 , a);
+ string t6 = !strconcat(t5 , ", ");
+ string t7 = !strconcat(t6 , b);
+ string t8 = !strconcat(t7 , ";\n\tselp.s");
+ string t9 = !strconcat(t8 , sz2);
+ string t10 = !strconcat(t9, " \t");
+ string t11 = !strconcat(t10, d);
+ string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
+}
+
+// Generate string block like
+// {
+// .reg .pred p;
+// .reg .s16 %temp1;
+// .reg .s16 %temp2;
+// cvt.s16.s8 %temp1, %a;
+// cvt.s16.s8 %temp2, %b;
+// setp.gt.s16 p, %temp1, %temp2;
+// selp.s16 %dst, -1, 0, p;
+// }
+// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
+class Set_Stri8<string OpcStr, string d, string a, string b, string type,
+ string cvt> {
+ string t1 = "{{\n\t.reg .pred p;\n\t";
+ string t2 = !strconcat(t1, ".reg .");
+ string t3 = !strconcat(t2, type);
+ string t4 = !strconcat(t3, " %temp1;\n\t");
+ string t5 = !strconcat(t4, ".reg .");
+ string t6 = !strconcat(t5, type);
+ string t7 = !strconcat(t6, " %temp2;\n\t");
+ string t8 = !strconcat(t7, cvt);
+ string t9 = !strconcat(t8, " \t%temp1, ");
+ string t10 = !strconcat(t9, a);
+ string t11 = !strconcat(t10, ";\n\t");
+ string t12 = !strconcat(t11, cvt);
+ string t13 = !strconcat(t12, " \t%temp2, ");
+ string t14 = !strconcat(t13, b);
+ string t15 = !strconcat(t14, ";\n\t");
+ string t16 = !strconcat(t15, OpcStr);
+ string t17 = !strconcat(t16, "16");
+ string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
+ string t19 = !strconcat(t18, "selp.s16 \t");
+ string t20 = !strconcat(t19, d);
+ string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
+}
+
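+// ISET_FORMAT instantiates three flavors of each integer compare:
+//   *_toiN - result widened to an N-bit register via setp + selp (no pattern),
+//   *_p    - result in a predicate register, selected from the setCC node,
+//   *_u32  - result as a u32 (-1/0) via the set.CmpOp.u32.* form.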
+multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
+ string TypeStr, string CVTStr> {
+ def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
+ []>;
+ def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+ Int16Regs:$b),
+ Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
+ []>;
+ def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
+ []>;
+ def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+ Int64Regs:$b),
+ Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
+ []>;
+
+ def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
+ Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
+ def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
+ !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
+ def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
+ !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
+ def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
+ !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
+
+ def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+ Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+ def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+ Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+ def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
+ Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
+ def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
+ Int16Regs:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
+ def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+ def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
+ !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
+ def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+ Int32Regs:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
+ !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
+ def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
+ Int64Regs:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
+ def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+ def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
+ !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
+}
+
+multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
+ def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
+ Float32Regs:$b),
+ Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
+ []>, Requires<[doF32FTZ]>;
+ def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
+ Float32Regs:$b),
+ Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
+ []>;
+ def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
+ Float64Regs:$b),
+ Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
+ []>;
+ def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
+ Float64Regs:$b),
+ Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
+ []>;
+
+ def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
+ , Float32Regs:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
+ , Requires<[doF32FTZ]>;
+ def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
+ !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
+ [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
+
+ def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+ def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b),
+ !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
+ def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b),
+ !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
+}
+
+defm ISetSGT
+: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
+defm ISetUGT
+: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
+defm ISetSLT
+: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
+defm ISetULT
+: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
+defm ISetSGE
+: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
+defm ISetUGE
+: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
+defm ISetSLE
+: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
+defm ISetULE
+: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
+defm ISetSEQ
+: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
+defm ISetUEQ
+: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
+defm ISetSNE
+: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
+defm ISetUNE
+: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
+
+def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
+def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ "xor.pred \t$dst, $a, $b;",
+ [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
+def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
+ [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
+def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
+ [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
+
+// Compare 2 i1's and produce a u32
+def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
+ [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
+def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int1Regs:$a, Int1Regs:$b),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .pred temp;\n\t",
+ !strconcat("xor.pred \ttemp, $a, $b;\n\t",
+ !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
+ [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
+
+defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
+defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
+defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
+defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
+defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
+defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
+
+defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
+defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.", setult>;
+defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.", setuge>;
+defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.", setule>;
+defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.", setueq>;
+defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.", setune>;
+
+defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.", seto>;
+defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.", setuo>;
+
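+// selp has no .pred form, so an i1 select is expanded to (p & a) | (~p & b).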
+def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
+ (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
+ (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
+def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
+def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
+ (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
+def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
+ (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
+def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
+ (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
+def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
+def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
+ (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
+def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
+ (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
+ "selp.b16 \t$dst, $a, $b, $p;",
+ [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
+def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
+def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
+ (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
+def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
+ (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
+def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
+ (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
+def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
+ (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
+def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
+ (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
+ "selp.b64 \t$dst, $a, $b, $p;",
+ [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
+
+def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst,
+ (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
+def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
+def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
+def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
+ (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
+ "selp.f32 \t$dst, $a, $b, $p;",
+ [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
+
+def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst,
+ (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
+def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
+def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
+ "selp.f64 \t$dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
+def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
+ (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
+ "selp.f64 \t $dst, $a, $b, $p;",
+ [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
+
+//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
+// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
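+// Target-specific SDNodes used to model PTX call and parameter lowering;
+// the instruction classes that select them follow below.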
+def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisInt<2>]>;
+def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
+ SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
+def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
+def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
+def SDTCallValProfile : SDTypeProfile<1, 0, []>;
+def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
+def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
+def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
+def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+
+def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
+ SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
+ SDTDeclareParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
+ [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
+def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
+ []>;
+def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
+ SDTPseudoUseParamProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t$dst, [retval0+$b];"),
+ [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+
+class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
+ !strconcat(!strconcat("mov", opstr),
+ "\t$dst, retval$b;"),
+ [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
+
+class StoreParamInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], $val;"),
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+
+class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("mov", opstr),
+ "\tparam$a, $val;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+
+class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval0+$a], $val;"),
+ [(StoreRetval (i32 imm:$a), regclass:$val)]>;
+
+class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
+ !strconcat(!strconcat("mov", opstr),
+ "\tfunc_retval$num, $val;"),
+ [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
+
+class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs), (ins regclass:$val),
+ !strconcat(!strconcat("mov", opstr),
+ "\tfunc_retval0, $val;"),
+ [(MoveRetval regclass:$val)]>;
+
+def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
+"call (retval0), ",
+ [(PrintCall (i32 1))]>;
+def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1), ",
+ [(PrintCall (i32 2))]>;
+def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2), ",
+ [(PrintCall (i32 3))]>;
+def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3), ",
+ [(PrintCall (i32 4))]>;
+def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCall (i32 5))]>;
+def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCall (i32 6))]>;
+def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
+"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCall (i32 7))]>;
+def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
+!strconcat("call (retval0, retval1, retval2, retval3, retval4",
+ ", retval5, retval6, retval7), "),
+ [(PrintCall (i32 8))]>;
+
+def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
+ [(PrintCall (i32 0))]>;
+
+def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
+"call.uni (retval0), ",
+ [(PrintCallUni (i32 1))]>;
+def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1), ",
+ [(PrintCallUni (i32 2))]>;
+def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2), ",
+ [(PrintCallUni (i32 3))]>;
+def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3), ",
+ [(PrintCallUni (i32 4))]>;
+def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4), ",
+ [(PrintCallUni (i32 5))]>;
+def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
+ [(PrintCallUni (i32 6))]>;
+def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
+"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
+ [(PrintCallUni (i32 7))]>;
+def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
+!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
+ ", retval5, retval6, retval7), "),
+ [(PrintCallUni (i32 8))]>;
+
+def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
+ [(PrintCallUni (i32 0))]>;
+
+def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
+def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
+def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
+def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">;
+
+//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
+// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
+// "cvt.u16.u32\t$dst, temp_param_reg;"),
+// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
+// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
+// "cvt.u16.u32\t$dst, temp_param_reg;"),
+// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+
+def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
+def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
+
+def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
+def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
+def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
+ "cvt.u16.u32\t$dst, retval$b;",
+ [(set Int16Regs:$dst,
+ (LoadParam (i32 0), (i32 imm:$b)))]>;
+def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
+ "cvt.u16.u32\t$dst, retval$b;",
+ [(set Int8Regs:$dst,
+ (LoadParam (i32 0), (i32 imm:$b)))]>;
+
+def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
+def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
+
+def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
+def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
+
+def StoreParamI16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ "st.param.b16\t[param$a+$b], $val;",
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+
+def StoreParamI8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ "st.param.b8\t[param$a+$b], $val;",
+ [(StoreParam
+ (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreParamS32I16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+def StoreParamU32I16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+
+def StoreParamU32I8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+def StoreParamS32I8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
+ "st.param.b32\t[param$a+$b], temp_param_reg;"),
+ [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
+def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
+
+def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
+def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
+def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
+def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
+def MoveToParamI16 : NVPTXInst<(outs),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "mov.b32\tparam$a, temp_param_reg;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+def MoveToParamI8 : NVPTXInst<(outs),
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
+ "mov.b32\tparam$a, temp_param_reg;"),
+ [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+
+def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
+def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
+def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
+def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">;
+
+//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
+// !strconcat("\{\n\t",
+// !strconcat(".reg .b32 temp_retval_reg;\n\t",
+// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
+// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
+// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
+//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
+// !strconcat("\{\n\t",
+// !strconcat(".reg .b32 temp_retval_reg;\n\t",
+// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
+// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
+// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
+
+def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
+def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
+
+def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
+def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
+def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
+def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">;
+def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
+def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
+
+def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">;
+def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">;
+def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
+def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
+def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
+ "cvt.u32.u16\tfunc_retval$num, $val;",
+ [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
+def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
+ "cvt.u32.u16\tfunc_retval$num, $val;",
+ [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
+
+def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
+def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
+def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
+def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
+
+class CallArgInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a, ",
+ [(CallArg (i32 0), regclass:$a)]>;
+
+class LastCallArgInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a",
+ [(LastCallArg (i32 0), regclass:$a)]>;
+
+def CallArgI64 : CallArgInst<Int64Regs>;
+def CallArgI32 : CallArgInst<Int32Regs>;
+def CallArgI16 : CallArgInst<Int16Regs>;
+def CallArgI8 : CallArgInst<Int8Regs>;
+
+def CallArgF64 : CallArgInst<Float64Regs>;
+def CallArgF32 : CallArgInst<Float32Regs>;
+
+def LastCallArgI64 : LastCallArgInst<Int64Regs>;
+def LastCallArgI32 : LastCallArgInst<Int32Regs>;
+def LastCallArgI16 : LastCallArgInst<Int16Regs>;
+def LastCallArgI8 : LastCallArgInst<Int8Regs>;
+
+def LastCallArgF64 : LastCallArgInst<Float64Regs>;
+def LastCallArgF32 : LastCallArgInst<Float32Regs>;
+
+def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
+ [(CallArg (i32 0), (i32 imm:$a))]>;
+def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
+ [(LastCallArg (i32 0), (i32 imm:$a))]>;
+
+def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
+ [(CallArg (i32 1), (i32 imm:$a))]>;
+def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
+ [(LastCallArg (i32 1), (i32 imm:$a))]>;
+
+def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
+ "$addr, ",
+ [(CallVoid (Wrapper tglobaladdr:$addr))]>;
+def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
+ "$addr, ",
+ [(CallVoid Int32Regs:$addr)]>;
+def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
+ "$addr, ",
+ [(CallVoid Int64Regs:$addr)]>;
+def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
+ ", prototype_$val;",
+ [(Prototype (i32 imm:$val))]>;
+
+def DeclareRetMemInst : NVPTXInst<(outs),
+ (ins i32imm:$align, i32imm:$size, i32imm:$num),
+ ".param .align $align .b8 retval$num[$size];",
+ [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".param .b$size retval$num;",
+ [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
+def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
+ ".reg .b$size retval$num;",
+ [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
+
+def DeclareParamInst : NVPTXInst<(outs),
+ (ins i32imm:$align, i32imm:$a, i32imm:$size),
+ ".param .align $align .b8 param$a[$size];",
+ [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
+def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".param .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
+def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
+ ".reg .b$size param$a;",
+ [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
+
+class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
+ [(set regclass:$dst, (MoveParam regclass:$src))]>;
+
+def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
+def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
+def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ "cvt.u16.u32\t$dst, $src;",
+ [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
+def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+ "cvt.u16.u32\t$dst, $src;",
+ [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
+def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
+def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
+
+class PseudoUseParamInst<NVPTXRegClass regclass> :
+ NVPTXInst<(outs), (ins regclass:$src),
+ "// Pseudo use of $src",
+ [(PseudoUseParam regclass:$src)]>;
+
+def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
+def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
+def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
+def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>;
+def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
+def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+
+
+//
+// Load / Store Handling
+//
+multiclass LD<NVPTXRegClass regclass> {
+ def _avar : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr];"), []>;
+ def _areg : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr];"), []>;
+ def _ari : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _asi : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t$dst, [$addr+$offset];"), []>;
+}
+
+let mayLoad=1, neverHasSideEffects=1 in {
+defm LD_i8 : LD<Int8Regs>;
+defm LD_i16 : LD<Int16Regs>;
+defm LD_i32 : LD<Int32Regs>;
+defm LD_i64 : LD<Int64Regs>;
+defm LD_f32 : LD<Float32Regs>;
+defm LD_f64 : LD<Float64Regs>;
+}
+
+let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
+defm LD_v2i8 : LD<V2I8Regs>;
+defm LD_v4i8 : LD<V4I8Regs>;
+defm LD_v2i16 : LD<V2I16Regs>;
+defm LD_v4i16 : LD<V4I16Regs>;
+defm LD_v2i32 : LD<V2I32Regs>;
+defm LD_v4i32 : LD<V4I32Regs>;
+defm LD_v2f32 : LD<V2F32Regs>;
+defm LD_v4f32 : LD<V4F32Regs>;
+defm LD_v2i64 : LD<V2I64Regs>;
+defm LD_v2f64 : LD<V2F64Regs>;
+}
+
+multiclass ST<NVPTXRegClass regclass> {
+ def _avar : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr], $src;"), []>;
+ def _areg : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr], $src;"), []>;
+ def _ari : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr+$offset], $src;"), []>;
+ def _asi : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
+!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
+ " \t[$addr+$offset], $src;"), []>;
+}
+
+let mayStore=1, neverHasSideEffects=1 in {
+defm ST_i8 : ST<Int8Regs>;
+defm ST_i16 : ST<Int16Regs>;
+defm ST_i32 : ST<Int32Regs>;
+defm ST_i64 : ST<Int64Regs>;
+defm ST_f32 : ST<Float32Regs>;
+defm ST_f64 : ST<Float64Regs>;
+}
+
+let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
+defm ST_v2i8 : ST<V2I8Regs>;
+defm ST_v4i8 : ST<V4I8Regs>;
+defm ST_v2i16 : ST<V2I16Regs>;
+defm ST_v4i16 : ST<V4I16Regs>;
+defm ST_v2i32 : ST<V2I32Regs>;
+defm ST_v4i32 : ST<V4I32Regs>;
+defm ST_v2f32 : ST<V2F32Regs>;
+defm ST_v4f32 : ST<V4F32Regs>;
+defm ST_v2i64 : ST<V2I64Regs>;
+defm ST_v2f64 : ST<V2F64Regs>;
+}
+
+// The following is used only in and after vector elementizations. Vector
+// elementization happens at the machine instruction level, so the following
+// instructions never appear in the DAG.
+multiclass LD_VEC<NVPTXRegClass regclass> {
+ def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
+ def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
+}
+let mayLoad=1, neverHasSideEffects=1 in {
+defm LDV_i8 : LD_VEC<Int8Regs>;
+defm LDV_i16 : LD_VEC<Int16Regs>;
+defm LDV_i32 : LD_VEC<Int32Regs>;
+defm LDV_i64 : LD_VEC<Int64Regs>;
+defm LDV_f32 : LD_VEC<Float32Regs>;
+defm LDV_f64 : LD_VEC<Float64Regs>;
+}
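+// As a rough sketch (the address space, sign code, and register names here
+// are illustrative, not fixed by the definitions), the _v4_avar form above
+// prints something like the following for a non-volatile global load:
+//   ld.global.v4.u32 {%r0, %r1, %r2, %r3}, [arr];
+// i.e. one vector load fills all four destination registers.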
+
+multiclass ST_VEC<NVPTXRegClass regclass> {
+ def _v2_avar : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_ari : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_asi : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v4_avar : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_ari : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
+ def _v4_asi : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
+}
+let mayStore=1, neverHasSideEffects=1 in {
+defm STV_i8 : ST_VEC<Int8Regs>;
+defm STV_i16 : ST_VEC<Int16Regs>;
+defm STV_i32 : ST_VEC<Int32Regs>;
+defm STV_i64 : ST_VEC<Int64Regs>;
+defm STV_f32 : ST_VEC<Float32Regs>;
+defm STV_f64 : ST_VEC<Float64Regs>;
+}
+
+
+//---- Conversion ----
+
+multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
+// FIXME: need to add f16 support
+// def CVTf16i8 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
+// def CVTf16i16 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
+// def CVTf16i32 :
+// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
+// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
+// def CVTf16i64:
+// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
+// !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
+// [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
+
+ def CVTf32i1 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
+ "selp.f32 \t$d, 1.0, 0.0, $a;",
+ [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def CVTf32i8 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
+ def CVTf32i16 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
+ def CVTf32i32 :
+ NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
+ def CVTf32i64:
+ NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
+ [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
+
+ def CVTf64i1 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
+ "selp.f64 \t$d, 1.0, 0.0, $a;",
+ [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
+ def CVTf64i8 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
+ def CVTf64i16 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
+ def CVTf64i32 :
+ NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
+ def CVTf64i64:
+ NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
+ !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
+ [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
+}
+
+defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
+defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
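+// To make the string concatenation concrete, the two defms above give, for
+// example,
+//   Sint_to_fpCVTf32i32:  "cvt.rn.f32.s32 \t$d, $a;"
+//   Uint_to_fpCVTf64i16:  "cvt.rn.f64.u16 \t$d, $a;"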
+
+multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
+// FIXME: need to add f16 support
+// def CVTi8f16:
+// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
+// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi8f32_ftz:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi8f32:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi8f64:
+ NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
+ [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi16f16:
+// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
+// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi16f32_ftz:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi16f32:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi16f64:
+ NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
+ [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi32f16:
+// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
+// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi32f32_ftz:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi32f32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi32f64:
+ NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
+ [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
+
+// FIXME: need to add f16 support
+// def CVTi64f16:
+// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
+// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
+// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
+ def CVTi64f32_ftz:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+ def CVTi64f32:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
+ def CVTi64f64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
+ !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
+ [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
+}
+
+defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
+defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
+
+multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
+ def ext1to8:
+ NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
+ "selp.u16 \t$d, 1, 0, $a;",
+ [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
+ "selp.u16 \t$d, 1, 0, $a;",
+ [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
+ "selp.u32 \t$d, 1, 0, $a;",
+ [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
+ "selp.u64 \t$d, 1, 0, $a;",
+ [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
+}
+
+multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
+ def ext1to8:
+ NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
+ "selp.s16 \t$d, -1, 0, $a;",
+ [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
+ "selp.s16 \t$d, -1, 0, $a;",
+ [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
+ "selp.s32 \t$d, -1, 0, $a;",
+ [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
+ def ext1to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
+ "selp.s64 \t$d, -1, 0, $a;",
+ [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
+}
+
+multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
+  // All Int8Regs are emitted as 16-bit registers in PTX,
+  // and there is no selp.u8 in PTX.
+ def ext8to16:
+ NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext8to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext8to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "8 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
+ def ext16to32:
+ NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
+ !strconcat(OpStr, "16 \t$d, $a;")))),
+ [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
+ def ext16to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "16 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
+ def ext32to64:
+ NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
+ !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
+ !strconcat(OpStr, "32 \t$d, $a;")))),
+ [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
+}
+
+defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
+defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
+defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
+
+defm Sint_extend : INT_EXTEND <"s", sext>;
+defm Zint_extend : INT_EXTEND <"u", zext>;
+defm Aint_extend : INT_EXTEND <"u", anyext>;
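+// For example, these expand to
+//   Sint_extendext8to16:  "cvt.s16.s8 \t$d, $a;"
+//   Zint_extendext8to32:  "cvt.u32.u8 \t$d, $a;"
+// (the 8-bit source still lives in a 16-bit PTX register, per the note in the
+// multiclass above).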
+
+class TRUNC_to1_asm<string sz> {
+ string s = !strconcat("{{\n\t",
+ !strconcat(".reg ",
+ !strconcat(sz,
+ !strconcat(" temp;\n\t",
+ !strconcat("and",
+ !strconcat(sz,
+ !strconcat("\t temp, $a, 1;\n\t",
+ !strconcat("setp",
+ !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
+}
+
+def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "cvt.u32.u64 \t$d, $a;",
+ [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
+ "cvt.u16.u64 \t$d, $a;",
+ [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
+ "cvt.u8.u64 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
+ "cvt.u16.u32 \t$d, $a;",
+ [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
+ "cvt.u8.u32 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
+ "cvt.u8.u16 \t$d, $a;",
+ [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
+def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ TRUNC_to1_asm<".b64">.s,
+ [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
+def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ TRUNC_to1_asm<".b32">.s,
+ [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
+def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
+ TRUNC_to1_asm<".b16">.s,
+ [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
+def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
+ TRUNC_to1_asm<".b16">.s,
+ [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
+
+// Select instructions
+def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
+ (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
+ (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
+ (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
+ (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
+ (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
+ (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
+ (TRUNC_32to1 Int32Regs:$pred))>;
+
+class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
+ NVPTXRegClass regclassOut> :
+ NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
+ !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
+ [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
+
+def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
+def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
+def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
+def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
+
+// pack a set of smaller int registers to a larger int register
+def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
+ (ins Int8Regs:$s1, Int8Regs:$s2,
+ Int8Regs:$s3, Int8Regs:$s4),
+ !strconcat("{{\n\t.reg .b8\t%t<4>;",
+ !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
+ !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
+ !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
+ !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
+ "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
+ []>;
+def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
+ (ins Int16Regs:$s1, Int16Regs:$s2,
+ Int16Regs:$s3, Int16Regs:$s4),
+ "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
+ []>;
+def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
+ (ins Int8Regs:$s1, Int8Regs:$s2),
+ !strconcat("{{\n\t.reg .b8\t%t<2>;",
+ !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
+ !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
+ "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
+ []>;
+def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
+ (ins Int16Regs:$s1, Int16Regs:$s2),
+ "mov.b32\t$d, {{$s1, $s2}};",
+ []>;
+def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
+ (ins Int32Regs:$s1, Int32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};",
+ []>;
+def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
+ (ins Float32Regs:$s1, Float32Regs:$s2),
+ "mov.b64\t$d, {{$s1, $s2}};",
+ []>;
+
+// unpack a larger int register to a set of smaller int registers
+def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
+ Int8Regs:$d3, Int8Regs:$d4),
+ (ins Int32Regs:$s),
+ !strconcat("{{\n\t.reg .b8\t%t<4>;",
+ !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
+ !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
+ !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
+ !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
+ "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
+ []>;
+def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
+ Int16Regs:$d3, Int16Regs:$d4),
+ (ins Int64Regs:$s),
+ "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
+ []>;
+def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
+ (ins Int16Regs:$s),
+ !strconcat("{{\n\t.reg .b8\t%t<2>;",
+ !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
+ !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
+ "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
+ []>;
+def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
+ (ins Int32Regs:$s),
+ "mov.b32\t{{$d1, $d2}}, $s;",
+ []>;
+def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
+ (ins Int64Regs:$s),
+ "mov.b64\t{{$d1, $d2}}, $s;",
+ []>;
+def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
+ (ins Float64Regs:$s),
+ "mov.b64\t{{$d1, $d2}}, $s;",
+ []>;
+
+def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
+ "cvt.rn.ftz.f32.f64 \t$d, $a;",
+ [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
+
+def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
+ "cvt.rn.f32.f64 \t$d, $a;",
+ [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
+
+def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
+ "cvt.ftz.f64.f32 \t$d, $a;",
+ [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
+
+def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
+ "cvt.f64.f32 \t$d, $a;",
+ [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
+
+def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//-----------------------------------
+// Control-flow
+//-----------------------------------
+
+let isTerminator=1 in {
+ let isReturn=1, isBarrier=1 in
+ def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
+
+ let isBranch=1 in
+ def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ "@$a bra \t$target;",
+ [(brcond Int1Regs:$a, bb:$target)]>;
+ let isBranch=1 in
+ def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ "@!$a bra \t$target;",
+ []>;
+
+ let isBranch=1, isBarrier=1 in
+ def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
+ "bra.uni \t$target;",
+ [(br bb:$target)]>;
+}
+
+def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
+ (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
+
+// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
+// conditional branch if the target block is the next block, so that the code
+// can fall through to the target block.
+// The inversion is done by 'xor condition, 1', which will be translated to
+// (setne condition, -1).
+// Since PTX supports '@!pred bra target', we should use it.
+def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
+ (CBranchOther Int1Regs:$a, bb:$target)>;
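+// For example, IR along the lines of (names illustrative)
+//   %notp = xor i1 %p, true
+//   br i1 %notp, label %other, label %next
+// reaches instruction selection as (brcond (setne %p, -1), %other) and
+// matches the pattern above, printing "@!%p bra ...;" instead of
+// materializing the xor.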
+
+// Call
+def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPSideEffect]>;
+
+def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def calltarget : Operand<i32>;
+let isCall=1 in {
+ def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
+ "call \t$dst, (1);", []>;
+}
+
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : NVPTXInst<outs, ins, asmstr, pattern>;
+
+// @TODO: We use some tricks here to emit curly braces. Can we clean this up
+// a bit without TableGen modifications?
+def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
+ "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
+ [(callseq_start timm:$amt)]>;
+def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "\n\t//{{\n\t}}// Callseq End $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+
+// trap instruction
+
+def trapinst : NVPTXInst<(outs), (ins),
+ "trap;",
+ [(trap)]>;
+
+include "NVPTXVector.td"
+
+include "NVPTXIntrinsics.td"
+
+
+//-----------------------------------
+// Notes
+//-----------------------------------
+// BSWAP is currently expanded. The following would be more efficient:
+// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
+//   registers
+// - for sm_20, use prmt (use vector scalar mov to get the pack and
+//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
+//   registers.
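+// As an illustrative sketch of the sm_20 case, a 32-bit byte swap can be a
+// single permute instead of the generic shift/or expansion:
+//   prmt.b32 %r1, %r0, 0, 0x0123;
+// (register names are illustrative; the 64-bit case would additionally need
+// the pack/unpack movs mentioned above).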
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
new file mode 100644
index 000000000000..028a94bfd1bb
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -0,0 +1,1675 @@
+//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def immFloat0 : PatLeaf<(fpimm), [{
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==0.0f);
+}]>;
+
+def immFloat1 : PatLeaf<(fpimm), [{
+ float f = (float)N->getValueAPF().convertToFloat();
+ return (f==1.0f);
+}]>;
+
+def immDouble0 : PatLeaf<(fpimm), [{
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==0.0);
+}]>;
+
+def immDouble1 : PatLeaf<(fpimm), [{
+ double d = (double)N->getValueAPF().convertToDouble();
+ return (d==1.0);
+}]>;
+
+
+
+//-----------------------------------
+// Synchronization Functions
+//-----------------------------------
+def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
+ "bar.sync \t0;",
+ [(int_cuda_syncthreads)]>;
+def INT_BARRIER0 : NVPTXInst<(outs), (ins),
+ "bar.sync \t0;",
+ [(int_nvvm_barrier0)]>;
+def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
+ !strconcat("}}", ""))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
+def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat(".reg .pred \t%p2; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
+ !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ !strconcat("}}", ""))))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
+def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg .pred \t%p1; \n\t",
+ !strconcat(".reg .pred \t%p2; \n\t",
+ !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
+ !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ !strconcat("}}", ""))))))),
+ [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
+
+
+//-----------------------------------
+// Explicit Memory Fence Functions
+//-----------------------------------
+class MEMBAR<string StrOp, Intrinsic IntOP> :
+ NVPTXInst<(outs), (ins),
+ StrOp, [(IntOP)]>;
+
+def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
+def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
+def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
+
+
+//-----------------------------------
+// Math Functions
+//-----------------------------------
+
+// Map min(1.0, max(0.0, x)) to sat(x)
+multiclass SAT<NVPTXRegClass regclass, Operand fimm, Intrinsic IntMinOp,
+ Intrinsic IntMaxOp, PatLeaf f0, PatLeaf f1, string OpStr> {
+
+ // fmin(1.0, fmax(0.0, x)) => sat(x)
+ def SAT11 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
+ (IntMaxOp f0:$srcf1, regclass:$src)))]>;
+
+ // fmin(1.0, fmax(x, 0.0)) => sat(x)
+ def SAT12 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
+ (IntMaxOp regclass:$src, f0:$srcf1)))]>;
+
+ // fmin(fmax(0.0, x), 1.0) => sat(x)
+ def SAT13 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp
+ (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>;
+
+ // fmin(fmax(x, 0.0), 1.0) => sat(x)
+ def SAT14 : NVPTXInst<(outs regclass:$dst),
+ (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
+ OpStr,
+ [(set regclass:$dst, (IntMinOp
+ (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>;
+
+}
+// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x), because when x
+// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
+// Same story for fmax, fmin.
+
+defm SAT_fmin_fmax_f : SAT<Float32Regs, f32imm, int_nvvm_fmin_f,
+ int_nvvm_fmax_f, immFloat0, immFloat1,
+ "cvt.sat.f32.f32 \t$dst, $src; \n">;
+defm SAT_fmin_fmax_d : SAT<Float64Regs, f64imm, int_nvvm_fmin_d,
+ int_nvvm_fmax_d, immDouble0, immDouble1,
+ "cvt.sat.f64.f64 \t$dst, $src; \n">;
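+// For example, with the f0/f1 leaves matching the 0.0/1.0 immediates, the
+// SAT11 form folds fmin(1.0f, fmax(0.0f, x)) into a single
+//   cvt.sat.f32.f32 $dst, $src;
+// which clamps x to [0.0, 1.0] (and, per PTX, maps NaN to 0.0, which is why
+// the max-outside ordering noted above cannot use it).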
+
+
+// We need a full string for OpcStr here because we need to deal with cases
+// like INT_PTX_RECIP.
+class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
+ NVPTXRegClass src_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
+ OpcStr,
+ [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
+
+// We need a full string for OpcStr here because we need to deal with cases
+// like INT_PTX_NATIVE_POWR_F.
+class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
+ NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs t_regclass:$dst),
+ (ins s0_regclass:$src0, s1_regclass:$src1),
+ OpcStr,
+ [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
+
+class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
+ NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
+ NVPTXRegClass s2_regclass, Intrinsic IntOP>
+ : NVPTXInst<(outs t_regclass:$dst),
+ (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
+ OpcStr,
+ [(set t_regclass:$dst,
+ (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
+
+//
+// MISC
+//
+
+def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_clz_i>;
+def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
+ int_nvvm_clz_ll>;
+
+def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_popc_i>;
+def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
+ int_nvvm_popc_ll>;
+
+def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
+
+//
+// Min Max
+//
+
+def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_min_i>;
+def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_min_ui>;
+
+def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_min_ll>;
+def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_min_ull>;
+
+def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_max_i>;
+def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_max_ui>;
+
+def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_max_ll>;
+def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_max_ull>;
+
+def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_fmin_f>;
+def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
+
+def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_fmax_f>;
+def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
+
+def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
+ Float64Regs, Float64Regs, int_nvvm_fmin_d>;
+def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
+ Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+
+//
+// Multiplication
+//
+
+def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
+def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
+ Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
+
+def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
+def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
+ Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
+
+def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
+def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
+def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
+def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
+def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
+def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
+def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
+def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
+
+def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
+def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
+def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
+def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
+
+def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
+def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
+ Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
+
+//
+// Div
+//
+
+def INT_NVVM_DIV_APPROX_FTZ_F
+ : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
+ Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
+def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
+
+def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
+def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
+def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
+def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
+def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
+def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
+def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
+def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
+
+def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
+def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
+def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
+def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
+
+//
+// Brev
+//
+
+def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_brev32>;
+def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
+ int_nvvm_brev64>;
+
+//
+// Sad
+//
+
+def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
+ Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
+def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
+ Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
+
+//
+// Floor Ceil
+//
+
+def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>;
+def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_floor_f>;
+def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_floor_d>;
+
+def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>;
+def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ceil_f>;
+def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_ceil_d>;
+
+//
+// Abs
+//
+
+def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
+ int_nvvm_abs_i>;
+def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
+ int_nvvm_abs_ll>;
+
+def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_fabs_ftz_f>;
+def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_fabs_f>;
+
+def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_fabs_d>;
+
+//
+// Round
+//
+
+def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_round_ftz_f>;
+def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_round_f>;
+
+def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_round_d>;
+
+//
+// Trunc
+//
+
+def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>;
+def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_trunc_f>;
+
+def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_trunc_d>;
+
+//
+// Saturate
+//
+
+def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>;
+def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_saturate_f>;
+
+def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_saturate_d>;
+
+//
+// Exp2 Log2
+//
+
+def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
+def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
+def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
+
+def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
+def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
+def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
+
+//
+// Sin Cos
+//
+
+def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
+def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
+
+def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
+def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
+
+//
+// Fma
+//
+
+def INT_NVVM_FMA_RN_FTZ_F
+ : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
+def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
+def INT_NVVM_FMA_RZ_FTZ_F
+ : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
+def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
+def INT_NVVM_FMA_RM_FTZ_F
+ : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
+def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
+def INT_NVVM_FMA_RP_FTZ_F
+ : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
+def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
+ Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
+
+def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
+def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
+def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
+def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
+ Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
+
+//
+// Rcp
+//
+
+def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
+def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
+def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
+def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
+def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
+def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
+def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
+def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
+
+def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rn_d>;
+def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rz_d>;
+def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rm_d>;
+def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_rcp_rp_d>;
+
+def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
+
+//
+// Sqrt
+//
+
+def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
+def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rn_f>;
+def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
+def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rz_f>;
+def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
+def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rm_f>;
+def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
+def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
+ Float32Regs, int_nvvm_sqrt_rp_f>;
+def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
+def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
+
+def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rn_d>;
+def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rz_d>;
+def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rm_d>;
+def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
+ Float64Regs, int_nvvm_sqrt_rp_d>;
+
+//
+// Rsqrt
+//
+
+def INT_NVVM_RSQRT_APPROX_FTZ_F
+ : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
+ int_nvvm_rsqrt_approx_ftz_f>;
+def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
+def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
+ Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
+
+//
+// Add
+//
+
+def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
+def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
+def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
+def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
+def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
+def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
+def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
+def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
+
+def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
+def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
+def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
+def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
+ Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
+
+//
+// Convert
+//
+
+def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>;
+def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rn>;
+def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>;
+def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rz>;
+def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>;
+def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rm>;
+def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>;
+def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;",
+ Float32Regs, Float64Regs, int_nvvm_d2f_rp>;
+
+def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rn>;
+def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rz>;
+def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rm>;
+def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2i_rp>;
+
+def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rn>;
+def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rz>;
+def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rm>;
+def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;",
+ Int32Regs, Float64Regs, int_nvvm_d2ui_rp>;
+
+def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rn>;
+def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rz>;
+def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rm>;
+def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_i2d_rp>;
+
+def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rn>;
+def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rz>;
+def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rm>;
+def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;",
+ Float64Regs, Int32Regs, int_nvvm_ui2d_rp>;
+
+def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>;
+def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rn>;
+def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>;
+def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rz>;
+def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>;
+def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rm>;
+def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>;
+def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2i_rp>;
+
+def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>;
+def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rn>;
+def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>;
+def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rz>;
+def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>;
+def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rm>;
+def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;",
+ Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>;
+def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_f2ui_rp>;
+
+def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rn>;
+def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rz>;
+def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rm>;
+def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_i2f_rp>;
+
+def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rn>;
+def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rz>;
+def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rm>;
+def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_ui2f_rp>;
+
+def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
+ Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
+
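+// Note on the next two defs: mov.b64 with a braced destination pair unpacks
+// the f64 bit pattern, writing the low 32 bits to the first register and the
+// high 32 bits to the second; INT_NVVM_D2I_LO thus returns the low half and
+// INT_NVVM_D2I_HI the high half.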
+def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b32 %temp; \n\t",
+ !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
+ "}}"))),
+ Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
+def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b32 %temp; \n\t",
+ !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
+ "}}"))),
+ Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
+
+def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>;
+def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rn>;
+def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>;
+def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rz>;
+def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>;
+def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rm>;
+def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>;
+def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ll_rp>;
+
+def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>;
+def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rn>;
+def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>;
+def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rz>;
+def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>;
+def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rm>;
+def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;",
+ Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>;
+def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs,
+ Float32Regs, int_nvvm_f2ull_rp>;
+
+def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rn>;
+def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rz>;
+def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rm>;
+def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ll_rp>;
+
+def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rn>;
+def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rz>;
+def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rm>;
+def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_d2ull_rp>;
+
+def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rn>;
+def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rz>;
+def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rm>;
+def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ll2f_rp>;
+def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rn>;
+def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rz>;
+def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rm>;
+def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs,
+ Int64Regs, int_nvvm_ull2f_rp>;
+
+def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rn>;
+def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rz>;
+def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rm>;
+def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ll2d_rp>;
+def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rn>;
+def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rz>;
+def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rm>;
+def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_ull2d_rp>;
+
+def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
+ !strconcat("mov.b16 \t$dst, %temp;\n",
+ "}}")))),
+ Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
+def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
+ !strconcat("mov.b16 \t$dst, %temp;\n",
+ "}}")))),
+ Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
+
+def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
+ !strconcat(".reg .b16 %temp;\n\t",
+ !strconcat("mov.b16 \t%temp, $src0;\n\t",
+ !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
+ "}}")))),
+ Float32Regs, Int16Regs, int_nvvm_h2f>;
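+
+// Note: the f2h/h2f conversions above go through a .b16 temporary because the
+// half value is carried in a 16-bit integer register (Int16Regs); the cvt does
+// the conversion and the mov.b16 just transfers the raw half bits.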
+
+//
+// Bitcast
+//
+
+def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
+ Float32Regs, int_nvvm_bitcast_f2i>;
+def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
+ Int32Regs, int_nvvm_bitcast_i2f>;
+
+def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
+ Int64Regs, int_nvvm_bitcast_ll2d>;
+def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
+ Float64Regs, int_nvvm_bitcast_d2ll>;
+
+//-----------------------------------
+// Atomic Functions
+//-----------------------------------
+
+class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
+}]>;
+class ATOMIC_SHARED_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
+}]>;
+class ATOMIC_GENERIC_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
+}]>;
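+
+// The *_CHK PatFrags above restrict the wrapped atomic node to a single PTX
+// state space (global, shared, or generic) by checking the address space of
+// its memory operand via ChkMemSDNodeAddressSpace.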
+
+multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, SDNode IMM, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ Requires<[Pred]>;
+ def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+ string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
+ defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, IMM, Pred>;
+ defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, IMM, Pred>;
+}
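+
+// For illustration (taking the ".global"/".u32"/".add" instantiation used
+// below), the 'reg' variant of F_ATOMIC_2 emits PTX of the form
+//   atom.global.add.u32  $dst, [$addr], $b;
+// and the p32/p64 variants differ only in the width of the address register.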
+
+// Has two operands; negates the second one.
+multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("{{ \n\t",
+ !strconcat(".reg \t.s",
+ !strconcat(TypeStr,
+ !strconcat(" temp; \n\t",
+ !strconcat("neg.s",
+ !strconcat(TypeStr,
+ !strconcat(" \ttemp, $b; \n\t",
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(".u",
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], temp; \n\t",
+ !strconcat("}}", "")))))))))))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
+ string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
+ Predicate Pred> {
+ defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred> ;
+ defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred> ;
+}
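+
+// For illustration (taking the ".global"/"32" instantiation used below), the
+// 'reg' variant of F_ATOMIC_2_NEG expands to
+//   {
+//   .reg .s32 temp;
+//   neg.s32  temp, $b;
+//   atom.global.add.u32  $dst, [$addr], temp;
+//   }
+// i.e. atomic subtract is implemented as an atomic add of the negated operand.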
+
+// Has three operands.
+multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+ string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+ Operand IMMType, Predicate Pred> {
+ def reg : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, regclass:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst,
+ (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+ def imm1 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, regclass:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+ def imm2 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, IMMType:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ Requires<[Pred]>;
+ def imm3 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
+ !strconcat("atom",
+ !strconcat(SpaceStr,
+ !strconcat(OpcStr,
+ !strconcat(TypeStr,
+ !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ Requires<[Pred]>;
+}
+multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+ string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
+ defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred>;
+ defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ IntOp, IMMType, Pred>;
+}
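+
+// For illustration (taking the ".global"/".b32"/".cas" instantiation used
+// below), the 'reg' variant of F_ATOMIC_3 emits
+//   atom.global.cas.b32  $dst, [$addr], $b, $c;
+// while the imm1/imm2/imm3 variants allow either or both data operands to be
+// immediates.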
+
+// atom_add
+
+def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
+ atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
+ atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
+ atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
+ atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
+ atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
+ atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+
+defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
+ atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
+ atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
+ atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
+
+// atom_sub
+
+def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_sub_64 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
+ atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
+ atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
+ atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
+ ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
+ atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
+ atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
+ atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
+ ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
+
+// atom_swap
+
+def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_swap_64 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
+ atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
+ atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
+ atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
+ atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
+ atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
+ atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+
+// atom_max
+
+def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+ ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+ ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
+ atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+ ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
+ atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_min
+
+def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+ ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+ ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
+ atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+ ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
+ atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+ ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_inc atom_dec
+
+def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
+def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
+ atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
+ atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
+ atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
+ atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
+ atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
+ atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+ ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_and
+
+def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
+ atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
+ atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
+ atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_or
+
+def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
+ atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
+ atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
+ atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
+
+// atom_xor
+
+def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_32 node:$a, node:$b)>;
+
+defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
+ atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
+ atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
+ atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+ ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// atom_cas
+
+def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
+
+defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
+ atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
+ atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
+ atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
+ ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
+ atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
+ atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
+ atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
+ ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
+
+
+//-----------------------------------
+// Read Special Registers
+//-----------------------------------
+class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
+ NVPTXInst<(outs regclassOut:$dst), (ins),
+ OpStr,
+ [(set regclassOut:$dst, (IntOp))]>;
+
+def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_x>;
+def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_y>;
+def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_tid_z>;
+
+def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_x>;
+def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_y>;
+def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ntid_z>;
+
+def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_x>;
+def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_y>;
+def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_ctaid_z>;
+
+def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_x>;
+def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_y>;
+def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
+ int_nvvm_read_ptx_sreg_nctaid_z>;
+
+def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
+ int_nvvm_read_ptx_sreg_warpsize>;
+
+
+//-----------------------------------
+// Support for ldu on sm_20 or later
+//-----------------------------------
+
+// Scalar
+// @TODO: Revisit this; imemAny was changed to imem.
+multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDU]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+}
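+
+// For illustration, the 'areg' variant of INT_PTX_LDU_GLOBAL_i32 below emits
+//   ldu.global.u32  $result, [$src];
+// the remaining variants differ only in how the address is supplied (64-bit
+// register, global symbol, or register+immediate).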
+
+defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs,
+ int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
+ int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
+ int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
+ int_nvvm_ldu_global_i>;
+defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
+ int_nvvm_ldu_global_f>;
+defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
+ int_nvvm_ldu_global_f>;
+defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
+ int_nvvm_ldu_global_p>;
+defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
+ int_nvvm_ldu_global_p>;
+
+// vector
+
+// Elementized vector ldu
+multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+}
+
+multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+}
+
+defm INT_PTX_LDU_G_v2i8_ELE
+ : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>;
+defm INT_PTX_LDU_G_v2i16_ELE
+ : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDU_G_v2i32_ELE
+ : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDU_G_v2f32_ELE
+ : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDU_G_v2i64_ELE
+ : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDU_G_v2f64_ELE
+ : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDU_G_v4i8_ELE
+ : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>;
+defm INT_PTX_LDU_G_v4i16_ELE
+ : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Int16Regs>;
+defm INT_PTX_LDU_G_v4i32_ELE
+ : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Int32Regs>;
+defm INT_PTX_LDU_G_v4f32_ELE
+ : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float32Regs>;
+
+// Vector ldu
+multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
+ NVPTXInst eleInst, NVPTXInst eleInst64> {
+ def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
+ Requires<[hasLDU]>;
+ def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
+ Requires<[hasLDU]>;
+}
+
+let VecInstType=isVecLD.Value in {
+defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
+ V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
+ INT_PTX_LDU_G_v2i8_ELE_64>;
+defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
+ V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
+ INT_PTX_LDU_G_v4i8_ELE_64>;
+defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
+ V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
+ INT_PTX_LDU_G_v2i16_ELE_64>;
+defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
+ V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
+ INT_PTX_LDU_G_v4i16_ELE_64>;
+defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
+ V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
+ INT_PTX_LDU_G_v2i32_ELE_64>;
+defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
+ V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
+ INT_PTX_LDU_G_v4i32_ELE_64>;
+defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
+ V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
+ INT_PTX_LDU_G_v2f32_ELE_64>;
+defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
+ V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
+ INT_PTX_LDU_G_v4f32_ELE_64>;
+defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
+ V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
+ INT_PTX_LDU_G_v2i64_ELE_64>;
+defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
+ V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
+ INT_PTX_LDU_G_v2f64_ELE_64>;
+}
+
+
+
+multiclass NG_TO_G<string Str, Intrinsic Intrin> {
+ def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+
+// @TODO: Are these actually needed? I believe global addresses will be copied
+// to register values anyway.
+ /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasGenericLdSt]>;
+ def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
+ !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasGenericLdSt]>;*/
+
+ def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+
+// @TODO: Are these actually needed? I believe global addresses will be copied
+// to register values anyway.
+ /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
+ def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
+}
+
+multiclass G_TO_NG<string Str, Intrinsic Intrin> {
+ def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
+ Requires<[hasGenericLdSt]>;
+ def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
+}
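+
+// For illustration, cvta_shared_yes below emits
+//   cvta.shared.u32  $result, $src;
+// and cvta_to_shared_yes emits cvta.to.shared.u32; the _no variants carry no
+// hasGenericLdSt requirement and lower to a plain mov.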
+
+defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
+defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
+defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+
+defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
+defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
+defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
+
+def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
+def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
+
+
+
+// @TODO: Revisit this. There is a type
+// contradiction between iPTRAny and iPTR for the def.
+/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
+ (Wrapper tglobaladdr:$src)))]>;
+def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
+ (Wrapper tglobaladdr:$src)))]>;*/
+
+
+def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
+def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+
+
+// nvvm.ptr.gen.to.param
+def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
+ (ins Int32Regs:$src),
+ "mov.u32 \t$result, $src;",
+ [(set Int32Regs:$result,
+ (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
+def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
+ (ins Int64Regs:$src),
+ "mov.u64 \t$result, $src;",
+ [(set Int64Regs:$result,
+ (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
+
+
+// nvvm.move intrinsics
+def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s),
+ "mov.b16 \t$r, $s;",
+ [(set Int8Regs:$r,
+ (int_nvvm_move_i8 Int8Regs:$s))]>;
+def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
+ "mov.b16 \t$r, $s;",
+ [(set Int16Regs:$r,
+ (int_nvvm_move_i16 Int16Regs:$s))]>;
+def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+ "mov.b32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_i32 Int32Regs:$s))]>;
+def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+ "mov.b64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_i64 Int64Regs:$s))]>;
+def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
+ "mov.f32 \t$r, $s;",
+ [(set Float32Regs:$r,
+ (int_nvvm_move_float Float32Regs:$s))]>;
+def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
+ "mov.f64 \t$r, $s;",
+ [(set Float64Regs:$r,
+ (int_nvvm_move_double Float64Regs:$s))]>;
+def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+ "mov.u32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_ptr Int32Regs:$s))]>;
+def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+ "mov.u64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_ptr Int64Regs:$s))]>;
+
+// @TODO: Are these actually needed, or will we always just see symbols
+// copied to registers first?
+/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
+ "mov.u32 \t$r, $s;",
+ [(set Int32Regs:$r,
+ (int_nvvm_move_ptr texternalsym:$s))]>;
+def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
+ "mov.u64 \t$r, $s;",
+ [(set Int64Regs:$r,
+ (int_nvvm_move_ptr texternalsym:$s))]>;*/
+
+
+// MoveParam %r1, param
+// ptr_local_to_gen %r2, %r1
+// ptr_gen_to_local %r3, %r2
+// ->
+// mov %r1, param
+
+// @TODO: Revisit this. There is a type
+// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
+// instructions are not currently defined. However, we can use the ptr
+// variants and the asm printer will do the right thing.
+def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+ (MoveParam texternalsym:$src)))),
+ (nvvm_move_ptr64 texternalsym:$src)>;
+def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+ (MoveParam texternalsym:$src)))),
+ (nvvm_move_ptr32 texternalsym:$src)>;
+
+
+//-----------------------------------
+// Compiler Error and Warn
+// - Just ignore them in codegen
+//-----------------------------------
+
+def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+ "// llvm.nvvm.compiler.warn()",
+ [(int_nvvm_compiler_warn Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+ "// llvm.nvvm.compiler.warn()",
+ [(int_nvvm_compiler_warn Int64Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+ "// llvm.nvvm.compiler.error()",
+ [(int_nvvm_compiler_error Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+ "// llvm.nvvm.compiler.error()",
+ [(int_nvvm_compiler_error Int64Regs:$a)]>;
+
+
+
+//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
+
+// These intrinsics are handled to retain compatibility with the old backend.
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : NVPTXInst<(outs Int64Regs:$d), (ins),
+ !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
+ [(set Int64Regs:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : NVPTXInst<(outs Int32Regs:$d), (ins),
+ !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
+ [(set Int32Regs:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
+
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
+
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
+
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
new file mode 100644
index 000000000000..56b237252d0a
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -0,0 +1,208 @@
+//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
+// the size is large or is not a compile-time constant.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXLowerAggrCopies.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+namespace llvm {
+FunctionPass *createLowerAggrCopies();
+}
+
+char NVPTXLowerAggrCopies::ID = 0;
+
+// Lower MemTransferInst or load-store pair to loop
+static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
+ Value *dstAddr, Value *len,
+ //unsigned numLoads,
+ bool srcVolatile, bool dstVolatile,
+ LLVMContext &Context, Function &F) {
+ Type *indType = len->getType();
+
+ BasicBlock *origBB = splitAt->getParent();
+ BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
+ BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+
+ origBB->getTerminator()->setSuccessor(0, loopBB);
+ IRBuilder<> builder(origBB, origBB->getTerminator());
+
+ // srcAddr and dstAddr are expected to be pointer types,
+ // so no check is made here.
+ unsigned srcAS =
+ dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS =
+ dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+ // Cast pointers to (char *)
+ srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
+ dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+
+ IRBuilder<> loop(loopBB);
+ // The loop index (ind) is a phi node.
+ PHINode *ind = loop.CreatePHI(indType, 0);
+ // Incoming value for ind is 0
+ ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+
+ // load from srcAddr+ind
+ Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ // store at dstAddr+ind
+ loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+
+ // The value for ind coming from backedge is (ind + 1)
+ Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
+ ind->addIncoming(newind, loopBB);
+
+ loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+// Lower MemSetInst to loop
+static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
+ Value *len, Value *val, LLVMContext &Context,
+ Function &F) {
+ BasicBlock *origBB = splitAt->getParent();
+ BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
+ BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+
+ origBB->getTerminator()->setSuccessor(0, loopBB);
+ IRBuilder<> builder(origBB, origBB->getTerminator());
+
+ unsigned dstAS =
+ dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+ // Cast pointer to the type of value getting stored
+ dstAddr = builder.CreateBitCast(dstAddr,
+ PointerType::get(val->getType(), dstAS));
+
+ IRBuilder<> loop(loopBB);
+ PHINode *ind = loop.CreatePHI(len->getType(), 0);
+ ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+
+ loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+
+ Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
+ ind->addIncoming(newind, loopBB);
+
+ loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
+ SmallVector<LoadInst *, 4> aggrLoads;
+ SmallVector<MemTransferInst *, 4> aggrMemcpys;
+ SmallVector<MemSetInst *, 4> aggrMemsets;
+
+ TargetData *TD = &getAnalysis<TargetData>();
+ LLVMContext &Context = F.getParent()->getContext();
+
+ //
+ // Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
+ //
+ //const BasicBlock *firstBB = &F.front(); // first BB in F
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ //BasicBlock *bb = BI;
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+
+ if (load->hasOneUse() == false) continue;
+
+ if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+
+ User *use = *(load->use_begin());
+ if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+ if (store->getOperand(0) != load) //getValueOperand
+ continue;
+ aggrLoads.push_back(load);
+ }
+ } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+ Value *len = intr->getLength();
+ // If the number of elements being copied is greater
+ // than MaxAggrCopySize, lower it to a loop
+ if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+ if (len_int->getZExtValue() >= MaxAggrCopySize) {
+ aggrMemcpys.push_back(intr);
+ }
+ } else {
+ // turn variable length memcpy/memmov into loop
+ aggrMemcpys.push_back(intr);
+ }
+ } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+ Value *len = memsetintr->getLength();
+ if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+ if (len_int->getZExtValue() >= MaxAggrCopySize) {
+ aggrMemsets.push_back(memsetintr);
+ }
+ } else {
+ // turn variable length memset into loop
+ aggrMemsets.push_back(memsetintr);
+ }
+ }
+ }
+ }
+ if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
+ && (aggrMemsets.size() == 0)) return false;
+
+ //
+ // Do the transformation of an aggr load/copy/set to a loop
+ //
+ for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
+ LoadInst *load = aggrLoads[i];
+ StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
+ Value *srcAddr = load->getOperand(0);
+ Value *dstAddr = store->getOperand(1);
+ unsigned numLoads = TD->getTypeStoreSize(load->getType());
+ Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
+
+ convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
+ store->isVolatile(), Context, F);
+
+ store->eraseFromParent();
+ load->eraseFromParent();
+ }
+
+ for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
+ MemTransferInst *cpy = aggrMemcpys[i];
+ Value *len = cpy->getLength();
+ // The LLVM 2.7 form of memcpy does not carry a volatile
+ // operand yet, so optimistically treat the copy as
+ // non-volatile to avoid emitting unnecessary
+ // st.volatile in the PTX output.
+ convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
+ false, Context, F);
+ cpy->eraseFromParent();
+ }
+
+ for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
+ MemSetInst *memsetinst = aggrMemsets[i];
+ Value *len = memsetinst->getLength();
+ Value *val = memsetinst->getValue();
+ convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
+ F);
+ memsetinst->eraseFromParent();
+ }
+
+ return true;
+}
+
+FunctionPass *llvm::createLowerAggrCopies() {
+ return new NVPTXLowerAggrCopies();
+}
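For orientation, a sketch of what this pass produces (illustration only, not code from the patch): copies and memsets whose length is at least MaxAggrCopySize (128 bytes, see NVPTXLowerAggrCopies.h) or is not a compile-time constant are rewritten into the explicit "loadstoreloop" built above, which behaves like the following C++ (note the loop body runs at least once, since origBB branches into loopBB unconditionally):

    #include <cstddef>

    // Equivalent of convertTransferToLoop: load from srcAddr+ind and
    // store to dstAddr+ind until ind reaches len.
    void loweredCopy(char *dst, const char *src, std::size_t len) {
      std::size_t ind = 0;
      do {
        dst[ind] = src[ind];
        ++ind;
      } while (ind < len);        // assumes len > 0, as the pass does
    }

    // Equivalent of convertMemSetToLoop: store val at dstAddr+ind
    // until ind reaches len.
    void loweredSet(char *dst, char val, std::size_t len) {
      std::size_t ind = 0;
      do {
        dst[ind] = val;
        ++ind;
      } while (ind < len);        // assumes len > 0, as the pass does
    }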
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
new file mode 100644
index 000000000000..ac7f1509f215
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -0,0 +1,47 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVIDIA-specific lowering of
+// aggregate copies.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_LOWER_AGGR_COPIES_H
+#define NVPTX_LOWER_AGGR_COPIES_H
+
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+// The actual pass, implemented as a FunctionPass.
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ virtual const char *getPassName() const {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+
+extern FunctionPass *createLowerAggrCopies();
+}
+
+#endif
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/NVPTX/NVPTXNumRegisters.h
index 60acfc75a2f7..b4a4dbce98a9 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -1,4 +1,5 @@
-//===-- PTXMachineFuctionInfo.cpp - PTX machine function info -------------===//
+
+//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +8,13 @@
//
//===----------------------------------------------------------------------===//
-#include "PTXMachineFunctionInfo.h"
+#ifndef NVPTX_NUM_REGISTERS_H
+#define NVPTX_NUM_REGISTERS_H
+
+namespace llvm {
+
+const unsigned NVPTXNumRegisters = 396;
-using namespace llvm;
+}
-void PTXMachineFunctionInfo::anchor() { }
+#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
new file mode 100644
index 000000000000..e3cd46f063bf
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -0,0 +1,325 @@
+//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-reg-info"
+
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+
+using namespace llvm;
+
+namespace llvm
+{
+std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+ if (RC == &NVPTX::Float32RegsRegClass) {
+ return ".f32";
+ }
+ if (RC == &NVPTX::Float64RegsRegClass) {
+ return ".f64";
+ }
+ else if (RC == &NVPTX::Int64RegsRegClass) {
+ return ".s64";
+ }
+ else if (RC == &NVPTX::Int32RegsRegClass) {
+ return ".s32";
+ }
+ else if (RC == &NVPTX::Int16RegsRegClass) {
+ return ".s16";
+ }
+ // Int8Regs become 16-bit registers in PTX
+ else if (RC == &NVPTX::Int8RegsRegClass) {
+ return ".s16";
+ }
+ else if (RC == &NVPTX::Int1RegsRegClass) {
+ return ".pred";
+ }
+ else if (RC == &NVPTX::SpecialRegsRegClass) {
+ return "!Special!";
+ }
+ else if (RC == &NVPTX::V2F32RegsRegClass) {
+ return ".v2.f32";
+ }
+ else if (RC == &NVPTX::V4F32RegsRegClass) {
+ return ".v4.f32";
+ }
+ else if (RC == &NVPTX::V2I32RegsRegClass) {
+ return ".v2.s32";
+ }
+ else if (RC == &NVPTX::V4I32RegsRegClass) {
+ return ".v4.s32";
+ }
+ else if (RC == &NVPTX::V2F64RegsRegClass) {
+ return ".v2.f64";
+ }
+ else if (RC == &NVPTX::V2I64RegsRegClass) {
+ return ".v2.s64";
+ }
+ else if (RC == &NVPTX::V2I16RegsRegClass) {
+ return ".v2.s16";
+ }
+ else if (RC == &NVPTX::V4I16RegsRegClass) {
+ return ".v4.s16";
+ }
+ else if (RC == &NVPTX::V2I8RegsRegClass) {
+ return ".v2.s16";
+ }
+ else if (RC == &NVPTX::V4I8RegsRegClass) {
+ return ".v4.s16";
+ }
+ else {
+ return "INTERNAL";
+ }
+ return "";
+}
+
+std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+ if (RC == &NVPTX::Float32RegsRegClass) {
+ return "%f";
+ }
+ if (RC == &NVPTX::Float64RegsRegClass) {
+ return "%fd";
+ }
+ else if (RC == &NVPTX::Int64RegsRegClass) {
+ return "%rd";
+ }
+ else if (RC == &NVPTX::Int32RegsRegClass) {
+ return "%r";
+ }
+ else if (RC == &NVPTX::Int16RegsRegClass) {
+ return "%rs";
+ }
+ else if (RC == &NVPTX::Int8RegsRegClass) {
+ return "%rc";
+ }
+ else if (RC == &NVPTX::Int1RegsRegClass) {
+ return "%p";
+ }
+ else if (RC == &NVPTX::SpecialRegsRegClass) {
+ return "!Special!";
+ }
+ else if (RC == &NVPTX::V2F32RegsRegClass) {
+ return "%v2f";
+ }
+ else if (RC == &NVPTX::V4F32RegsRegClass) {
+ return "%v4f";
+ }
+ else if (RC == &NVPTX::V2I32RegsRegClass) {
+ return "%v2r";
+ }
+ else if (RC == &NVPTX::V4I32RegsRegClass) {
+ return "%v4r";
+ }
+ else if (RC == &NVPTX::V2F64RegsRegClass) {
+ return "%v2fd";
+ }
+ else if (RC == &NVPTX::V2I64RegsRegClass) {
+ return "%v2rd";
+ }
+ else if (RC == &NVPTX::V2I16RegsRegClass) {
+ return "%v2s";
+ }
+ else if (RC == &NVPTX::V4I16RegsRegClass) {
+ return "%v4rs";
+ }
+ else if (RC == &NVPTX::V2I8RegsRegClass) {
+ return "%v2rc";
+ }
+ else if (RC == &NVPTX::V4I8RegsRegClass) {
+ return "%v4rc";
+ }
+ else {
+ return "INTERNAL";
+ }
+ return "";
+}
+
+bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
+ if (RC->getID() == NVPTX::V2F32RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V2F64RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V2I16RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V2I32RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V2I64RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V2I8RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V4F32RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V4I16RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V4I32RegsRegClassID)
+ return true;
+ if (RC->getID() == NVPTX::V4I8RegsRegClassID)
+ return true;
+ return false;
+}
+
+std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
+ if (RC->getID() == NVPTX::V2F32RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
+ if (RC->getID() == NVPTX::V2F64RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
+ if (RC->getID() == NVPTX::V2I16RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
+ if (RC->getID() == NVPTX::V2I32RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
+ if (RC->getID() == NVPTX::V2I64RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
+ if (RC->getID() == NVPTX::V2I8RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
+ if (RC->getID() == NVPTX::V4F32RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
+ if (RC->getID() == NVPTX::V4I16RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
+ if (RC->getID() == NVPTX::V4I32RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
+ if (RC->getID() == NVPTX::V4I8RegsRegClassID)
+ return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
+ llvm_unreachable("Not a vector register class");
+}
+
+const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
+ if (RC->getID() == NVPTX::V2F32RegsRegClassID)
+ return (&NVPTX::Float32RegsRegClass);
+ if (RC->getID() == NVPTX::V2F64RegsRegClassID)
+ return (&NVPTX::Float64RegsRegClass);
+ if (RC->getID() == NVPTX::V2I16RegsRegClassID)
+ return (&NVPTX::Int16RegsRegClass);
+ if (RC->getID() == NVPTX::V2I32RegsRegClassID)
+ return (&NVPTX::Int32RegsRegClass);
+ if (RC->getID() == NVPTX::V2I64RegsRegClassID)
+ return (&NVPTX::Int64RegsRegClass);
+ if (RC->getID() == NVPTX::V2I8RegsRegClassID)
+ return (&NVPTX::Int8RegsRegClass);
+ if (RC->getID() == NVPTX::V4F32RegsRegClassID)
+ return (&NVPTX::Float32RegsRegClass);
+ if (RC->getID() == NVPTX::V4I16RegsRegClassID)
+ return (&NVPTX::Int16RegsRegClass);
+ if (RC->getID() == NVPTX::V4I32RegsRegClassID)
+ return (&NVPTX::Int32RegsRegClass);
+ if (RC->getID() == NVPTX::V4I8RegsRegClassID)
+ return (&NVPTX::Int8RegsRegClass);
+ llvm_unreachable("Not a vector register class");
+}
+
+int getNVPTXVectorSize(TargetRegisterClass const *RC) {
+ if (RC->getID() == NVPTX::V2F32RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V2F64RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V2I16RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V2I32RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V2I64RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V2I8RegsRegClassID)
+ return 2;
+ if (RC->getID() == NVPTX::V4F32RegsRegClassID)
+ return 4;
+ if (RC->getID() == NVPTX::V4I16RegsRegClassID)
+ return 4;
+ if (RC->getID() == NVPTX::V4I32RegsRegClassID)
+ return 4;
+ if (RC->getID() == NVPTX::V4I8RegsRegClassID)
+ return 4;
+ llvm_unreachable("Not a vector register class");
+}
+}
+
+NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
+ const NVPTXSubtarget &st)
+ : NVPTXGenRegisterInfo(0),
+ Is64Bit(st.is64Bit()) {}
+
+#define GET_REGINFO_TARGET_DESC
+#include "NVPTXGenRegisterInfo.inc"
+
+/// NVPTX Callee Saved Registers
+const uint16_t* NVPTXRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const uint16_t CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+// NVPTX Callee Saved Reg Classes
+const TargetRegisterClass* const*
+NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+void NVPTXRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Use VRFrame (%SP) as the frame pointer.
+ MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+
+int NVPTXRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return 0;
+}
+
+unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return NVPTX::VRFrame;
+}
+
+unsigned NVPTXRegisterInfo::getRARegister() const {
+ return 0;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void NVPTXRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
new file mode 100644
index 000000000000..595178335ae2
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -0,0 +1,92 @@
+//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXREGISTERINFO_H
+#define NVPTXREGISTERINFO_H
+
+#include "ManagedStringPool.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+
+#define GET_REGINFO_HEADER
+#include "NVPTXGenRegisterInfo.inc"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
+
+namespace llvm {
+
+// Forward Declarations.
+class TargetInstrInfo;
+class NVPTXSubtarget;
+
+class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
+private:
+ bool Is64Bit;
+ // Holds strings that can be freed together with the NVPTXRegisterInfo.
+ ManagedStringPool ManagedStrPool;
+
+public:
+ NVPTXRegisterInfo(const TargetInstrInfo &tii,
+ const NVPTXSubtarget &st);
+
+
+ //------------------------------------------------------
+ // Pure virtual functions from TargetRegisterInfo
+ //------------------------------------------------------
+
+ // NVPTX callee saved registers
+ virtual const uint16_t*
+ getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ // NVPTX callee saved register classes
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ RegScavenger *RS=NULL) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const;
+ virtual unsigned getRARegister() const;
+
+ ManagedStringPool *getStrPool() const {
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
+ }
+
+ const char *getName(unsigned RegNo) const {
+ std::stringstream O;
+ O << "reg" << RegNo;
+ return getStrPool()->getManagedString(O.str().c_str())->c_str();
+ }
+
+};
+
+
+std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
+bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
+std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
+int getNVPTXVectorSize (const TargetRegisterClass *RC);
+const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
+
+} // end namespace llvm
+
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
new file mode 100644
index 000000000000..ba158258b994
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -0,0 +1,108 @@
+//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class NVPTXReg<string n> : Register<n> {
+ let Namespace = "NVPTX";
+}
+
+class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
+ : RegisterClass <"NVPTX", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// Special registers used as stack pointers
+def VRFrame : NVPTXReg<"%SP">;
+def VRFrameLocal : NVPTXReg<"%SPL">;
+
+// Special register used as the stack depot
+def VRDepot : NVPTXReg<"%Depot">;
+
+foreach i = 0-395 in {
+ def P#i : NVPTXReg<"%p"#i>; // Predicate
+ def RC#i : NVPTXReg<"%rc"#i>; // 8-bit
+ def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
+ def R#i : NVPTXReg<"%r"#i>; // 32-bit
+ def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
+ def F#i : NVPTXReg<"%f"#i>; // 32-bit float
+ def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
+ // Vectors
+ foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
+ def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
+
+ // Arguments
+ def ia#i : NVPTXReg<"%ia"#i>;
+ def la#i : NVPTXReg<"%la"#i>;
+ def fa#i : NVPTXReg<"%fa"#i>;
+ def da#i : NVPTXReg<"%da"#i>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>;
+def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>;
+def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>;
+def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
+def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>;
+def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>;
+def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>;
+def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>;
+def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>;
+def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>;
+def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
+
+// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+
+class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
+ NVPTXRegClass sClass,
+ int e,
+ string n>
+ : NVPTXRegClass<regTypes, alignment, regList>
+{
+ NVPTXRegClass scalarClass=sClass;
+ int elems=e;
+ string name=n;
+}
+def V2F32Regs
+ : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
+ Float32Regs, 2, ".v2.f32">;
+def V4F32Regs
+ : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
+ Float32Regs, 4, ".v4.f32">;
+def V2I32Regs
+ : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
+ Int32Regs, 2, ".v2.u32">;
+def V4I32Regs
+ : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
+ Int32Regs, 4, ".v4.u32">;
+def V2F64Regs
+ : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
+ Float64Regs, 2, ".v2.f64">;
+def V2I64Regs
+ : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
+ Int64Regs, 2, ".v2.u64">;
+def V2I16Regs
+ : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
+ Int16Regs, 2, ".v2.u16">;
+def V4I16Regs
+ : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
+ Int16Regs, 4, ".v4.u16">;
+def V2I8Regs
+ : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
+ Int8Regs, 2, ".v2.u8">;
+def V4I8Regs
+ : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
+ Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
new file mode 100644
index 000000000000..f1ca466266f6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -0,0 +1,45 @@
+//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTXSection class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_NVPTXSECTION_H
+#define LLVM_NVPTXSECTION_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/GlobalVariable.h"
+#include <vector>
+
+namespace llvm {
+/// NVPTXSection - Represents a section in PTX.
+/// PTX does not have sections; this class exists only so we can use
+/// the AsmPrinter interface.
+///
+class NVPTXSection : public MCSection {
+
+public:
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ ~NVPTXSection() {}
+
+ /// Override this as NVPTX has its own way of printing a switch
+ /// to a section.
+ virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {}
+
+ /// Base address of PTX sections is zero.
+ virtual bool isBaseAddressKnownZero() const { return true; }
+ virtual bool UseCodeAlign() const { return false; }
+ virtual bool isVirtualSection() const { return false; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
new file mode 100644
index 000000000000..2836cad4f021
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -0,0 +1,77 @@
+//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Split basic blocks so that a basic block that contains a barrier instruction
+// only contains the barrier instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/InstIterator.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXSplitBBatBar.h"
+
+using namespace llvm;
+
+namespace llvm {
+FunctionPass *createSplitBBatBarPass();
+}
+
+char NVPTXSplitBBatBar::ID = 0;
+
+bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
+
+ SmallVector<Instruction *, 4> SplitPoints;
+ bool changed = false;
+
+ // Collect all the split points in SplitPoints
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock::iterator IB = BI->begin();
+ BasicBlock::iterator II = IB;
+ BasicBlock::iterator IE = BI->end();
+
+ // Skip the first instruction. No splitting is needed at this
+ // point even if it is a barrier.
+ while (II != IE) {
+ if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
+ Intrinsic::ID id = inst->getIntrinsicID();
+ // If this is a barrier, split at this instruction
+ // and the next instruction.
+ if (llvm::isBarrierIntrinsic(id)) {
+ if (II != IB)
+ SplitPoints.push_back(II);
+ II++;
+ if ((II != IE) && (!II->isTerminator())) {
+ SplitPoints.push_back(II);
+ II++;
+ }
+ continue;
+ }
+ }
+ II++;
+ }
+ }
+
+ for (unsigned i = 0; i != SplitPoints.size(); i++) {
+ changed = true;
+ Instruction *inst = SplitPoints[i];
+ inst->getParent()->splitBasicBlock(inst, "bar_split");
+ }
+
+ return changed;
+}
+
+// This interface will most likely not be necessary, because this pass will
+// not be invoked by the driver, but will be used as a prerequisite to
+// another pass.
+FunctionPass *llvm::createSplitBBatBarPass() {
+ return new NVPTXSplitBBatBar();
+}
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
new file mode 100644
index 000000000000..9e4d5a066d4c
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -0,0 +1,41 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVIDIA-specific declarations for splitting
+// basic blocks at barrier instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_SPLIT_BB_AT_BAR_H
+#define NVPTX_SPLIT_BB_AT_BAR_H
+
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+
+namespace llvm {
+
+// The actual pass, implemented as a FunctionPass.
+struct NVPTXSplitBBatBar : public FunctionPass {
+ static char ID;
+
+ NVPTXSplitBBatBar() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ }
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "Split basic blocks at barrier";
+ }
+};
+
+extern FunctionPass *createSplitBBatBarPass();
+}
+
+#endif //NVPTX_SPLIT_BB_AT_BAR_H
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
new file mode 100644
index 000000000000..6aadd43e9446
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -0,0 +1,57 @@
+//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXSubtarget.h"
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "NVPTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+// Select Driver Interface
+#include "llvm/Support/CommandLine.h"
+namespace {
+cl::opt<NVPTX::DrvInterface>
+DriverInterface(cl::desc("Choose driver interface:"),
+ cl::values(
+ clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+ clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+ clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
+ clEnumValEnd),
+ cl::init(NVPTX::NVCL));
+}
+
+NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
+ // because we don't register all
+ // nvptx targets.
+ Is64Bit(is64Bit) {
+
+ drvInterface = DriverInterface;
+
+ // Provide the default CPU if none is specified.
+ std::string defCPU = "sm_10";
+
+ // Determine the target name: use CPU if given, otherwise the default.
+ if (FS.empty() && CPU.empty())
+ TargetName = defCPU;
+ else if (!CPU.empty())
+ TargetName = CPU;
+ else
+ llvm_unreachable("we are not using FeatureStr");
+
+ // Set up the SmVersion
+ SmVersion = atoi(TargetName.c_str()+3);
+}
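For reference, a small sketch (not part of the patch) of the TargetName-to-SmVersion mapping used above: atoi(TargetName.c_str()+3) simply skips the "sm_" prefix and parses the number, which then drives the feature predicates declared in NVPTXSubtarget.h.

    #include <cassert>
    #include <cstdlib>
    #include <string>

    // Mirrors the SmVersion computation in the constructor above.
    static unsigned smVersionOf(const std::string &TargetName) {
      return static_cast<unsigned>(std::atoi(TargetName.c_str() + 3));
    }

    int main() {
      assert(smVersionOf("sm_10") == 10);  // the default CPU chosen above
      assert(smVersionOf("sm_20") == 20);  // enables e.g. hasGenericLdSt()
      return 0;
    }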
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
new file mode 100644
index 000000000000..8f2a629d229b
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -0,0 +1,92 @@
+//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXSUBTARGET_H
+#define NVPTXSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "NVPTX.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "NVPTXGenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+
+class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
+
+ unsigned int SmVersion;
+ std::string TargetName;
+ NVPTX::DrvInterface drvInterface;
+ bool dummy; // For the 'dummy' feature, see NVPTX.td
+ bool Is64Bit;
+
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified module.
+ ///
+ NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ bool hasBrkPt() const { return SmVersion >= 11; }
+ bool hasAtomRedG32() const { return SmVersion >= 11; }
+ bool hasAtomRedS32() const { return SmVersion >= 12; }
+ bool hasAtomRedG64() const { return SmVersion >= 12; }
+ bool hasAtomRedS64() const { return SmVersion >= 20; }
+ bool hasAtomRedGen32() const { return SmVersion >= 20; }
+ bool hasAtomRedGen64() const { return SmVersion >= 20; }
+ bool hasAtomAddF32() const { return SmVersion >= 20; }
+ bool hasVote() const { return SmVersion >= 12; }
+ bool hasDouble() const { return SmVersion >= 13; }
+ bool reqPTX20() const { return SmVersion >= 20; }
+ bool hasF32FTZ() const { return SmVersion >= 20; }
+ bool hasFMAF32() const { return SmVersion >= 20; }
+ bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDU() const { return SmVersion >= 20; }
+ bool hasGenericLdSt() const { return SmVersion >= 20; }
+ inline bool hasHWROT32() const { return false; }
+ inline bool hasSWROT32() const {
+ return true;
+ }
+ inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+ inline bool hasROT64() const { return SmVersion >= 20; }
+
+
+ bool is64Bit() const { return Is64Bit; }
+
+ unsigned int getSmVersion() const { return SmVersion; }
+ NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
+ std::string getTargetName() const { return TargetName; }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit())
+ p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+ "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+ "n16:32:64";
+ else
+ p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+ "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+ "n16:32:64";
+
+ return std::string(p);
+ }
+
+};
+
+} // End llvm namespace
+
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
new file mode 100644
index 000000000000..433f415a8786
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -0,0 +1,133 @@
+//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "NVPTX.h"
+#include "NVPTXSplitBBatBar.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTXAllocaHoisting.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+
+using namespace llvm;
+
+
+extern "C" void LLVMInitializeNVPTXTarget() {
+ // Register the target.
+ RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
+ RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+
+}
+
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ const TargetOptions& Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64bit)
+: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit),
+ DataLayout(Subtarget.getDataLayout()),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
+/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
+}
+
+
+
+void NVPTXTargetMachine32::anchor() {}
+
+NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void NVPTXTargetMachine64::anchor() {}
+
+NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+
+namespace llvm {
+class NVPTXPassConfig : public TargetPassConfig {
+public:
+ NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ NVPTXTargetMachine &getNVPTXTargetMachine() const {
+ return getTM<NVPTXTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+};
+}
+
+TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
+ return PassConfig;
+}
+
+bool NVPTXPassConfig::addInstSelector() {
+ addPass(createLowerAggrCopies());
+ addPass(createSplitBBatBarPass());
+ addPass(createAllocaHoisting());
+ addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+ addPass(createVectorElementizePass(getNVPTXTargetMachine()));
+ return false;
+}
+
+bool NVPTXPassConfig::addPreRegAlloc() {
+ return false;
+}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
new file mode 100644
index 000000000000..b3f9cace6bf4
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -0,0 +1,125 @@
+//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef NVPTX_TARGETMACHINE_H
+#define NVPTX_TARGETMACHINE_H
+
+#include "NVPTXInstrInfo.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXFrameLowering.h"
+#include "ManagedStringPool.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// NVPTXTargetMachine
+///
+class NVPTXTargetMachine : public LLVMTargetMachine {
+ NVPTXSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+ // NVPTX does not have any call stack frame, but needs an NVPTX-specific
+ // FrameLowering class because TargetFrameLowering is abstract.
+ NVPTXFrameLowering FrameLowering;
+
+ // Holds strings that can be freed together with the NVPTXTargetMachine.
+ ManagedStringPool ManagedStrPool;
+
+ //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ // bool DisableVerify, MCContext *&OutCtx);
+
+public:
+ NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OP,
+ bool is64bit);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+
+ virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+ return &(InstrInfo.getRegisterInfo());
+ }
+
+ virtual NVPTXTargetLowering *getTargetLowering() const {
+ return const_cast<NVPTXTargetLowering*>(&TLInfo);
+ }
+
+ virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ //virtual bool addInstSelector(PassManagerBase &PM,
+ // CodeGenOpt::Level OptLevel);
+
+ //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+
+ ManagedStringPool *getManagedStrPool() const {
+ return const_cast<ManagedStringPool*>(&ManagedStrPool);
+ }
+
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ bool = true) {
+ return true;
+ }
+
+}; // NVPTXTargetMachine.
+
+class NVPTXTargetMachine32 : public NVPTXTargetMachine {
+ virtual void anchor();
+public:
+ NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+class NVPTXTargetMachine64 : public NVPTXTargetMachine {
+ virtual void anchor();
+public:
+ NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
new file mode 100644
index 000000000000..b5698a2fc08f
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -0,0 +1,105 @@
+//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+
+#include "NVPTXSection.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <string>
+
+namespace llvm {
+class GlobalVariable;
+class Module;
+
+class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
+
+public:
+ NVPTXTargetObjectFile() {}
+ ~NVPTXTargetObjectFile() {
+ delete TextSection;
+ delete DataSection;
+ delete BSSSection;
+ delete ReadOnlySection;
+
+ delete StaticCtorSection;
+ delete StaticDtorSection;
+ delete LSDASection;
+ delete EHFrameSection;
+ delete DwarfAbbrevSection;
+ delete DwarfInfoSection;
+ delete DwarfLineSection;
+ delete DwarfFrameSection;
+ delete DwarfPubTypesSection;
+ delete DwarfDebugInlineSection;
+ delete DwarfStrSection;
+ delete DwarfLocSection;
+ delete DwarfARangesSection;
+ delete DwarfRangesSection;
+ delete DwarfMacroInfoSection;
+ }
+
+ virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+ TextSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getText());
+ DataSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getDataRel());
+ BSSSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getBSS());
+ ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getReadOnly());
+
+ StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ LSDASection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
+ SectionKind::getMetadata());
+ }
+
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
+ return ReadOnlySection;
+ }
+
+ virtual const MCSection *
+ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const {
+ return DataSection;
+ }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
new file mode 100644
index 000000000000..3f52251cc1b2
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -0,0 +1,514 @@
+//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXUtilities.h"
+#include "NVPTX.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/Operator.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+//#include <iostream>
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
+typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
+typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
+
+ManagedStatic<per_module_annot_t> annotationCache;
+
+
+static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+ assert(md && "Invalid mdnode for annotation");
+ assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
+ // Start at index 1 to skip the global variable itself; step by 2 so each
+ // iteration consumes one (property, value) pair.
+ for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
+ // property
+ const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
+ assert(prop && "Annotation property not a string");
+
+ // value
+ ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+ assert(Val && "Value operand not a constant int");
+
+ std::string keyname = prop->getString().str();
+ if (retval.find(keyname) != retval.end())
+ retval[keyname].push_back(Val->getZExtValue());
+ else {
+ std::vector<unsigned> tmp;
+ tmp.push_back(Val->getZExtValue());
+ retval[keyname] = tmp;
+ }
+ }
+}
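A minimal sketch, not part of this patch, of emitting one annotation node in the shape the loop above expects: operand 0 is the annotated global, followed by an odd tail of (property, value) pairs. Here M and F are an assumed Module and Function, and the list name assumes NamedMDForAnnotations is "nvvm.annotations":

    LLVMContext &Ctx = M.getContext();
    Value *Ops[] = {
      F,                                          // annotated GlobalValue (operand 0)
      MDString::get(Ctx, "kernel"),               // property key
      ConstantInt::get(Type::getInt32Ty(Ctx), 1)  // property value
    };
    M.getOrInsertNamedMetadata("nvvm.annotations")
        ->addOperand(MDNode::get(Ctx, Ops));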
+
+static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+ NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
+ if (!NMD)
+ return;
+ key_val_pair_t tmp;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *elem = NMD->getOperand(i);
+
+ Value *entity = elem->getOperand(0);
+ // entity may be null due to DCE
+ if (!entity)
+ continue;
+ if (entity != gv)
+ continue;
+
+ // accumulate annotations for entity in tmp
+ cacheAnnotationFromMD(elem, tmp);
+ }
+
+ if (tmp.empty()) // no annotations for this gv
+ return;
+
+ if ((*annotationCache).find(m) != (*annotationCache).end())
+ (*annotationCache)[m][gv] = tmp;
+ else {
+ global_val_annot_t tmp1;
+ tmp1[gv] = tmp;
+ (*annotationCache)[m] = tmp1;
+ }
+}
+
+bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
+ unsigned &retval) {
+ const Module *m = gv->getParent();
+ if ((*annotationCache).find(m) == (*annotationCache).end())
+ cacheAnnotationFromMD(m, gv);
+ else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ cacheAnnotationFromMD(m, gv);
+ if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ return false;
+ retval = (*annotationCache)[m][gv][prop][0];
+ return true;
+}
+
+bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
+ std::vector<unsigned> &retval) {
+ const Module *m = gv->getParent();
+ if ((*annotationCache).find(m) == (*annotationCache).end())
+ cacheAnnotationFromMD(m, gv);
+ else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ cacheAnnotationFromMD(m, gv);
+ if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ return false;
+ retval = (*annotationCache)[m][gv][prop];
+ return true;
+}
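A caller-side usage sketch (assumed, not in the patch): querying a single property for a function F. The key string "maxntidx" is illustrative only; the callers below index llvm::PropertyAnnotationNames instead of spelling keys directly.

    unsigned MaxThreadsX = 0;
    if (llvm::findOneNVVMAnnotation(&F, "maxntidx", MaxThreadsX)) {
      // MaxThreadsX now holds the first value recorded for that property.
    }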
+
+bool llvm::isTexture(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(gv,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a texture symbol");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isSurface(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(gv,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a surface symbol");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isSampler(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(gv,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a sampler symbol");
+ return true;
+ }
+ }
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImageReadOnly(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImageWriteOnly(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::isImage(const llvm::Value &val) {
+ return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+}
+
+std::string llvm::getTextureName(const llvm::Value &val) {
+ assert(val.hasName() && "Found texture variable with no name");
+ return val.getName();
+}
+
+std::string llvm::getSurfaceName(const llvm::Value &val) {
+ assert(val.hasName() && "Found surface variable with no name");
+ return val.getName();
+}
+
+std::string llvm::getSamplerName(const llvm::Value &val) {
+ assert(val.hasName() && "Found sampler variable with no name");
+ return val.getName();
+}
+
+bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
+ x));
+}
+
+bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
+ y));
+}
+
+bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
+ z));
+}
+
+bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
+ x));
+}
+
+bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
+ y));
+}
+
+bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
+ z));
+}
+
+bool llvm::getMinCTASm(const Function &F, unsigned &x) {
+ return (llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
+ x));
+}
+
+bool llvm::isKernelFunction(const Function &F) {
+ unsigned x = 0;
+ bool retval = llvm::findOneNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
+ x);
+ if (retval == false) {
+    // There is no NVVM metadata; check the calling convention.
+ if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
+ return true;
+ else
+ return false;
+ }
+ return (x==1);
+}
+
+bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
+ std::vector<unsigned> Vs;
+ bool retval = llvm::findAllNVVMAnnotation(&F,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
+ Vs);
+ if (retval == false)
+ return false;
+ for (int i=0, e=Vs.size(); i<e; i++) {
+ unsigned v = Vs[i];
+ if ( (v >> 16) == index ) {
+ align = v & 0xFFFF;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
+ if (MDNode *alignNode = I.getMetadata("callalign")) {
+ for (int i=0, n = alignNode->getNumOperands();
+ i<n; i++) {
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ unsigned v = CI->getZExtValue();
+ if ( (v>>16) == index ) {
+ align = v & 0xFFFF;
+ return true;
+ }
+ if ( (v>>16) > index ) {
+ return false;
+ }
+ }
+ }
+ }
+ return false;
+}
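Both getAlign overloads assume a packed encoding: the upper 16 bits carry the parameter index and the lower 16 bits the alignment in bytes. A small illustrative sketch of that encoding (encodeAlign is a hypothetical helper, not in the patch):

    // encodeAlign(2, 8) == 0x20008: parameter 2 is 8-byte aligned.
    unsigned encodeAlign(unsigned Index, unsigned AlignBytes) {
      return (Index << 16) | (AlignBytes & 0xFFFF);
    }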
+
+bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
+ if ((id == Intrinsic::nvvm_barrier0) ||
+ (id == Intrinsic::nvvm_barrier0_popc) ||
+ (id == Intrinsic::nvvm_barrier0_and) ||
+ (id == Intrinsic::nvvm_barrier0_or) ||
+ (id == Intrinsic::cuda_syncthreads))
+ return true;
+ return false;
+}
+
+// Interface for checking all memory space transfer related intrinsics
+bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
+ if (id == Intrinsic::nvvm_ptr_local_to_gen ||
+ id == Intrinsic::nvvm_ptr_shared_to_gen ||
+ id == Intrinsic::nvvm_ptr_global_to_gen ||
+ id == Intrinsic::nvvm_ptr_constant_to_gen ||
+ id == Intrinsic::nvvm_ptr_gen_to_global ||
+ id == Intrinsic::nvvm_ptr_gen_to_shared ||
+ id == Intrinsic::nvvm_ptr_gen_to_local ||
+ id == Intrinsic::nvvm_ptr_gen_to_constant ||
+ id == Intrinsic::nvvm_ptr_gen_to_param) {
+ return true;
+ }
+
+ return false;
+}
+
+// Consider several special intrinsics when stripping pointer casts, and
+// provide an option to ignore GEP indices so that only the base address is
+// found, which can be used in simple alias disambiguation.
+const Value *llvm::skipPointerTransfer(const Value *V,
+ bool ignore_GEP_indices) {
+ V = V->stripPointerCasts();
+ while (true) {
+ if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+ if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+ V = IS->getArgOperand(0)->stripPointerCasts();
+ continue;
+ }
+ } else if (ignore_GEP_indices)
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand()->stripPointerCasts();
+ continue;
+ }
+ break;
+ }
+ return V;
+}
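A usage sketch for an assumed caller, where Ptr is some pointer-typed const Value* (not part of the patch):

    const Value *Base = llvm::skipPointerTransfer(Ptr, /*ignore_GEP_indices=*/true);
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
      // Ptr is derived from GV through casts, address-space transfer
      // intrinsics and (ignored) GEP offsets.
    }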
+
+// Consider several special intrinsics when stripping pointer casts, and
+// - ignore GEP indices so that only the base address is found, and
+// - track PHI nodes,
+// which can be used in simple alias disambiguation.
+const Value *llvm::skipPointerTransfer(const Value *V,
+ std::set<const Value *> &processed) {
+ if (processed.find(V) != processed.end())
+ return NULL;
+ processed.insert(V);
+
+ const Value *V2 = V->stripPointerCasts();
+ if (V2 != V && processed.find(V2) != processed.end())
+ return NULL;
+ processed.insert(V2);
+
+ V = V2;
+
+ while (true) {
+ if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+ if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+ V = IS->getArgOperand(0)->stripPointerCasts();
+ continue;
+ }
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand()->stripPointerCasts();
+ continue;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (V != V2 && processed.find(V) != processed.end())
+ return NULL;
+ processed.insert(PN);
+ const Value *common = 0;
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ const Value *pv = PN->getIncomingValue(i);
+ const Value *base = skipPointerTransfer(pv, processed);
+ if (base) {
+ if (common == 0)
+ common = base;
+ else if (common != base)
+ return PN;
+ }
+ }
+ if (common == 0)
+ return PN;
+ V = common;
+ }
+ break;
+ }
+ return V;
+}
+
+
+// The following are some useful utilities for debugging
+
+BasicBlock *llvm::getParentBlock(Value *v) {
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+ return B;
+
+ if (Instruction *I = dyn_cast<Instruction>(v))
+ return I->getParent();
+
+ return 0;
+}
+
+Function *llvm::getParentFunction(Value *v) {
+ if (Function *F = dyn_cast<Function>(v))
+ return F;
+
+ if (Instruction *I = dyn_cast<Instruction>(v))
+ return I->getParent()->getParent();
+
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+ return B->getParent();
+
+ return 0;
+}
+
+// Dump a block by name
+void llvm::dumpBlock(Value *v, char *blockName) {
+ Function *F = getParentFunction(v);
+ if (F == 0)
+ return;
+
+ for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
+ BasicBlock *B = it;
+ if (strcmp(B->getName().data(), blockName) == 0) {
+ B->dump();
+ return;
+ }
+ }
+}
+
+// Find an instruction by name
+Instruction *llvm::getInst(Value *base, char *instName) {
+ Function *F = getParentFunction(base);
+ if (F == 0)
+ return 0;
+
+ for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
+ Instruction *I = &*it;
+ if (strcmp(I->getName().data(), instName) == 0) {
+ return I;
+ }
+ }
+
+ return 0;
+}
+
+// Dump an instruction by name
+void llvm::dumpInst(Value *base, char *instName) {
+ Instruction *I = getInst(base, instName);
+ if (I)
+ I->dump();
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
+ if (Instruction *I = dyn_cast<Instruction>(v)) {
+
+ if (visited->find(I) != visited->end())
+ return;
+
+ visited->insert(I);
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ dumpInstRec(I->getOperand(i), visited);
+
+ I->dump();
+ }
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v) {
+ std::set<Instruction *> visited;
+
+ //BasicBlock *B = getParentBlock(v);
+
+ dumpInstRec(v, &visited);
+}
+
+// Dump the parent for Instruction, block or function
+void llvm::dumpParent(Value *v) {
+ if (Instruction *I = dyn_cast<Instruction>(v)) {
+ I->getParent()->dump();
+ return;
+ }
+
+ if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
+ B->getParent()->dump();
+ return;
+ }
+
+ if (Function *F = dyn_cast<Function>(v)) {
+ F->getParent()->dump();
+ return;
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
new file mode 100644
index 000000000000..fe6ad559e9df
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -0,0 +1,94 @@
+//===-- NVPTXUtilities - Utilities ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVVM specific utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXUTILITIES_H
+#define NVPTXUTILITIES_H
+
+#include "llvm/Value.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include <cstdarg>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm
+{
+
+#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
+#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+
+bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
+bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
+ std::vector<unsigned> &);
+
+bool isTexture(const llvm::Value &);
+bool isSurface(const llvm::Value &);
+bool isSampler(const llvm::Value &);
+bool isImage(const llvm::Value &);
+bool isImageReadOnly(const llvm::Value &);
+bool isImageWriteOnly(const llvm::Value &);
+
+std::string getTextureName(const llvm::Value &);
+std::string getSurfaceName(const llvm::Value &);
+std::string getSamplerName(const llvm::Value &);
+
+bool getMaxNTIDx(const llvm::Function &, unsigned &);
+bool getMaxNTIDy(const llvm::Function &, unsigned &);
+bool getMaxNTIDz(const llvm::Function &, unsigned &);
+
+bool getReqNTIDx(const llvm::Function &, unsigned &);
+bool getReqNTIDy(const llvm::Function &, unsigned &);
+bool getReqNTIDz(const llvm::Function &, unsigned &);
+
+bool getMinCTASm(const llvm::Function &, unsigned &);
+bool isKernelFunction(const llvm::Function &);
+
+bool getAlign(const llvm::Function &, unsigned index, unsigned &);
+bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
+
+bool isBarrierIntrinsic(llvm::Intrinsic::ID);
+
+/// make_vector - Helper function which is useful for building temporary
+/// vectors to pass into CallInst ctors. This turns a null-terminated list of
+/// pointers (or other value types) into a real live vector.
+///
+template<typename T>
+inline std::vector<T> make_vector(T A, ...) {
+ va_list Args;
+ va_start(Args, A);
+ std::vector<T> Result;
+ Result.push_back(A);
+ while (T Val = va_arg(Args, T))
+ Result.push_back(Val);
+ va_end(Args);
+ return Result;
+}
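Usage sketch (Arg0 and Arg1 are assumed Value pointers, not names from the patch); the variadic list must end with a null of the element type:

    std::vector<Value *> Args = llvm::make_vector<Value *>(Arg0, Arg1, (Value *)0);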
+
+bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
+const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
+const Value *skipPointerTransfer(const Value *V,
+ std::set<const Value *> &processed);
+BasicBlock *getParentBlock(Value *v);
+Function *getParentFunction(Value *v);
+void dumpBlock(Value *v, char *blockName);
+Instruction *getInst(Value *base, char *instName);
+void dumpInst(Value *base, char *instName);
+void dumpInstRec(Value *v, std::set<Instruction *> *visited);
+void dumpInstRec(Value *v);
+void dumpParent(Value *v);
+
+}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
new file mode 100644
index 000000000000..775df19be162
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -0,0 +1,1481 @@
+//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Vector Specific
+//-----------------------------------
+
+//
+// All vector instructions derive from NVPTXVecInst
+//
+
+class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ NVPTXInst sInst=NOP>
+ : NVPTXInst<outs, ins, asmstr, pattern> {
+ NVPTXInst scalarInst=sInst;
+}
+
+let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
+// Extract v2i16
+def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+ (ins V2I16Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int16Regs:$dst, (vector_extract
+ (v2i16 V2I16Regs:$src), imm:$c))],
+ IMOV16rr>;
+
+// Extract v4i16
+def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+ (ins V4I16Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int16Regs:$dst, (vector_extract
+ (v4i16 V4I16Regs:$src), imm:$c))],
+ IMOV16rr>;
+
+// Extract v2i8
+def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+ (ins V2I8Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int8Regs:$dst, (vector_extract
+ (v2i8 V2I8Regs:$src), imm:$c))],
+ IMOV8rr>;
+
+// Extract v4i8
+def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+ (ins V4I8Regs:$src, i8imm:$c),
+ "mov.u16 \t$dst, $src${c:vecelem};",
+ [(set Int8Regs:$dst, (vector_extract
+ (v4i8 V4I8Regs:$src), imm:$c))],
+ IMOV8rr>;
+
+// Extract v2i32
+def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
+ (ins V2I32Regs:$src, i8imm:$c),
+ "mov.u32 \t$dst, $src${c:vecelem};",
+ [(set Int32Regs:$dst, (vector_extract
+ (v2i32 V2I32Regs:$src), imm:$c))],
+ IMOV32rr>;
+
+// Extract v2f32
+def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
+ (ins V2F32Regs:$src, i8imm:$c),
+ "mov.f32 \t$dst, $src${c:vecelem};",
+ [(set Float32Regs:$dst, (vector_extract
+ (v2f32 V2F32Regs:$src), imm:$c))],
+ FMOV32rr>;
+
+// Extract v2i64
+def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
+ (ins V2I64Regs:$src, i8imm:$c),
+ "mov.u64 \t$dst, $src${c:vecelem};",
+ [(set Int64Regs:$dst, (vector_extract
+ (v2i64 V2I64Regs:$src), imm:$c))],
+ IMOV64rr>;
+
+// Extract v2f64
+def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
+ (ins V2F64Regs:$src, i8imm:$c),
+ "mov.f64 \t$dst, $src${c:vecelem};",
+ [(set Float64Regs:$dst, (vector_extract
+ (v2f64 V2F64Regs:$src), imm:$c))],
+ FMOV64rr>;
+
+// Extract v4i32
+def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
+ (ins V4I32Regs:$src, i8imm:$c),
+ "mov.u32 \t$dst, $src${c:vecelem};",
+ [(set Int32Regs:$dst, (vector_extract
+ (v4i32 V4I32Regs:$src), imm:$c))],
+ IMOV32rr>;
+
+// Extract v4f32
+def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
+ (ins V4F32Regs:$src, i8imm:$c),
+ "mov.f32 \t$dst, $src${c:vecelem};",
+ [(set Float32Regs:$dst, (vector_extract
+ (v4f32 V4F32Regs:$src), imm:$c))],
+ FMOV32rr>;
+}
+
+let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
+// Insert v2i8
+def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
+ (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
+ "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V2I8Regs:$dst,
+ (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
+ IMOV8rr>;
+
+// Insert v4i8
+def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
+ (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
+ "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V4I8Regs:$dst,
+ (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
+ IMOV8rr>;
+
+// Insert v2i16
+def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
+ (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
+ "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V2I16Regs:$dst,
+ (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
+
+// Insert v4i16
+def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
+ (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
+ "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
+ [(set V4I16Regs:$dst,
+ (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
+
+// Insert v2i32
+def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
+ (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
+ "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
+ [(set V2I32Regs:$dst,
+ (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
+
+// Insert v2f32
+def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
+ (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
+ "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
+ [(set V2F32Regs:$dst,
+ (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
+
+// Insert v2i64
+def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
+ (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
+ "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
+ [(set V2I64Regs:$dst,
+ (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+ IMOV64rr>;
+
+// Insert v2f64
+def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
+ (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
+ "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
+ [(set V2F64Regs:$dst,
+ (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+ FMOV64rr>;
+
+// Insert v4i32
+def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
+ (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
+ "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
+ [(set V4I32Regs:$dst,
+ (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
+
+// Insert v4f32
+def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
+ (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
+ "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
+ "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
+ [(set V4F32Regs:$dst,
+ (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
+}
+
+class BinOpAsmString<string c> {
+ string s = c;
+}
+
+class V4AsmStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;
+
+class V2AsmStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;
+
+class V4MADStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
+
+class V2MADStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
+
+class V4UnaryStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(!strconcat(
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1;\n\t"),
+ opcode), " \t${dst}_2, ${a}_2;\n\t"),
+ opcode), " \t${dst}_3, ${a}_3;")>;
+
+class V2UnaryStr<string opcode> : BinOpAsmString<
+ !strconcat(!strconcat(!strconcat(
+ opcode, " \t${dst}_0, ${a}_0;\n\t"),
+ opcode), " \t${dst}_1, ${a}_1;")>;
+
+class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
+ asmstr.s,
+ [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
+ sInst>;
+
+class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
+ NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
+ asmstr.s,
+ [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
+ sInst>;
+
+class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
+ asmstr.s,
+ [(set regclass:$dst, (OpNode regclass:$a))], sInst>;
+
+multiclass IntBinVOp<string asmstr, SDNode OpNode,
+ NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
+ i16op=NOP, NVPTXInst i8op=NOP> {
+ def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
+ i64op>;
+ def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
+ i32op>;
+ def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
+ i32op>;
+ def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
+ i16op>;
+ def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
+ i16op>;
+ def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
+ i8op>;
+ def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
+ i8op>;
+}
+
+multiclass FloatBinVOp<string asmstr, SDNode OpNode,
+ NVPTXInst f64=NOP, NVPTXInst f32=NOP,
+ NVPTXInst f32_ftz=NOP> {
+ def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
+ V2F64Regs, f64>;
+ def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
+ V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
+ def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
+ V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
+ def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
+ V4F32Regs, f32>;
+ def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
+ V2F32Regs, f32>;
+}
+
+multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
+ NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
+ NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
+ def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
+ V2I64Regs, i64op>;
+ def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
+ V4I32Regs, i32op>;
+ def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
+ V2I32Regs, i32op>;
+ def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V4I16Regs, i16op>;
+ def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V2I16Regs, i16op>;
+ def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V4I8Regs, i8op>;
+ def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
+ V2I8Regs, i8op>;
+}
+
+
+// Integer Arithmetic
+let VecInstType=isVecOther.Value in {
+defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
+defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;
+
+def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
+ ADDCCi32rr>;
+def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
+ ADDCCi32rr>;
+def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
+ SUBCCi32rr>;
+def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
+ SUBCCi32rr>;
+def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
+ ADDCCCi32rr>;
+def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
+ ADDCCCi32rr>;
+def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
+ SUBCCCi32rr>;
+def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
+ SUBCCCi32rr>;
+
+def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
+ SHLi64rr>;
+def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
+ SHLi32rr>;
+def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
+ SHLi32rr>;
+def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
+ SHLi16rr>;
+def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
+ SHLi16rr>;
+def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
+ SHLi8rr>;
+def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
+ SHLi8rr>;
+}
+
+// cvt to v*i32, helpers for shift
+class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
+ NVPTXInst sInst=NOP> :
+ NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
+
+class VecCVTStrHelper<string op, string dest, string src> {
+ string s=!strconcat(op, !strconcat("\t",
+ !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
+}
+
+class Vec2CVTStr<string op> {
+ string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
+ !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
+}
+
+class Vec4CVTStr<string op> {
+ string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
+ !strconcat("\n\t",
+ !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
+ !strconcat("\n\t",
+ !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
+ !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
+}
+
+let VecInstType=isVecOther.Value in {
+def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
+def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
+def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
+ Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
+def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
+ Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
+def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
+ Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
+}
+
+def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+let VecInstType=isVecOther.Value in {
+def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
+ SRAi64rr>;
+def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
+ SRAi32rr>;
+def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
+ SRAi32rr>;
+def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
+ SRAi16rr>;
+def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
+ SRAi16rr>;
+def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
+ SRAi8rr>;
+def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
+ SRAi8rr>;
+
+def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
+ SRLi64rr>;
+def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
+ SRLi32rr>;
+def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
+ SRLi32rr>;
+def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
+ SRLi16rr>;
+def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
+ SRLi16rr>;
+def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
+ SRLi8rr>;
+def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
+ SRLi8rr>;
+
+defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
+ MULTi8rr>;
+defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
+ MULTHSi16rr,
+ MULTHSi8rr>;
+defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
+ MULTHUi16rr,
+ MULTHUi8rr>;
+defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
+ SDIVi8rr>;
+defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
+ UDIVi8rr>;
+defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
+ SREMi8rr>;
+defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
+ UREMi8rr>;
+}
+
+def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
+ (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
+def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
+ (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
+def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
+ (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
+
+def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
+ (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
+def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
+ (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
+
+multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
+ NVPTXRegClass regclassv2,
+ SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V4 : NVPTXVecInst<(outs regclassv4:$dst),
+ (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
+ V4MADStr<asmstr>.s,
+ [(set regclassv4:$dst,
+ (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
+ sop>,
+ Requires<[Pred]>;
+ def V2 : NVPTXVecInst<(outs regclassv2:$dst),
+ (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclassv2:$dst,
+ (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
+ sop>,
+ Requires<[Pred]>;
+}
+
+multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V2 : NVPTXVecInst<(outs regclass:$dst),
+ (ins regclass:$a, regclass:$b, regclass:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclass:$dst, (add
+ (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
+ Requires<[Pred]>;
+}
+multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
+ Predicate Pred> {
+ def V2 : NVPTXVecInst<(outs regclass:$dst),
+ (ins regclass:$a, regclass:$b, regclass:$c),
+ V2MADStr<asmstr>.s,
+ [(set regclass:$dst, (fadd
+ (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
+ Requires<[Pred]>;
+}
+
+let VecInstType=isVecOther.Value in {
+defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
+defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
+ true>;
+defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
+ true>;
+defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;
+
+defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;
+
+defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
+defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
+defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;
+
+defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
+ FMAD32_ftzrrr, doFMADF32_ftz>;
+defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
+ FMA32_ftzrrr, doFMAF32_ftz>;
+defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
+ doFMADF32>;
+defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
+ doFMAF32>;
+defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
+}
+
+let VecInstType=isVecOther.Value in {
+def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
+ FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
+def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
+ FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
+def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
+ FDIV32rr_prec>, Requires<[reqPTX20]>;
+def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
+ FDIV32rr_prec>, Requires<[reqPTX20]>;
+def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
+ FDIV32rr_ftz>, Requires<[doF32FTZ]>;
+def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
+ FDIV32rr_ftz>, Requires<[doF32FTZ]>;
+def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
+def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
+def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
+}
+
+def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;
+
+let VecInstType=isVecOther.Value in {
+def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
+ FNEGf32_ftz>, Requires<[doF32FTZ]>;
+def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
+ FNEGf32_ftz>, Requires<[doF32FTZ]>;
+def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
+def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
+def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;
+
+// Logical Arithmetic
+defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
+defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
+defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;
+
+defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
+}
+
+
+multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
+ (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
+ (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
+defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
+defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
+defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
+
+multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
+ (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
+ (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
+defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
+defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
+defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
+
+multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+ def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
+ (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
+ Requires<[Pred]>;
+
+ def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
+ (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
+ Requires<[Pred]>;
+}
+
+defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
+
+class VecModStr<string vecsize, string elem, string extra, string l="">
+{
+ string t1 = !strconcat("${c", elem);
+ string t2 = !strconcat(t1, ":vecv");
+ string t3 = !strconcat(t2, vecsize);
+ string t4 = !strconcat(t3, extra);
+ string t5 = !strconcat(t4, l);
+ string s = !strconcat(t5, "}");
+}
+class ShuffleOneLine<string vecsize, string elem, string type>
+{
+ string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
+ string t2 = !strconcat(t1, "mov.");
+ string t3 = !strconcat(t2, type);
+ string t4 = !strconcat(t3, " \t${dst}_");
+ string t5 = !strconcat(t4, elem);
+ string t6 = !strconcat(t5, ", $src1");
+ string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
+ string t8 = !strconcat(t7, ";\n\t");
+ string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
+ string t10 = !strconcat(t9, "mov.");
+ string t11 = !strconcat(t10, type);
+ string t12 = !strconcat(t11, " \t${dst}_");
+ string t13 = !strconcat(t12, elem);
+ string t14 = !strconcat(t13, ", $src2");
+ string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
+ string s = !strconcat(t15, ";");
+}
+class ShuffleAsmStr2<string type>
+{
+ string t1 = ShuffleOneLine<"2", "0", type>.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
+}
+class ShuffleAsmStr4<string type>
+{
+ string t1 = ShuffleOneLine<"4", "0", type>.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
+ string t4 = !strconcat(t3, "\n\t");
+ string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
+ string t6 = !strconcat(t5, "\n\t");
+ string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
+}
+
+let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
+ (ins V4F32Regs:$src1, V4F32Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"f32">.s),
+ [], FMOV32rr>;
+
+def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
+ (ins V4I32Regs:$src1, V4I32Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u32">.s),
+ [], IMOV32rr>;
+
+def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
+ (ins V4I16Regs:$src1, V4I16Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u16">.s),
+ [], IMOV16rr>;
+
+def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
+ (ins V4I8Regs:$src1, V4I8Regs:$src2,
+ i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
+ ShuffleAsmStr4<"u16">.s),
+ [], IMOV8rr>;
+
+def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
+ (ins V2F32Regs:$src1, V2F32Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"f32">.s),
+ [], FMOV32rr>;
+
+def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
+ (ins V2I32Regs:$src1, V2I32Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u32">.s),
+ [], IMOV32rr>;
+
+def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
+ (ins V2I8Regs:$src1, V2I8Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u16">.s),
+ [], IMOV8rr>;
+
+def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
+ (ins V2I16Regs:$src1, V2I16Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u16">.s),
+ [], IMOV16rr>;
+
+def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
+ (ins V2F64Regs:$src1, V2F64Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"f64">.s),
+ [], FMOV64rr>;
+
+def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
+ (ins V2I64Regs:$src1, V2I64Regs:$src2,
+ i8imm:$c0, i8imm:$c1),
+ !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
+ ShuffleAsmStr2<"u64">.s),
+ [], IMOV64rr>;
+}
+
+def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
+}]>;
+def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
+}]>;
+def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
+}]>;
+def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
+}]>;
+
+// The spurious call is here to silence a compiler warning about N being
+// unused.
+def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs),
+ [{ N->getGluedNode(); return true; }]>;
+
+def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
+ (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
+ (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
+ (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
+ (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
+ (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
+ (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
+ (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
+ (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
+ (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
+ (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
+
+def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
+ (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
+ (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
+
+class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
+ NVPTXInst si>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins sclass:$a1, sclass:$a2),
+ !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
+ [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
+ si>;
+class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
+ NVPTXInst si>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
+ !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
+ [(set vclass:$dst,
+ (build_vector sclass:$a1, sclass:$a2,
+ sclass:$a3, sclass:$a4))], si>;
+
+let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
+def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs,
+ FMOV32rr>;
+def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs,
+ FMOV64rr>;
+
+def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs,
+ IMOV32rr>;
+def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs,
+ IMOV64rr>;
+def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs,
+ IMOV16rr>;
+def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs,
+ IMOV8rr>;
+
+def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs,
+ FMOV32rr>;
+
+def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs,
+ IMOV32rr>;
+def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs,
+ IMOV16rr>;
+def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
+ IMOV8rr>;
+}
+
+class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
+ !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
+ [], sop>;
+
+let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+ VecInstType=isVecOther.Value in {
+def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
+def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
+
+def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
+def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
+
+def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
+def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
+
+def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
+def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
+
+def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
+def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
+}
+
+// extract subvector patterns
+def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
+
+def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
+ (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
+ (V4f32Extract V4F32Regs:$src, 1))>;
+def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
+ (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
+ (V4f32Extract V4F32Regs:$src, 3))>;
+def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
+ (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
+ (V4i32Extract V4I32Regs:$src, 1))>;
+def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
+ (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
+ (V4i32Extract V4I32Regs:$src, 3))>;
+def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
+ (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
+ (V4i16Extract V4I16Regs:$src, 1))>;
+def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
+ (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
+ (V4i16Extract V4I16Regs:$src, 3))>;
+def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
+ (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
+ (V4i8Extract V4I8Regs:$src, 1))>;
+def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
+ (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
+ (V4i8Extract V4I8Regs:$src, 3))>;
+
+// Select instructions
+class Select_OneLine<string type, string pos> {
+ string t1 = !strconcat("selp.", type);
+ string t2 = !strconcat(t1, " \t${dst}_");
+ string t3 = !strconcat(t2, pos);
+ string t4 = !strconcat(t3, ", ${src1}_");
+ string t5 = !strconcat(t4, pos);
+ string t6 = !strconcat(t5, ", ${src2}_");
+ string t7 = !strconcat(t6, pos);
+ string s = !strconcat(t7, ", $p;");
+}
+
+class Select_Str2<string type> {
+ string t1 = Select_OneLine<type, "0">.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string s = !strconcat(t2, Select_OneLine<type, "1">.s);
+}
+
+class Select_Str4<string type> {
+ string t1 = Select_OneLine<type, "0">.s;
+ string t2 = !strconcat(t1, "\n\t");
+ string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
+ string t4 = !strconcat(t3, "\n\t");
+ string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
+ string t6 = !strconcat(t5, "\n\t");
+ string s = !strconcat(t6, Select_OneLine<type, "3">.s);
+
+}
+
+class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
+ : NVPTXVecInst<(outs vclass:$dst),
+ (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
+ asmstr,
+ [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
+ vclass:$src2))],
+ sop>;
+
+let VecInstType=isVecOther.Value in {
+def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
+def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
+def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
+def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
+def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
+def V4I8_Select : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
+def V2I8_Select : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;
+
+def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
+def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
+def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
+}
+
+// Comparison instructions
+
+// setcc convenience fragments.
+def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOEQ)>;
+def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGT)>;
+def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGE)>;
+def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLT)>;
+def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLE)>;
+def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETONE)>;
+def vseto : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETO)>;
+def vsetuo : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUO)>;
+def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUEQ)>;
+def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGT)>;
+def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGE)>;
+def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULT)>;
+def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULE)>;
+def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUNE)>;
+def vseteq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETEQ)>;
+def vsetgt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGT)>;
+def vsetge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGE)>;
+def vsetlt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLT)>;
+def vsetle : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLE)>;
+def vsetne : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETNE)>;
+
+class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
+ NVPTXInst sop>
+ : NVPTXVecInst<(outs outrclass:$dst),
+ (ins inrclass:$a, inrclass:$b),
+ "Unsupported",
+ [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
+ sop>;
+
+multiclass Vec_Compare_All<PatFrag op,
+ NVPTXInst inst8,
+ NVPTXInst inst16,
+ NVPTXInst inst32,
+ NVPTXInst inst64>
+{
+ def V2I8 : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>;
+ def V4I8 : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>;
+ def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
+ def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
+ def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
+ def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
+ def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
+}
+
+let VecInstType=isVecOther.Value in {
+ defm VecSGT : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
+ ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
+ defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
+ ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
+ defm VecSLT : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
+ ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
+ defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
+ ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
+ defm VecSGE : Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
+ ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
+ defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
+ ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
+ defm VecSLE : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
+ ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
+ defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
+ ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
+ defm VecSEQ : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
+ ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
+ defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
+ ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
+ defm VecSNE : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
+ ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
+ defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
+ ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
+}
+
+multiclass FVec_Compare_All<PatFrag op,
+ NVPTXInst instf32,
+ NVPTXInst instf64>
+{
+ def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
+ def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
+ def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
+}
+
+let VecInstType=isVecOther.Value in {
+ defm FVecGT : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
+ FSetGTf64rr_toi64>;
+ defm FVecLT : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
+ FSetLTf64rr_toi64>;
+ defm FVecGE : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
+ FSetGEf64rr_toi64>;
+ defm FVecLE : FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
+ FSetLEf64rr_toi64>;
+ defm FVecEQ : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
+ FSetEQf64rr_toi64>;
+ defm FVecNE : FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
+ FSetNEf64rr_toi64>;
+
+ defm FVecUGT : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
+ FSetUGTf64rr_toi64>;
+ defm FVecULT : FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
+ FSetULTf64rr_toi64>;
+ defm FVecUGE : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
+ FSetUGEf64rr_toi64>;
+ defm FVecULE : FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
+ FSetULEf64rr_toi64>;
+ defm FVecUEQ : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
+ FSetUEQf64rr_toi64>;
+ defm FVecUNE : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
+ FSetUNEf64rr_toi64>;
+
+ defm FVecNUM : FVec_Compare_All<vseto, FSetNUMf32rr_toi32,
+ FSetNUMf64rr_toi64>;
+ defm FVecNAN : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
+ FSetNANf64rr_toi64>;
+}
+
+class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
+ (ins i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;
+
+class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs regclass:$d1, regclass:$d2),
+ (ins i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("ld.param", opstr),
+ "\t{{$d1, $d2}}, [retval0+$b];"), []>;
+
+
+class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
+ i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;
+
+class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[param$a+$b], {{$s1, $s2}};"), []>;
+
+class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
+ i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
+
+class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
+ NVPTXInst<(outs),
+ (ins regclass:$s1, regclass:$s2, i32imm:$a),
+ !strconcat(!strconcat("st.param", opstr),
+ "\t[func_retval+$a], {{$s1, $s2}};"), []>;
+
+def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
+def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
+def LoadParamScalar4I8 : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
+def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
+def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
+def LoadParamScalar2I8 : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
+
+def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
+def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
+def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreParamScalar4I8 : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreParamScalar2I8 : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
+def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
+def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
+def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
+def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
+
+class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
+ NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
+ "loadparam : $dst <- [$a, $b]",
+ [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
+ sop>;
+
+class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+ "storeparam : [$a, $b] <- $val",
+ [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
+
+class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
+ NVPTXInst sop=NOP>
+ : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
+ "storeretval : retval[$a] <- $val",
+ [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
+
+let VecInstType=isVecLD.Value in {
+def LoadParamV4I32 : LoadParamVecInst<V4I32Regs, ".v4.b32",
+ LoadParamScalar4I32>;
+def LoadParamV4I16 : LoadParamVecInst<V4I16Regs, ".v4.b16",
+ LoadParamScalar4I16>;
+def LoadParamV4I8 : LoadParamVecInst<V4I8Regs, ".v4.b8",
+ LoadParamScalar4I8>;
+
+def LoadParamV2I64 : LoadParamVecInst<V2I64Regs, ".v2.b64",
+ LoadParamScalar2I64>;
+def LoadParamV2I32 : LoadParamVecInst<V2I32Regs, ".v2.b32",
+ LoadParamScalar2I32>;
+def LoadParamV2I16 : LoadParamVecInst<V2I16Regs, ".v2.b16",
+ LoadParamScalar2I16>;
+def LoadParamV2I8 : LoadParamVecInst<V2I8Regs, ".v2.b8",
+ LoadParamScalar2I8>;
+
+def LoadParamV4F32 : LoadParamVecInst<V4F32Regs, ".v4.f32",
+ LoadParamScalar4F32>;
+def LoadParamV2F32 : LoadParamVecInst<V2F32Regs, ".v2.f32",
+ LoadParamScalar2F32>;
+def LoadParamV2F64 : LoadParamVecInst<V2F64Regs, ".v2.f64",
+ LoadParamScalar2F64>;
+}
+
+let VecInstType=isVecST.Value in {
+def StoreParamV4I32 : StoreParamVecInst<V4I32Regs, ".v4.b32",
+ StoreParamScalar4I32>;
+def StoreParamV4I16 : StoreParamVecInst<V4I16Regs, ".v4.b16",
+ StoreParamScalar4I16>;
+def StoreParamV4I8 : StoreParamVecInst<V4I8Regs, ".v4.b8",
+ StoreParamScalar4I8>;
+
+def StoreParamV2I64 : StoreParamVecInst<V2I64Regs, ".v2.b64",
+ StoreParamScalar2I64>;
+def StoreParamV2I32 : StoreParamVecInst<V2I32Regs, ".v2.b32",
+ StoreParamScalar2I32>;
+def StoreParamV2I16 : StoreParamVecInst<V2I16Regs, ".v2.b16",
+ StoreParamScalar2I16>;
+def StoreParamV2I8 : StoreParamVecInst<V2I8Regs, ".v2.b8",
+ StoreParamScalar2I8>;
+
+def StoreParamV4F32 : StoreParamVecInst<V4F32Regs, ".v4.f32",
+ StoreParamScalar4F32>;
+def StoreParamV2F32 : StoreParamVecInst<V2F32Regs, ".v2.f32",
+ StoreParamScalar2F32>;
+def StoreParamV2F64 : StoreParamVecInst<V2F64Regs, ".v2.f64",
+ StoreParamScalar2F64>;
+
+def StoreRetvalV4I32 : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
+ StoreRetvalScalar4I32>;
+def StoreRetvalV4I16 : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
+ StoreRetvalScalar4I16>;
+def StoreRetvalV4I8 : StoreRetvalVecInst<V4I8Regs, ".v4.b8",
+ StoreRetvalScalar4I8>;
+
+def StoreRetvalV2I64 : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
+ StoreRetvalScalar2I64>;
+def StoreRetvalV2I32 : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
+ StoreRetvalScalar2I32>;
+def StoreRetvalV2I16 : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
+ StoreRetvalScalar2I16>;
+def StoreRetvalV2I8 : StoreRetvalVecInst<V2I8Regs, ".v2.b8",
+ StoreRetvalScalar2I8>;
+
+def StoreRetvalV4F32 : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
+ StoreRetvalScalar4F32>;
+def StoreRetvalV2F32 : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
+ StoreRetvalScalar2F32>;
+def StoreRetvalV2F64 : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
+ StoreRetvalScalar2F64>;
+
+}
+
+
+// Int vector to int scalar bit convert
+// v4i8 -> i32
+def : Pat<(i32 (bitconvert V4I8Regs:$s)),
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
+// v4i16 -> i64
+def : Pat<(i64 (bitconvert V4I16Regs:$s)),
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
+ (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2),
+ (V4i16Extract V4I16Regs:$s,3))>;
+// v2i8 -> i16
+def : Pat<(i16 (bitconvert V2I8Regs:$s)),
+ (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
+// v2i16 -> i32
+def : Pat<(i32 (bitconvert V2I16Regs:$s)),
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
+ (V2i16Extract V2I16Regs:$s,1))>;
+// v2i32 -> i64
+def : Pat<(i64 (bitconvert V2I32Regs:$s)),
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
+ (V2i32Extract V2I32Regs:$s,1))>;
+
+// Int scalar to int vector bit convert
+let VecInstType=isVecDest.Value in {
+// i32 -> v4i8
+def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
+ "Error!",
+ [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
+ I32toV4I8>;
+// i64 -> v4i16
+def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
+ "Error!",
+ [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
+ I64toV4I16>;
+// i16 -> v2i8
+def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
+ "Error!",
+ [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
+ I16toV2I8>;
+// i32 -> v2i16
+def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
+ "Error!",
+ [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
+ I32toV2I16>;
+// i64 -> v2i32
+def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
+ "Error!",
+ [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
+ I64toV2I32>;
+}
+
+// Int vector to int vector bit convert
+// v4i8 -> v2i16
+def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
+ (VecI32toV2I16
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
+// v4i16 -> v2i32
+def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
+ (VecI64toV2I32
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
+// v2i16 -> v4i8
+def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
+ (VecI32toV4I8
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
+// v2i32 -> v4i16
+def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
+ (VecI64toV4I16
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
+// v2i64 -> v4i32
+def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
+ (Build_Vector4_i32
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
+ (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
+// v4i32 -> v2i64
+def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
+ (Build_Vector2_i64
+ (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
+ (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
+
+// Fp scalar to fp vector convert
+// f64 -> v2f32
+let VecInstType=isVecDest.Value in {
+def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
+ "Error!",
+ [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
+ F64toV2F32>;
+}
+
+// Fp vector to fp scalar convert
+// v2f32 -> f64
+def : Pat<(f64 (bitconvert V2F32Regs:$s)),
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
+
+// Fp scalar to int vector convert
+// f32 -> v4i8
+def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
+ (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
+// f32 -> v2i16
+def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
+ (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
+// f64 -> v4i16
+def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
+ (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
+// f64 -> v2i32
+def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
+ (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
+
+// Int vector to fp scalar convert
+// v4i8 -> f32
+def : Pat<(f32 (bitconvert V4I8Regs:$s)),
+ (BITCONVERT_32_I2F
+ (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
+ (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
+// v4i16 -> f64
+def : Pat<(f64 (bitconvert V4I16Regs:$s)),
+ (BITCONVERT_64_I2F
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
+// v2i16 -> f32
+def : Pat<(f32 (bitconvert V2I16Regs:$s)),
+ (BITCONVERT_32_I2F
+ (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
+// v2i32 -> f64
+def : Pat<(f64 (bitconvert V2I32Regs:$s)),
+ (BITCONVERT_64_I2F
+ (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
+
+// Int scalar to fp vector convert
+// i64 -> v2f32
+def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
+ (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
+
+// Fp vector to int scalar convert
+// v2f32 -> i64
+def : Pat<(i64 (bitconvert V2F32Regs:$s)),
+ (BITCONVERT_64_F2I
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
+
+// Int vector to fp vector convert
+// v2i64 -> v4f32
+def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
+ (Build_Vector4_f32
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 0)), 0)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 0)), 1)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 1)), 0)),
+ (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
+ (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
+// v2i64 -> v2f64
+def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
+ (Build_Vector2_f64
+ (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
+ (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
+// v2i32 -> v2f32
+def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
+ (Build_Vector2_f32
+ (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
+ (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
+// v4i32 -> v2f64
+def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
+ (Build_Vector2_f64
+ (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
+ (V4i32Extract V4I32Regs:$s,1))),
+ (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
+ (V4i32Extract V4I32Regs:$s,3))))>;
+// v4i32 -> v4f32
+def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
+ (Build_Vector4_f32
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
+ (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
+// v4i16 -> v2f32
+def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
+ (VecF64toV2F32 (BITCONVERT_64_I2F
+ (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
+ (V4i16Extract V4I16Regs:$s,1),
+ (V4i16Extract V4I16Regs:$s,2),
+ (V4i16Extract V4I16Regs:$s,3))))>;
+
+// Fp vector to int vector convert
+// v2i64 <- v4f32
+def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
+ (Build_Vector2_i64
+ (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
+ (V4f32Extract V4F32Regs:$s,1))),
+ (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
+ (V4f32Extract V4F32Regs:$s,3))))>;
+// v2i64 <- v2f64
+def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
+ (Build_Vector2_i64
+ (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
+ (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
+// v2i32 <- v2f32
+def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
+ (Build_Vector2_i32
+ (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
+ (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
+// v4i32 <- v2f64
+def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
+ (Build_Vector4_i32
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 0)), 0)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 0)), 1)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 1)), 0)),
+ (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
+ (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
+// v4i32 <- v4f32
+def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
+ (Build_Vector4_i32
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
+ (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
+// v4i16 <- v2f32
+def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
+ (VecI64toV4I16 (BITCONVERT_64_F2I
+ (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
+ (V2f32Extract V2F32Regs:$s,1))))>;
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
new file mode 100644
index 000000000000..6a0e5328f62f
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -0,0 +1,92 @@
+//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions that are used by the NVPTX CodeGen
+// components.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXutil.h"
+#include "NVPTX.h"
+
+using namespace llvm;
+
+namespace llvm {
+
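+// Returns true if MI is an LD_i32_avar/LD_i64_avar load whose address-space
+// operand (operand 2) is PTXLdStInstCode::PARAM, i.e. a load from the
+// parameter state space.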
+bool isParamLoad(const MachineInstr *MI) {
+  if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
+      (MI->getOpcode() != NVPTX::LD_i64_avar))
+    return false;
+  if (!MI->getOperand(2).isImm())
+    return false;
+  if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
+    return false;
+  return true;
+}
+
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
+
+static int encode_leb128(uint64_t val, int *nbytes,
+ char *space, int splen)
+{
+ char *a;
+ char *end = space + splen;
+
+ a = space;
+ do {
+ unsigned char uc;
+
+ if (a >= end)
+ return 1;
+ uc = val & DATA_MASK;
+ val >>= DIGIT_WIDTH;
+ if (val != 0)
+ uc |= MORE_BYTES;
+ *a = uc;
+ a++;
+ } while (val);
+ *nbytes = a - space;
+ return 0;
+}
+
+#undef DATA_MASK
+#undef DIGIT_WIDTH
+#undef MORE_BYTES
+
+uint64_t encode_leb128(const char *str)
+{
+  union { uint64_t x; char a[8]; } temp64;
+
+  temp64.x = 0;
+
+  unsigned len = strlen(str);
+  assert(len <= 8 && "Cannot encode register names longer than 8 bytes");
+  // Pack the name into the integer in reverse byte order before encoding.
+  for (unsigned i=0; i!=len; ++i)
+    temp64.a[i] = str[len-1-i];
+
+ char encoded[16];
+ int nbytes;
+
+ int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
+
+ (void)retval;
+ assert(retval == 0 &&
+ "Encoding to leb128 failed");
+
+ assert(nbytes <= 8 &&
+ "Cannot support register names with leb128 encoding > 8 bytes");
+
+ temp64.x = 0;
+ for (int i=0; i<nbytes; ++i)
+ temp64.a[i] = encoded[i];
+
+ return temp64.x;
+}
+
+} // end namespace llvm
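For reference, a minimal standalone sketch (not part of the patch) of the unsigned LEB128 scheme that the encode_leb128 helpers above implement: 7 data bits per byte, with 0x80 set on every byte except the last. The helper name and the 300 -> "ac 02" example are illustrative only.

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Emit 7 bits at a time, least-significant group first, setting the
  // continuation bit (0x80) on every byte except the last.
  static std::vector<unsigned char> leb128(uint64_t val) {
    std::vector<unsigned char> out;
    do {
      unsigned char byte = val & 0x7f;
      val >>= 7;
      if (val != 0)
        byte |= 0x80;
      out.push_back(byte);
    } while (val != 0);
    return out;
  }

  int main() {
    for (unsigned char b : leb128(300)) // prints "ac 02"
      std::printf("%02x ", b);
    std::printf("\n");
    return 0;
  }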
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
new file mode 100644
index 000000000000..d1d117159486
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXutil.h
@@ -0,0 +1,25 @@
+//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions that are used by the NVPTX CodeGen
+// components.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_UTIL_H
+#define LLVM_TARGET_NVPTX_UTIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+bool isParamLoad(const MachineInstr *);
+uint64_t encode_leb128(const char *str);
+}
+
+#endif
diff --git a/lib/Target/NVPTX/TargetInfo/CMakeLists.txt b/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..0bf13346d2ef
--- /dev/null
+++ b/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+#include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMNVPTXInfo
+ NVPTXTargetInfo.cpp
+ )
+
+add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
index 2cc30c422f14..ef12b0e64700 100644
--- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;===- ./lib/Target/NVPTX/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = PTXInfo
-parent = PTX
+name = NVPTXInfo
+parent = NVPTX
required_libraries = MC Support Target
-add_to_library_groups = PTX
+add_to_library_groups = NVPTX
diff --git a/lib/Target/PTX/TargetInfo/Makefile b/lib/Target/NVPTX/TargetInfo/Makefile
index 8619785889aa..8622315b47b9 100644
--- a/lib/Target/PTX/TargetInfo/Makefile
+++ b/lib/Target/NVPTX/TargetInfo/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
+##===- lib/Target/NVPTX/TargetInfo/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXInfo
+LIBRARYNAME = LLVMNVPTXInfo
# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
new file mode 100644
index 000000000000..f3624b9f23c7
--- /dev/null
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheNVPTXTarget32;
+Target llvm::TheNVPTXTarget64;
+
+extern "C" void LLVMInitializeNVPTXTargetInfo() {
+ RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
+ "NVIDIA PTX 32-bit");
+ RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
+ "NVIDIA PTX 64-bit");
+}
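A minimal usage sketch (not part of the patch): once the initializer above has run, clients can find these targets through the standard TargetRegistry interface. This assumes the lookupTarget(triple, error) entry point and that the plain "nvptx64" arch string is accepted as a triple; both the function name findNVPTX64 and the lookup string are illustrative.

  #include <string>
  #include "llvm/Support/TargetRegistry.h"
  #include "llvm/Support/TargetSelect.h"

  // Find the 64-bit NVPTX target registered by LLVMInitializeNVPTXTargetInfo().
  const llvm::Target *findNVPTX64() {
    llvm::InitializeAllTargetInfos(); // runs every configured TargetInfo initializer
    std::string Err;
    // Returns null and fills Err if no registered target matches the triple.
    return llvm::TargetRegistry::lookupTarget("nvptx64", Err);
  }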
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
new file mode 100644
index 000000000000..8043e2de0972
--- /dev/null
+++ b/lib/Target/NVPTX/VectorElementize.cpp
@@ -0,0 +1,1248 @@
+//===-- VectorElementize.cpp - Convert vector operations to scalar ops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts operations on vector types to operations on their
+// element types.
+//
+// For generic binary and unary vector instructions, the conversion is simple.
+// Suppose we have
+// av = bv Vop cv
+// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
+// This gets converted to the following:
+// a1 = b1 Sop c1
+// a2 = b2 Sop c2
+//
+// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
+// For the above example, the map will look as follows:
+// av => [a1, a2]
+// bv => [b1, b2]
+//
+// In addition, getScalarVersion maps each vector opcode to its scalar opcode:
+// Vop => Sop
+// OtherVop => OtherSop
+// ...
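+//
+// As a concrete instance, a v2i32 add
+//   va = VAddV2I32 vb, vc
+// becomes
+//   a1 = ADDi32rr b1, c1
+//   a2 = ADDi32rr b2, c2
+// and the original vector instruction is then deleted.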
+//
+// For vector specific instructions like vecbuild, vecshuffle etc, the
+// conversion is different. Look at comments near the functions with
+// prefix createVec<...>.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+
+ NVPTXTargetMachine &TM;
+ MachineRegisterInfo *MRI;
+ const NVPTXRegisterInfo *RegInfo;
+ const NVPTXInstrInfo *InstrInfo;
+
+ llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
+ RegClassMap;
+ llvm::DenseMap<unsigned, bool> SimpleMoveMap;
+
+ llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
+
+ bool isVectorInstr(MachineInstr *);
+
+ SmallVector<unsigned, 4> getScalarRegisters(unsigned);
+ unsigned getScalarVersion(unsigned);
+ unsigned getScalarVersion(MachineInstr *);
+
+ bool isVectorRegister(unsigned);
+ const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
+ unsigned numCopiesNeeded(MachineInstr *);
+
+ void createLoadCopy(MachineFunction&, MachineInstr *,
+ std::vector<MachineInstr *>&);
+ void createStoreCopy(MachineFunction&, MachineInstr *,
+ std::vector<MachineInstr *>&);
+
+ void createVecDest(MachineFunction&, MachineInstr *,
+ std::vector<MachineInstr *>&);
+
+ void createCopies(MachineFunction&, MachineInstr *,
+ std::vector<MachineInstr *>&);
+
+ unsigned copyProp(MachineFunction&);
+ unsigned removeDeadMoves(MachineFunction&);
+
+ void elementize(MachineFunction&);
+
+ bool isSimpleMove(MachineInstr *);
+
+ void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies);
+
+ void createVecExtract(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies);
+
+ void createVecInsert(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies);
+
+ void createVecBuild(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies);
+
+public:
+
+ static char ID; // Pass identification, replacement for typeid
+ VectorElementize(NVPTXTargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) {}
+
+ virtual const char *getPassName() const {
+ return "Convert LLVM vector types to their element types";
+ }
+};
+
+char VectorElementize::ID = 1;
+}
+
+static cl::opt<bool>
+RemoveRedundantMoves("nvptx-remove-redundant-moves",
+ cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
+ cl::init(true));
+
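+// Decode the vector-instruction kind that is encoded in an instruction's
+// TSFlags.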
+#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
+ >> NVPTX::VecInstTypeShift)
+#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
+#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
+#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
+#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
+#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
+#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
+#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
+#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
+
+bool VectorElementize::isSimpleMove(MachineInstr *mi) {
+ if (mi->isCopy())
+ return true;
+ unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
+ >> NVPTX::SimpleMoveShift;
+ return (TSFlags == 1);
+}
+
+bool VectorElementize::isVectorInstr(MachineInstr *mi) {
+ if ((mi->getOpcode() == NVPTX::PHI) ||
+ (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
+ MachineOperand dest = mi->getOperand(0);
+ return isVectorRegister(dest.getReg());
+ }
+ return ISVECINST(mi);
+}
+
+unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
+ return getScalarVersion(mi->getOpcode());
+}
+
+///=============================================================================
+///Instr is assumed to be a vector instruction. For most vector instructions,
+///the size of the destination vector register gives the number of scalar copies
+///needed. For VecStore, the size of the stored vector register (getOperand(0))
+///gives the number of scalar copies needed. For VecExtract, the dest is a
+///scalar, so the size of the source vector register (getOperand(1)) gives the
+///number of scalar copies needed.
+///=============================================================================
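+/// For example, VAddV4I32 defines a V4I32Regs register, so 4 scalar copies are
+/// needed, while a store of a V2F64Regs value reads 2 elements, so 2 copies
+/// are needed.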
+unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
+  unsigned numDefs=0;
+  unsigned def=0;
+ for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
+ MachineOperand oper = Instr->getOperand(i);
+
+ if (!oper.isReg()) continue;
+ if (!oper.isDef()) continue;
+ def = i;
+ numDefs++;
+ }
+ assert((numDefs <= 1) && "Only 0 or 1 defs supported");
+
+ if (numDefs == 1) {
+ unsigned regnum = Instr->getOperand(def).getReg();
+ if (ISVECEXTRACT(Instr))
+ regnum = Instr->getOperand(1).getReg();
+ return getNVPTXVectorSize(MRI->getRegClass(regnum));
+ }
+ else if (numDefs == 0) {
+ assert(ISVECSTORE(Instr)
+ && "Only 0 def instruction supported is vector store");
+
+ unsigned regnum = Instr->getOperand(0).getReg();
+ return getNVPTXVectorSize(MRI->getRegClass(regnum));
+ }
+ return 1;
+}
+
+const TargetRegisterClass *VectorElementize::
+getScalarRegClass(const TargetRegisterClass *RC) {
+ assert(isNVPTXVectorRegClass(RC) &&
+ "Not a vector register class");
+ return getNVPTXElemClass(RC);
+}
+
+bool VectorElementize::isVectorRegister(unsigned reg) {
+ const TargetRegisterClass *RC=MRI->getRegClass(reg);
+ return isNVPTXVectorRegClass(RC);
+}
+
+///=============================================================================
+///For every vector register 'v' that is not already in the VectorToScalarMap,
+///create n scalar registers of the corresponding element type, where n
+///is 2 or 4 (getNVPTXVectorSize), and add them to the VectorToScalarMap.
+///=============================================================================
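+/// For example, a V4F32Regs vreg maps to four new Float32Regs vregs.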
+SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
+ assert(isVectorRegister(regnum) && "Expecting a vector register here");
+ // Create the scalar registers and put them in the map, if not already there.
+ if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
+ const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
+ const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
+
+ SmallVector<unsigned, 4> temp;
+
+ for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
+ temp.push_back(MRI->createVirtualRegister(scalarClass));
+
+ VectorToScalarMap[regnum] = temp;
+ }
+ return VectorToScalarMap[regnum];
+}
+
+///=============================================================================
+///For a vector load of the form
+///va <= ldv2 [addr]
+///the following multi output instruction is created :
+///[v1, v2] <= LD [addr]
+///Look at NVPTXVector.td for the definitions of multi output loads.
+///=============================================================================
+void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ copies.push_back(F.CloneMachineInstr(Instr));
+
+ MachineInstr *copy=copies[0];
+ copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
+
+  // Remove the dest, which should be a vector operand.
+ MachineOperand dest = copy->getOperand(0);
+ unsigned regnum = dest.getReg();
+
+ SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
+ copy->RemoveOperand(0);
+
+ std::vector<MachineOperand> otherOperands;
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ otherOperands.push_back(copy->getOperand(i));
+
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ copy->RemoveOperand(0);
+
+ for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
+ copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
+ }
+
+ for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
+ copy->addOperand(otherOperands[i]);
+
+}
+
+///=============================================================================
+///For a vector store of the form
+///stv2 va, [addr]
+///the following multi input instruction is created :
+///ST v1, v2, [addr]
+///Look at NVPTXVector.td for the definitions of multi input stores.
+///=============================================================================
+void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ copies.push_back(F.CloneMachineInstr(Instr));
+
+ MachineInstr *copy=copies[0];
+ copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
+
+ MachineOperand src = copy->getOperand(0);
+ unsigned regnum = src.getReg();
+
+ SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
+ copy->RemoveOperand(0);
+
+ std::vector<MachineOperand> otherOperands;
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ otherOperands.push_back(copy->getOperand(i));
+
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ copy->RemoveOperand(0);
+
+ for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
+ copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));
+
+ for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
+ copy->addOperand(otherOperands[i]);
+}
+
+///=============================================================================
+///va <= shufflev2 vb, vc, <i1>, <i2>
+///gets converted to 2 moves into a1 and a2. The source of the moves depend on
+///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
+///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move
+///instructions will be
+///a1 <= c2
+///a2 <= b1
+///=============================================================================
+void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ unsigned numcopies=numCopiesNeeded(Instr);
+
+ unsigned destregnum = Instr->getOperand(0).getReg();
+ unsigned src1regnum = Instr->getOperand(1).getReg();
+ unsigned src2regnum = Instr->getOperand(2).getReg();
+
+ SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
+ SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
+ SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
+
+ DebugLoc DL = Instr->getDebugLoc();
+
+ for (unsigned i=0; i<numcopies; i++) {
+ MachineInstr *copy = BuildMI(F, DL,
+ InstrInfo->get(getScalarVersion(Instr)), dest[i]);
+ MachineOperand which=Instr->getOperand(3+i);
+ assert(which.isImm() && "Shuffle operand not a constant");
+
+ int src=which.getImm();
+ int elem=src%numcopies;
+
+ if (which.getImm() < numcopies)
+ copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
+ else
+ copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
+ copies.push_back(copy);
+ }
+}
+
+///=============================================================================
+///a <= extractv2 va, <i1>
+///gets turned into a simple move to the scalar register a. The source depends
+///on i1.
+///=============================================================================
+void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ unsigned srcregnum = Instr->getOperand(1).getReg();
+
+ SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
+
+ MachineOperand which = Instr->getOperand(2);
+ assert(which.isImm() && "Extract operand not a constant");
+
+ DebugLoc DL = Instr->getDebugLoc();
+
+ MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
+ Instr->getOperand(0).getReg());
+ copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));
+
+ copies.push_back(copy);
+}
+
+///=============================================================================
+///va <= vecinsertv2 vb, c, <i1>
+///This instruction copies all elements of vb to va, except the 'i1'th element.
+///The scalar value c becomes the 'i1'th element of va.
+///This gets translated to 2 (4 for vecinsertv4) moves.
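+///For example, with i1 = 1, "va <= vecinsertv2 vb, c, <1>" becomes
+///a1 <= b1
+///a2 <= c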
+///=============================================================================
+void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ unsigned numcopies=numCopiesNeeded(Instr);
+
+ unsigned destregnum = Instr->getOperand(0).getReg();
+ unsigned srcregnum = Instr->getOperand(1).getReg();
+
+ SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
+ SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
+
+ MachineOperand which=Instr->getOperand(3);
+ assert(which.isImm() && "Insert operand not a constant");
+ unsigned int elem=which.getImm();
+
+ DebugLoc DL = Instr->getDebugLoc();
+
+ for (unsigned i=0; i<numcopies; i++) {
+ MachineInstr *copy = BuildMI(F, DL,
+ InstrInfo->get(getScalarVersion(Instr)), dest[i]);
+
+ if (i != elem)
+ copy->addOperand(MachineOperand::CreateReg(src[i], false));
+ else
+ copy->addOperand(Instr->getOperand(2));
+
+ copies.push_back(copy);
+ }
+
+}
+
+///=============================================================================
+///va <= buildv2 b1, b2
+///gets translated to
+///a1 <= b1
+///a2 <= b2
+///=============================================================================
+void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ unsigned numcopies=numCopiesNeeded(Instr);
+
+ unsigned destregnum = Instr->getOperand(0).getReg();
+
+ SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
+
+ DebugLoc DL = Instr->getDebugLoc();
+
+ for (unsigned i=0; i<numcopies; i++) {
+ MachineInstr *copy = BuildMI(F, DL,
+ InstrInfo->get(getScalarVersion(Instr)), dest[i]);
+
+ copy->addOperand(Instr->getOperand(1+i));
+
+ copies.push_back(copy);
+ }
+
+}
+
+///=============================================================================
+///For a tex inst of the form
+///va <= op [scalar operands]
+///the following multi output instruction is created :
+///[v1, v2] <= op' [scalar operands]
+///=============================================================================
+void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ copies.push_back(F.CloneMachineInstr(Instr));
+
+ MachineInstr *copy=copies[0];
+ copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
+
+  // Remove the dest, which should be a vector operand.
+ MachineOperand dest = copy->getOperand(0);
+ unsigned regnum = dest.getReg();
+
+ SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
+ copy->RemoveOperand(0);
+
+ std::vector<MachineOperand> otherOperands;
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ otherOperands.push_back(copy->getOperand(i));
+
+ for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
+ copy->RemoveOperand(0);
+
+ for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
+ copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
+
+ for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
+ copy->addOperand(otherOperands[i]);
+}
+
+///=============================================================================
+///Look at the vector instruction type and dispatch to the createVec<...>
+///function that creates the scalar copies.
+///=============================================================================
+void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
+ std::vector<MachineInstr *>& copies) {
+ if (ISVECLOAD(Instr)) {
+ createLoadCopy(F, Instr, copies);
+ return;
+ }
+ if (ISVECSTORE(Instr)) {
+ createStoreCopy(F, Instr, copies);
+ return;
+ }
+ if (ISVECSHUFFLE(Instr)) {
+ createVecShuffle(F, Instr, copies);
+ return;
+ }
+ if (ISVECEXTRACT(Instr)) {
+ createVecExtract(F, Instr, copies);
+ return;
+ }
+ if (ISVECINSERT(Instr)) {
+ createVecInsert(F, Instr, copies);
+ return;
+ }
+ if (ISVECDEST(Instr)) {
+ createVecDest(F, Instr, copies);
+ return;
+ }
+ if (ISVECBUILD(Instr)) {
+ createVecBuild(F, Instr, copies);
+ return;
+ }
+
+ unsigned numcopies=numCopiesNeeded(Instr);
+
+ for (unsigned i=0; i<numcopies; ++i)
+ copies.push_back(F.CloneMachineInstr(Instr));
+
+ for (unsigned i=0; i<numcopies; ++i) {
+ MachineInstr *copy = copies[i];
+
+ std::vector<MachineOperand> allOperands;
+ std::vector<bool> isDef;
+
+ for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
+ MachineOperand oper = copy->getOperand(j);
+ allOperands.push_back(oper);
+ if (oper.isReg())
+ isDef.push_back(oper.isDef());
+ else
+ isDef.push_back(false);
+ }
+
+ for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
+ copy->RemoveOperand(0);
+
+ copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
+
+ for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
+ MachineOperand oper=allOperands[j];
+ if (oper.isReg()) {
+ unsigned regnum = oper.getReg();
+ if (isVectorRegister(regnum)) {
+
+ SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
+ copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
+ }
+ else
+ copy->addOperand(oper);
+ }
+ else
+ copy->addOperand(oper);
+ }
+ }
+}
+
+///=============================================================================
+///Scan through all basic blocks, looking for vector instructions.
+///For each vector instruction I, insert the scalar copies before I, and
+///add I to the toRemove vector. Finally, remove all instructions in toRemove.
+///=============================================================================
+void VectorElementize::elementize(MachineFunction &F) {
+ for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
+ BI!=BE; ++BI) {
+ MachineBasicBlock *BB = &*BI;
+
+ std::vector<MachineInstr *> copies;
+ std::vector<MachineInstr *> toRemove;
+
+ for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
+ II!=IE; ++II) {
+ MachineInstr *Instr = &*II;
+
+ if (!isVectorInstr(Instr))
+ continue;
+
+ copies.clear();
+ createCopies(F, Instr, copies);
+ for (unsigned i=0, e=copies.size(); i!=e; ++i)
+ BB->insert(II, copies[i]);
+
+ assert((copies.size() > 0) && "Problem in createCopies");
+ toRemove.push_back(Instr);
+ }
+ for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
+ F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
+ }
+}
+
+///=============================================================================
+///a <= b
+///...
+///...
+///x <= op(a, ...)
+///gets converted to
+///
+///x <= op(b, ...)
+///The original move is still present. This works on SSA form machine code.
+///Note that a <= b should be a simple vreg-to-vreg move instruction.
+///TBD: there is no helper that replaces a single operand, so all operands are
+///removed and then re-added, substituting the replaced one along the way.
+///=============================================================================
+unsigned VectorElementize::copyProp(MachineFunction &F) {
+ unsigned numReplacements = 0;
+
+ for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
+ ++BI) {
+ MachineBasicBlock *BB = &*BI;
+
+ for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
+ ++II) {
+ MachineInstr *Instr = &*II;
+
+      // Don't do copy propagation on PHIs (it would cause unnecessary live
+      // range overlap) or on debug values.
+ if ((Instr->getOpcode() == TargetOpcode::PHI) ||
+ (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
+ continue;
+
+ bool needsReplacement = false;
+
+ for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
+ MachineOperand oper = Instr->getOperand(i);
+ if (!oper.isReg()) continue;
+ if (oper.isDef()) continue;
+ if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
+
+ MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
+
+ if (!defInstr) continue;
+
+ if (!isSimpleMove(defInstr)) continue;
+
+ MachineOperand defSrc = defInstr->getOperand(1);
+ if (!defSrc.isReg()) continue;
+ if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
+
+ needsReplacement = true;
+
+ }
+ if (!needsReplacement) continue;
+
+ numReplacements++;
+
+ std::vector<MachineOperand> operands;
+
+ for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
+ MachineOperand oper = Instr->getOperand(i);
+ bool flag = false;
+ do {
+ if (!(oper.isReg()))
+ break;
+ if (oper.isDef())
+ break;
+ if (!(RegInfo->isVirtualRegister(oper.getReg())))
+ break;
+ MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
+ if (!(isSimpleMove(defInstr)))
+ break;
+ MachineOperand defSrc = defInstr->getOperand(1);
+ if (!(defSrc.isReg()))
+ break;
+ if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
+ break;
+ operands.push_back(defSrc);
+ flag = true;
+ } while (0);
+ if (flag == false)
+ operands.push_back(oper);
+ }
+
+ for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
+ Instr->RemoveOperand(0);
+ for (unsigned i=0, e=operands.size(); i!=e; ++i)
+ Instr->addOperand(operands[i]);
+
+ }
+ }
+ return numReplacements;
+}
+
+///=============================================================================
+///Look for simple vreg-to-vreg moves whose destination register has no uses
+///(use_empty()), and add them to the deadMoves vector. Then remove all
+///instructions in deadMoves.
+///=============================================================================
+unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
+ std::vector<MachineInstr *> deadMoves;
+ for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
+ ++BI) {
+ MachineBasicBlock *BB = &*BI;
+
+ for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
+ ++II) {
+ MachineInstr *Instr = &*II;
+
+ if (!isSimpleMove(Instr)) continue;
+
+ MachineOperand dest = Instr->getOperand(0);
+ assert(dest.isReg() && "dest of move not a register");
+ assert(RegInfo->isVirtualRegister(dest.getReg()) &&
+ "dest of move not a virtual register");
+
+ if (MRI->use_empty(dest.getReg())) {
+ deadMoves.push_back(Instr);
+ }
+ }
+ }
+
+ for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
+ F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
+
+ return deadMoves.size();
+}
+
+///=============================================================================
+///Main function for this pass.
+///=============================================================================
+bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
+ MRI = &F.getRegInfo();
+
+ RegInfo = TM.getRegisterInfo();
+ InstrInfo = TM.getInstrInfo();
+
+ VectorToScalarMap.clear();
+
+ elementize(F);
+
+ if (RemoveRedundantMoves)
+ while (1) {
+ if (copyProp(F) == 0) break;
+ removeDeadMoves(F);
+ }
+
+ return true;
+}
+
+FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
+ return new VectorElementize(tm);
+}
+
+unsigned VectorElementize::getScalarVersion(unsigned opcode) {
+ if (opcode == NVPTX::PHI)
+ return opcode;
+ if (opcode == NVPTX::IMPLICIT_DEF)
+ return opcode;
+ switch(opcode) {
+ default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
+ case TargetOpcode::COPY: return TargetOpcode::COPY;
+ case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
+ case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
+ case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
+ case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
+ case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
+ case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
+ case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
+ case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
+ case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
+ case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
+ case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
+ case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
+ case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
+ case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
+ case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
+ case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
+ case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
+ case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
+ case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
+ case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
+ case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
+ case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
+ case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
+ case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
+ case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
+ case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
+ case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
+ case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
+ case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
+ case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
+ case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
+ case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
+ case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
+ case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
+ case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
+ case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
+ case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
+ case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
+ case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
+ case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
+ case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
+ case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
+ case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
+ case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
+ case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
+ case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
+ case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
+ case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
+ case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
+ case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
+ case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
+ case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
+ case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
+ case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
+ case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
+ case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
+ case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
+ case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
+ case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
+ case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
+ case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
+ case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
+ case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
+ case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
+ case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
+ case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
+ case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
+ case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
+ case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
+ case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
+ case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
+ case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
+ case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
+ case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
+ case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
+ case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
+ case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
+ case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
+ case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
+ case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
+ case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
+ case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
+ case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
+ case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
+ case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
+ case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
+ case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
+ case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
+ case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
+ case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
+ case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
+ case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
+ case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
+ case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
+ case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
+ case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
+ case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
+ case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
+ case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
+ case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
+ case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
+ case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
+ case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
+ case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
+ case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
+ case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
+ case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
+ case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
+ case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
+ case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
+ case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
+ case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
+ case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
+ case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
+ case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
+ case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
+ case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
+ case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
+ case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
+ case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
+ case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
+ case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
+ case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
+ case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
+ case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
+ case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
+ case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
+ case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
+ case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
+ case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
+ case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
+ case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
+ case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
+ case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
+ case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
+ case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
+ case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
+ case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
+ case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
+ case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
+ case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
+ case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
+ case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
+ case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
+ case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
+ case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
+ case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
+ case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
+ case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
+ case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
+ case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
+ case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
+ case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
+ case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
+ case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
+ case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
+ case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
+ case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
+ case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
+ case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
+ case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
+ case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
+ case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
+ case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
+ case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
+ case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
+ case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
+ case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
+ case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
+ case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
+ case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
+ case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
+ case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
+ case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
+ case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
+ case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
+ case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
+ case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
+ case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
+ case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
+ case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
+ case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
+ case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
+ case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
+ case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
+ case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
+ case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
+ case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
+ case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
+ case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
+ case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
+ case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
+ case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
+ case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
+ case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
+ case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
+ case NVPTX::VNegV2I16: return NVPTX::INEG16;
+ case NVPTX::VNegV2I32: return NVPTX::INEG32;
+ case NVPTX::VNegV2I64: return NVPTX::INEG64;
+ case NVPTX::VNegV2I8: return NVPTX::INEG8;
+ case NVPTX::VNegV4I16: return NVPTX::INEG16;
+ case NVPTX::VNegV4I32: return NVPTX::INEG32;
+ case NVPTX::VNegV4I8: return NVPTX::INEG8;
+ case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
+ case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
+ case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
+ case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
+ case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
+ case NVPTX::VNotV2I16: return NVPTX::NOT16;
+ case NVPTX::VNotV2I32: return NVPTX::NOT32;
+ case NVPTX::VNotV2I64: return NVPTX::NOT64;
+ case NVPTX::VNotV2I8: return NVPTX::NOT8;
+ case NVPTX::VNotV4I16: return NVPTX::NOT16;
+ case NVPTX::VNotV4I32: return NVPTX::NOT32;
+ case NVPTX::VNotV4I8: return NVPTX::NOT8;
+ case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
+ case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
+ case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
+ case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
+ case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
+ case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
+ case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
+ case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
+ case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
+ case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
+ case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
+ case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
+ case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
+ case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
+ case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
+ case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
+ case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
+ case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
+ case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
+ case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
+ case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
+ case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
+ case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
+ case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
+ case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
+ case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
+ case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
+ case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
+ case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
+ case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
+ case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
+ case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
+ case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
+ case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
+ case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
+ case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
+ case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
+ case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
+ case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
+ case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
+ case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
+ case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
+ case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
+ case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
+ case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
+ case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
+ case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
+ case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
+ case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
+ case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
+ case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
+ case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
+ case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
+ case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
+ case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
+ case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
+ case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
+ case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
+ case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
+ case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
+ case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
+ case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
+ case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
+ case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
+ case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
+ case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
+ case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
+ case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
+ case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
+ case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
+ case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
+ case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
+ case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
+ case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
+ case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
+ case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
+ case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
+ case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
+ case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
+ case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
+ case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
+ case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
+ case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
+ case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
+ case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
+ case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
+ case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
+ case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
+ case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
+ case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
+ case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
+ case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
+ case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
+ case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
+ case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
+ case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
+ case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
+ case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
+ case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
+ case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
+ case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
+ case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
+ case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
+ case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
+ case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
+ case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
+ case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
+ case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
+ case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
+ case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
+ case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
+ case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
+ case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
+ case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
+ case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
+ case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
+ case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
+ case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
+ case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
+ case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
+ case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
+ case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
+ case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
+ case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
+ case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
+ case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
+ case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
+ case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
+ case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
+ case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
+ case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
+ case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
+ case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
+ case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
+ case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
+ case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
+ case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
+ case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
+ case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
+ case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
+ case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
+ case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
+ case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
+ case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
+ case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
+ case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
+ case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
+ case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
+ case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+
+ case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
+ case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
+ case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
+ case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
+ case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
+ case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
+ case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
+ case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
+ case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
+ case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
+ case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
+ case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
+ case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
+ case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
+ case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
+ case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
+ case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
+ case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
+ case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
+ case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
+ case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
+ case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
+ case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
+ case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
+ case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
+ case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
+ case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
+ case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
+ case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
+ case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
+ case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
+ case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
+ case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
+ case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
+ case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
+ case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
+
+ case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
+ case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
+ case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
+ case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
+ case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
+ case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
+ case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
+ case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
+
+ case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
+ case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
+ case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
+ case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
+ case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
+ case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
+ case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
+ case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
+
+ case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
+ case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
+ case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
+ case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
+ case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
+ case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
+ case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
+ case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
+
+ case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
+ case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
+ case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
+ case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
+ case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
+ case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
+ case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
+ case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
+
+ case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
+ case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
+ case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
+ case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
+ case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
+ case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
+ case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
+ case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
+
+ case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
+ case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
+ case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
+ case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
+ case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
+ case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
+ case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
+ case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
+
+ case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
+ case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
+ case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
+ case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
+ case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
+ case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
+ case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
+ case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
+
+ case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
+ case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
+ case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
+ case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
+ case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
+ case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
+ case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
+ case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
+
+ case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
+ case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
+ case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
+ case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
+ case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
+ case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
+ case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
+ case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
+
+ case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
+ case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
+ case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
+ case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
+ case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
+ case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
+ case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
+ case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
+ }
+ return 0;
+}
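The switch above collapses every NVPTX vector pseudo-opcode to the scalar opcode that handles a single element, so a later pass can replay the operation once per lane. A self-contained toy sketch of that idea; the enum values and helper below are illustrative stand-ins, not the backend's real definitions:

// Toy illustration, not LLVM code: a vector pseudo-op is rewritten as one
// scalar op per lane, driven by a pseudo->scalar opcode table like the one above.
#include <array>
#include <cstddef>
#include <cstdio>

enum Opcode { VAddV4I32, ADDi32rr };          // stand-ins for two entries above

Opcode getScalarVersion(Opcode Op) {
  switch (Op) {
  case VAddV4I32: return ADDi32rr;            // v4i32 add -> four i32 adds
  default:        return Op;
  }
}

int main() {
  std::array<int, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40}, C{};
  if (getScalarVersion(VAddV4I32) == ADDi32rr)
    for (std::size_t Lane = 0; Lane < A.size(); ++Lane)   // one scalar op per lane
      C[Lane] = A[Lane] + B[Lane];
  std::printf("%d %d %d %d\n", C[0], C[1], C[2], C[3]);   // 11 22 33 44
  return 0;
}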
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
new file mode 100644
index 000000000000..a7347efd7850
--- /dev/null
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -0,0 +1,125 @@
+#ifndef __CL_COMMON_DEFINES_H__
+#define __CL_COMMON_DEFINES_H__
+// This file includes defines that are common to both kernel code and
+// the NVPTX back-end.
+
+//
+// Common defines for Image intrinsics
+// Channel order
+enum {
+ CLK_R = 0x10B0,
+ CLK_A = 0x10B1,
+ CLK_RG = 0x10B2,
+ CLK_RA = 0x10B3,
+ CLK_RGB = 0x10B4,
+ CLK_RGBA = 0x10B5,
+ CLK_BGRA = 0x10B6,
+ CLK_ARGB = 0x10B7,
+
+#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
+ CLK_xRGB = 0x10B7,
+#endif
+
+ CLK_INTENSITY = 0x10B8,
+ CLK_LUMINANCE = 0x10B9
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ ,
+ CLK_Rx = 0x10BA,
+ CLK_RGx = 0x10BB,
+ CLK_RGBx = 0x10BC
+#endif
+};
+
+
+typedef enum clk_channel_type {
+ // valid formats for float return types
+ CLK_SNORM_INT8 = 0x10D0, // four channel RGBA snorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA snorm16
+ CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+ CLK_FLOAT = 0x10DE, // four channel RGBA float
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ CLK_UNORM_SHORT_565 = 0x10D4,
+ CLK_UNORM_SHORT_555 = 0x10D5,
+ CLK_UNORM_INT_101010 = 0x10D6,
+#endif
+
+ // valid only for integer return types
+ CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT16 = 0x10D8,
+ CLK_SIGNED_INT32 = 0x10D9,
+ CLK_UNSIGNED_INT8 = 0x10DA,
+ CLK_UNSIGNED_INT16 = 0x10DB,
+ CLK_UNSIGNED_INT32 = 0x10DC,
+
+ // CI SPI for CPU
+ __CLK_UNORM_INT8888 , // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+
+ __CLK_VALID_IMAGE_TYPE_COUNT,
+ __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
+} clk_channel_type;
+
+typedef enum clk_sampler_type {
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+#endif
+ __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
+ CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
+ CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
+ __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
+ CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent filter info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
+ CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
+ (__CLK_ANISOTROPIC_RATIO_BITS-1)
+} clk_sampler_type;
+
+// Memory synchronization
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+#endif // __CL_COMMON_DEFINES_H__
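The clk_sampler_type enum above packs the address mode, coordinate normalization, filter and mip settings into disjoint bit ranges (address in bits 2:0, normalization in bit 3, filter in bits 5:4, mip in bits 7:6). A minimal sketch of how those fields compose and decompose, assuming cl_common_defines.h is on the include path; everything else below is illustrative:

#include <cstdio>
#include "cl_common_defines.h"   // the header introduced above

int main() {
  // Each group occupies its own bits, so a sampler value is just the OR of
  // one value from each group.
  unsigned Sampler = CLK_ADDRESS_CLAMP_TO_EDGE |
                     CLK_NORMALIZED_COORDS_TRUE |
                     CLK_FILTER_LINEAR;                          // == 0x1A

  unsigned Address    = Sampler & __CLK_ADDRESS_MASK;            // 2 (clamp-to-edge)
  bool     Normalized = (Sampler & __CLK_NORMALIZED_MASK) != 0;  // true
  unsigned Filter     = Sampler & __CLK_FILTER_MASK;             // 0x10 (linear)

  std::printf("sampler=0x%x address=%u normalized=%d filter=0x%x\n",
              Sampler, Address, (int)Normalized, Filter);
  return 0;
}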
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
new file mode 100644
index 000000000000..ed0666823124
--- /dev/null
+++ b/lib/Target/NVPTX/gen-register-defs.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+num_regs = 396
+
+outFile = open('NVPTXRegisterInfo.td', 'w')
+
+outFile.write('''
+//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class NVPTXReg<string n> : Register<n> {
+ let Namespace = "NVPTX";
+}
+
+class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
+ : RegisterClass <"NVPTX", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// Special Registers used as stack pointer
+def VRFrame : NVPTXReg<"%SP">;
+def VRFrameLocal : NVPTXReg<"%SPL">;
+
+// Special Registers used as the stack
+def VRDepot : NVPTXReg<"%Depot">;
+''')
+
+# Predicates
+outFile.write('''
+//===--- Predicate --------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
+
+# Int8
+outFile.write('''
+//===--- 8-bit ------------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
+
+# Int16
+outFile.write('''
+//===--- 16-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
+
+# Int32
+outFile.write('''
+//===--- 32-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
+
+# Int64
+outFile.write('''
+//===--- 64-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
+
+# F32
+outFile.write('''
+//===--- 32-bit float -----------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
+
+# F64
+outFile.write('''
+//===--- 64-bit float -----------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
+
+# Vector registers
+outFile.write('''
+//===--- Vector -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
+
+for i in range(0, num_regs):
+ outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
+
+# Argument registers
+outFile.write('''
+//===--- Arguments --------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+ outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
+for i in range(0, num_regs):
+ outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
+
+outFile.write('''
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('''
+// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+''')
+
+outFile.write('''
+class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
+ NVPTXRegClass sClass,
+ int e,
+ string n>
+ : NVPTXRegClass<regTypes, alignment, regList>
+{
+ NVPTXRegClass scalarClass=sClass;
+ int elems=e;
+ string name=n;
+}
+''')
+
+
+outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
+outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
+
+outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
+outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
+
+outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
+outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
+
+outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
+outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
+
+outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
+outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
+
+outFile.close()
+
+
+outFile = open('NVPTXNumRegisters.h', 'w')
+outFile.write('''
+//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_NUM_REGISTERS_H
+#define NVPTX_NUM_REGISTERS_H
+
+namespace llvm {
+
+const unsigned NVPTXNumRegisters = %d;
+
+}
+
+#endif
+''' % num_regs)
+
+outFile.close()
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
deleted file mode 100644
index a3be342f77fb..000000000000
--- a/lib/Target/PTX/CMakeLists.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PTX.td)
-
-tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(PTXCommonTableGen)
-
-add_llvm_target(PTXCodeGen
- PTXAsmPrinter.cpp
- PTXISelDAGToDAG.cpp
- PTXISelLowering.cpp
- PTXInstrInfo.cpp
- PTXFPRoundingModePass.cpp
- PTXFrameLowering.cpp
- PTXMCAsmStreamer.cpp
- PTXMCInstLower.cpp
- PTXMFInfoExtract.cpp
- PTXMachineFunctionInfo.cpp
- PTXParamManager.cpp
- PTXRegAlloc.cpp
- PTXRegisterInfo.cpp
- PTXSelectionDAGInfo.cpp
- PTXSubtarget.cpp
- PTXTargetMachine.cpp
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(InstPrinter)
-add_subdirectory(MCTargetDesc)
-
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
deleted file mode 100644
index b25289347ba5..000000000000
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPTXAsmPrinter
- PTXInstPrinter.cpp
- )
-
-add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
deleted file mode 100644
index 1830213267b8..000000000000
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a PTX MCInst to a .ptx file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "PTXInstPrinter.h"
-#include "MCTargetDesc/PTXBaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#include "PTXGenAsmWriter.inc"
-
-PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
-
-void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- // Decode the register number into type and offset
- unsigned RegSpace = RegNo & 0x7;
- unsigned RegType = (RegNo >> 3) & 0x7;
- unsigned RegOffset = RegNo >> 6;
-
- // Print the register
- OS << "%";
-
- switch (RegSpace) {
- default:
- llvm_unreachable("Unknown register space!");
- case PTXRegisterSpace::Reg:
- switch (RegType) {
- default:
- llvm_unreachable("Unknown register type!");
- case PTXRegisterType::Pred:
- OS << "p";
- break;
- case PTXRegisterType::B16:
- OS << "rh";
- break;
- case PTXRegisterType::B32:
- OS << "r";
- break;
- case PTXRegisterType::B64:
- OS << "rd";
- break;
- case PTXRegisterType::F32:
- OS << "f";
- break;
- case PTXRegisterType::F64:
- OS << "fd";
- break;
- }
- break;
- case PTXRegisterSpace::Return:
- OS << "ret";
- break;
- case PTXRegisterSpace::Argument:
- OS << "arg";
- break;
- }
-
- OS << RegOffset;
-}
-
-void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
- printPredicate(MI, O);
- switch (MI->getOpcode()) {
- default:
- printInstruction(MI, O);
- break;
- case PTX::CALL:
- printCall(MI, O);
- }
- O << ";";
- printAnnotation(O, Annot);
-}
-
-void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
- // The last two operands are the predicate operands
- int RegIndex;
- int OpIndex;
-
- if (MI->getOpcode() == PTX::CALL) {
- RegIndex = 0;
- OpIndex = 1;
- } else {
- RegIndex = MI->getNumOperands()-2;
- OpIndex = MI->getNumOperands()-1;
- }
-
- int PredOp = MI->getOperand(OpIndex).getImm();
- if (PredOp == PTXPredicate::None)
- return;
-
- if (PredOp == PTXPredicate::Negate)
- O << '!';
- else
- O << '@';
-
- printOperand(MI, RegIndex, O);
-}
-
-void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
- O << "\tcall.uni\t";
- // The first two operands are the predicate slot
- unsigned Index = 2;
- unsigned NumRets = MI->getOperand(Index++).getImm();
-
- if (NumRets > 0) {
- O << "(";
- printOperand(MI, Index++, O);
- for (unsigned i = 1; i < NumRets; ++i) {
- O << ", ";
- printOperand(MI, Index++, O);
- }
- O << "), ";
- }
-
- const MCExpr* Expr = MI->getOperand(Index++).getExpr();
- unsigned NumArgs = MI->getOperand(Index++).getImm();
-
- // if the function call is to printf or puts, change to vprintf
- if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
- const MCSymbol &Sym = SymRefExpr->getSymbol();
- if (Sym.getName() == "printf" || Sym.getName() == "puts") {
- O << "vprintf";
- } else {
- O << Sym.getName();
- }
- } else {
- O << *Expr;
- }
-
- O << ", (";
-
- if (NumArgs > 0) {
- printOperand(MI, Index++, O);
- for (unsigned i = 1; i < NumArgs; ++i) {
- O << ", ";
- printOperand(MI, Index++, O);
- }
- }
- O << ")";
-}
-
-void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm()) {
- O << Op.getImm();
- } else if (Op.isFPImm()) {
- double Imm = Op.getFPImm();
- APFloat FPImm(Imm);
- APInt FPIntImm = FPImm.bitcastToAPInt();
- O << "0D";
- // PTX requires us to output the full 64 bits, even if the number is zero
- if (FPIntImm.getZExtValue() > 0) {
- O << FPIntImm.toString(16, false);
- } else {
- O << "0000000000000000";
- }
- } else if (Op.isReg()) {
- printRegName(O, Op.getReg());
- } else {
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- const MCExpr *Expr = Op.getExpr();
- if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
- const MCSymbol &Sym = SymRefExpr->getSymbol();
- O << Sym.getName();
- } else {
- O << *Op.getExpr();
- }
- }
-}
-
-void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- // By definition, operand OpNo+1 is an i32imm
- const MCOperand &Op2 = MI->getOperand(OpNo+1);
- printOperand(MI, OpNo, O);
- if (Op2.getImm() == 0)
- return; // don't print "+0"
- O << "+" << Op2.getImm();
-}
-
-void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert (Op.isImm() && "Rounding modes must be immediate values");
- switch (Op.getImm()) {
- default:
- llvm_unreachable("Unknown rounding mode!");
- case PTXRoundingMode::RndDefault:
- llvm_unreachable("FP rounding-mode pass did not handle instruction!");
- case PTXRoundingMode::RndNone:
- // Do not print anything.
- break;
- case PTXRoundingMode::RndNearestEven:
- O << ".rn";
- break;
- case PTXRoundingMode::RndTowardsZero:
- O << ".rz";
- break;
- case PTXRoundingMode::RndNegInf:
- O << ".rm";
- break;
- case PTXRoundingMode::RndPosInf:
- O << ".rp";
- break;
- case PTXRoundingMode::RndApprox:
- O << ".approx";
- break;
- case PTXRoundingMode::RndNearestEvenInt:
- O << ".rni";
- break;
- case PTXRoundingMode::RndTowardsZeroInt:
- O << ".rzi";
- break;
- case PTXRoundingMode::RndNegInfInt:
- O << ".rmi";
- break;
- case PTXRoundingMode::RndPosInfInt:
- O << ".rpi";
- break;
- }
-}
-
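For reference, the f64 immediate handling in the deleted printOperand above (prefix the raw bit pattern with "0D" and always emit 16 hex digits) boils down to a plain bit cast. A standalone sketch, not taken from the file:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double Imm = 1.0;
  std::uint64_t Bits;
  std::memcpy(&Bits, &Imm, sizeof Bits);   // same effect as APFloat::bitcastToAPInt
  std::printf("0D%016llX\n", (unsigned long long)Bits);  // prints 0D3FF0000000000000
  return 0;
}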
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
deleted file mode 100644
index ea4d50477d70..000000000000
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a PTX MCInst to a .ptx file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTXINSTPRINTER_H
-#define PTXINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class PTXInstPrinter : public MCInstPrinter {
-public:
- PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
-
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
-
- // Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void printPredicate(const MCInst *MI, raw_ostream &O);
- void printCall(const MCInst *MI, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-}
-
-#endif
-
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index d1fd74c369b9..000000000000
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_library(LLVMPTXDesc
- PTXMCTargetDesc.cpp
- PTXMCAsmInfo.cpp
- )
-
-add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
deleted file mode 100644
index a3e0f320fcb5..000000000000
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ /dev/null
@@ -1,134 +0,0 @@
-//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PTX target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTXBASEINFO_H
-#define PTXBASEINFO_H
-
-#include "PTXMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
- namespace PTXStateSpace {
- enum {
- Global = 0, // default to global state space
- Constant = 1,
- Local = 2,
- Parameter = 3,
- Shared = 4
- };
- } // namespace PTXStateSpace
-
- namespace PTXPredicate {
- enum {
- Normal = 0,
- Negate = 1,
- None = 2
- };
- } // namespace PTXPredicate
-
- /// Namespace to hold all target-specific flags.
- namespace PTXRoundingMode {
- // Instruction Flags
- enum {
- // Rounding Mode Flags
- RndMask = 15,
- RndDefault = 0, // ---
- RndNone = 1, // <NONE>
- RndNearestEven = 2, // .rn
- RndTowardsZero = 3, // .rz
- RndNegInf = 4, // .rm
- RndPosInf = 5, // .rp
- RndApprox = 6, // .approx
- RndNearestEvenInt = 7, // .rni
- RndTowardsZeroInt = 8, // .rzi
- RndNegInfInt = 9, // .rmi
- RndPosInfInt = 10 // .rpi
- };
- } // namespace PTXRoundingMode
-
- namespace PTXRegisterType {
- // Register type encoded in MCOperands
- enum {
- Pred = 0,
- B16,
- B32,
- B64,
- F32,
- F64
- };
- } // namespace PTXRegisterType
-
- namespace PTXRegisterSpace {
- // Register space encoded in MCOperands
- enum {
- Reg = 0,
- Local,
- Param,
- Argument,
- Return
- };
- }
-
- inline static void decodeRegisterName(raw_ostream &OS,
- unsigned EncodedReg) {
- OS << "%";
-
- unsigned RegSpace = EncodedReg & 0x7;
- unsigned RegType = (EncodedReg >> 3) & 0x7;
- unsigned RegOffset = EncodedReg >> 6;
-
- switch (RegSpace) {
- default:
- llvm_unreachable("Unknown register space!");
- case PTXRegisterSpace::Reg:
- switch (RegType) {
- default:
- llvm_unreachable("Unknown register type!");
- case PTXRegisterType::Pred:
- OS << "p";
- break;
- case PTXRegisterType::B16:
- OS << "rh";
- break;
- case PTXRegisterType::B32:
- OS << "r";
- break;
- case PTXRegisterType::B64:
- OS << "rd";
- break;
- case PTXRegisterType::F32:
- OS << "f";
- break;
- case PTXRegisterType::F64:
- OS << "fd";
- break;
- }
- break;
- case PTXRegisterSpace::Return:
- OS << "ret";
- break;
- case PTXRegisterSpace::Argument:
- OS << "arg";
- break;
- }
-
- OS << RegOffset;
- }
-} // namespace llvm
-
-#endif
-
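Both printRegName in the deleted instruction printer and decodeRegisterName above unpack the same register encoding: the space in the low three bits, the type in the next three, and the register offset in the remaining bits. A worked example of that layout; the encode() helper is hypothetical, only the decoding side existed in PTXBaseInfo.h:

#include <cassert>
#include <cstdio>

// Space and type values mirror the PTXRegisterSpace/PTXRegisterType enums above.
enum { Reg = 0, Local, Param, Argument, Return };   // register space, bits [2:0]
enum { Pred = 0, B16, B32, B64, F32, F64 };         // register type,  bits [5:3]

unsigned encode(unsigned Space, unsigned Type, unsigned Offset) {
  return Space | (Type << 3) | (Offset << 6);       // offset lives in bits [31:6]
}

int main() {
  unsigned R = encode(Reg, B32, 5);                 // the register printed as "%r5"
  assert((R & 0x7) == Reg);                         // space
  assert(((R >> 3) & 0x7) == B32);                  // type
  assert((R >> 6) == 5);                            // offset
  std::printf("%%r5 encodes as %u (0x%x)\n", R, R);
  return 0;
}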
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
deleted file mode 100644
index cdfbc8046246..000000000000
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the PTXMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
-
-using namespace llvm;
-
-void PTXMCAsmInfo::anchor() { }
-
-PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::ptx64)
- PointerSize = 8;
-
- CommentString = "//";
-
- PrivateGlobalPrefix = "$L__";
-
- AllowPeriodsInName = false;
-
- HasSetDirective = false;
-
- HasDotTypeDotSizeDirective = false;
-
- HasSingleParameterDotFile = false;
-}
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
deleted file mode 100644
index 08fb970fc290..000000000000
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides PTX specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXMCTargetDesc.h"
-#include "PTXMCAsmInfo.h"
-#include "InstPrinter/PTXInstPrinter.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "PTXGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "PTXGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "PTXGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createPTXMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitPTXMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- // PTX does not have a return address register.
- InitPTXMCRegisterInfo(X, 0);
- return X;
-}
-
-static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitPTXMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
- return X;
-}
-
-static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- assert(SyntaxVariant == 0 && "We only have one syntax variant");
- return new PTXInstPrinter(MAI, MII, MRI, STI);
-}
-
-extern "C" void LLVMInitializePTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
- RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
- createPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
- createPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
-}
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
deleted file mode 100644
index ffb92cb89e39..000000000000
--- a/lib/Target/PTX/PTX.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// PTX back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_H
-#define PTX_H
-
-#include "MCTargetDesc/PTXBaseInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class MachineInstr;
- class MCInst;
- class PTXAsmPrinter;
- class PTXTargetMachine;
- class FunctionPass;
-
- FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
- FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
- FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
- FunctionPass *createPTXRegisterAllocator();
-
- void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- PTXAsmPrinter &AP);
-
-} // namespace llvm;
-
-#endif // PTX_H
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
deleted file mode 100644
index 994a68ed207a..000000000000
--- a/lib/Target/PTX/PTX.td
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is the top level entry point for the PTX target.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-//===- Architectural Features ---------------------------------------------===//
-
-def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
- "Do not demote .f64 to .f32">;
-
-def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
- "Disable Fused-Multiply Add">;
-
-//===- PTX Version --------------------------------------------------------===//
-
-def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
- "Use PTX Language Version 2.0">;
-
-def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
- "Use PTX Language Version 2.1">;
-
-def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
- "Use PTX Language Version 2.2">;
-
-def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
- "Use PTX Language Version 2.3">;
-
-//===- PTX Target ---------------------------------------------------------===//
-
-def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
- "Use Shader Model 1.0">;
-def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
- "Use Shader Model 1.1">;
-def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
- "Use Shader Model 1.2">;
-def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
- "Use Shader Model 1.3">;
-def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
- "Use Shader Model 2.0", [FeatureDouble]>;
-def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
- "Use Shader Model 2.1", [FeatureDouble]>;
-def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
- "Use Shader Model 2.2", [FeatureDouble]>;
-def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
- "Use Shader Model 2.3", [FeatureDouble]>;
-
-def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
- "PTX_COMPUTE_1_0",
- "Use Compute Compatibility 1.0">;
-def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
- "PTX_COMPUTE_1_1",
- "Use Compute Compatibility 1.1">;
-def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
- "PTX_COMPUTE_1_2",
- "Use Compute Compatibility 1.2">;
-def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
- "PTX_COMPUTE_1_3",
- "Use Compute Compatibility 1.3">;
-def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
- "PTX_COMPUTE_2_0",
- "Use Compute Compatibility 2.0",
- [FeatureDouble]>;
-
-//===----------------------------------------------------------------------===//
-// PTX supported processors
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"generic", []>;
-
-// Processor definitions for compute/shader models
-def : Proc<"compute_10", [FeatureCOMPUTE10]>;
-def : Proc<"compute_11", [FeatureCOMPUTE11]>;
-def : Proc<"compute_12", [FeatureCOMPUTE12]>;
-def : Proc<"compute_13", [FeatureCOMPUTE13]>;
-def : Proc<"compute_20", [FeatureCOMPUTE20]>;
-def : Proc<"sm_10", [FeatureSM10]>;
-def : Proc<"sm_11", [FeatureSM11]>;
-def : Proc<"sm_12", [FeatureSM12]>;
-def : Proc<"sm_13", [FeatureSM13]>;
-def : Proc<"sm_20", [FeatureSM20]>;
-def : Proc<"sm_21", [FeatureSM21]>;
-def : Proc<"sm_22", [FeatureSM22]>;
-def : Proc<"sm_23", [FeatureSM23]>;
-
-// Processor definitions for common GPU architectures
-def : Proc<"g80", [FeatureSM10]>;
-def : Proc<"gt200", [FeatureSM13]>;
-def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
-def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "PTXRegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "PTXInstrInfo.td"
-
-def PTXInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
-// Assembly printer
-//===----------------------------------------------------------------------===//
-// PTX uses the MC printer for asm output, so make sure the TableGen
-// AsmWriter bits get associated with the correct class.
-def PTXAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- bit isMCAsmWriter = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Target Declaration
-//===----------------------------------------------------------------------===//
-
-def PTX : Target {
- let InstructionSet = PTXInstrInfo;
- let AssemblyWriters = [PTXAsmWriter];
-}
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
deleted file mode 100644
index 0b6ac7bcb588..000000000000
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ /dev/null
@@ -1,561 +0,0 @@
-//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PTX assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-asm-printer"
-
-#include "PTXAsmPrinter.h"
-#include "PTX.h"
-#include "PTXMachineFunctionInfo.h"
-#include "PTXParamManager.h"
-#include "PTXRegisterInfo.h"
-#include "PTXTargetMachine.h"
-#include "llvm/Argument.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static const char PARAM_PREFIX[] = "__param_";
-static const char RETURN_PREFIX[] = "__ret_";
-
-static const char *getRegisterTypeName(unsigned RegType) {
- switch (RegType) {
- default:
- llvm_unreachable("Unknown register type");
- case PTXRegisterType::Pred:
- return ".pred";
- case PTXRegisterType::B16:
- return ".b16";
- case PTXRegisterType::B32:
- return ".b32";
- case PTXRegisterType::B64:
- return ".b64";
- case PTXRegisterType::F32:
- return ".f32";
- case PTXRegisterType::F64:
- return ".f64";
- }
-}
-
-static const char *getStateSpaceName(unsigned addressSpace) {
- switch (addressSpace) {
- default: llvm_unreachable("Unknown state space");
- case PTXStateSpace::Global: return "global";
- case PTXStateSpace::Constant: return "const";
- case PTXStateSpace::Local: return "local";
- case PTXStateSpace::Parameter: return "param";
- case PTXStateSpace::Shared: return "shared";
- }
-}
-
-static const char *getTypeName(Type* type) {
- while (true) {
- switch (type->getTypeID()) {
- default: llvm_unreachable("Unknown type");
- case Type::FloatTyID: return ".f32";
- case Type::DoubleTyID: return ".f64";
- case Type::IntegerTyID:
- switch (type->getPrimitiveSizeInBits()) {
- default: llvm_unreachable("Unknown integer bit-width");
- case 16: return ".u16";
- case 32: return ".u32";
- case 64: return ".u64";
- }
- case Type::ArrayTyID:
- case Type::PointerTyID:
- type = dyn_cast<SequentialType>(type)->getElementType();
- break;
- }
- }
- return NULL;
-}
-
-bool PTXAsmPrinter::doFinalization(Module &M) {
- // XXX Temporarily remove global variables so that doFinalization() will not
- // emit them again (global variables are emitted at beginning).
-
- Module::GlobalListType &global_list = M.getGlobalList();
- int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable* [n];
-
- // first, back up each GlobalVariable in gv_array
- i = 0;
- for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
- gv_array[i++] = &*I;
-
- // second, empty global_list
- while (!global_list.empty())
- global_list.remove(global_list.begin());
-
- // call doFinalization
- bool ret = AsmPrinter::doFinalization(M);
-
- // now we restore global variables
- for (i = 0; i < n; i ++)
- global_list.insert(global_list.end(), gv_array[i]);
-
- delete[] gv_array;
- return ret;
-}
-
-void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
-{
- const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
-
- // Emit the PTX .version and .target attributes
- OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString());
- OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() +
- (ST.supportsDouble() ? ""
- : ", map_f64_to_f32"));
- // .address_size directive is optional, but it must immediately follow
- // the .target directive if present within a module
- if (ST.supportsPTX23()) {
- const char *addrSize = ST.is64Bit() ? "64" : "32";
- OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize);
- }
-
- OutStreamer.AddBlankLine();
-
- // Define any .file directives
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(M);
-
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
- DICompileUnit DIUnit(*I);
- StringRef FN = DIUnit.getFilename();
- StringRef Dir = DIUnit.getDirectory();
- GetOrCreateSourceID(FN, Dir);
- }
-
- OutStreamer.AddBlankLine();
-
- // declare external functions
- for (Module::const_iterator i = M.begin(), e = M.end();
- i != e; ++i)
- EmitFunctionDeclaration(i);
-
- // declare global variables
- for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
- i != e; ++i)
- EmitVariableDeclaration(i);
-}
-
-void PTXAsmPrinter::EmitFunctionBodyStart() {
- OutStreamer.EmitRawText(Twine("{"));
-
- const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const PTXParamManager &PM = MFI->getParamManager();
-
- // Print register definitions
- SmallString<128> regDefs;
- raw_svector_ostream os(regDefs);
- unsigned numRegs;
-
- // pred
- numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .pred %p<" << numRegs << ">;\n";
-
- // i16
- numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
-
- // i32
- numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .b32 %r<" << numRegs << ">;\n";
-
- // i64
- numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
-
- // f32
- numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .f32 %f<" << numRegs << ">;\n";
-
- // f64
- numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
- if(numRegs > 0)
- os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
-
- // Local params
- for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
- i != e; ++i)
- os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
- << ";\n";
-
- OutStreamer.EmitRawText(os.str());
-
-
- const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
- DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
- << " frame object(s)\n");
- for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
- DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
- if (FrameInfo->getObjectSize(i) > 0) {
- OutStreamer.EmitRawText("\t.local .align " +
- Twine(FrameInfo->getObjectAlignment(i)) +
- " .b8 __local" +
- Twine(i) +
- "[" +
- Twine(FrameInfo->getObjectSize(i)) +
- "];");
- }
- }
-
- //unsigned Index = 1;
- // Print parameter passing params
- //for (PTXMachineFunctionInfo::param_iterator
- // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
- // std::string def = "\t.param .b";
- // def += utostr(*i);
- // def += " __ret_";
- // def += utostr(Index);
- // Index++;
- // def += ";";
- // OutStreamer.EmitRawText(Twine(def));
- //}
-}
-
-void PTXAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText(Twine("}"));
-}
-
-void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MCInst TmpInst;
- LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
- OutStreamer.EmitInstruction(TmpInst);
-}
-
-void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
- // Check to see if this is a special global used by LLVM, if so, emit it.
- if (EmitSpecialLLVMGlobal(gv))
- return;
-
- MCSymbol *gvsym = Mang->getSymbol(gv);
-
- assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
-
- SmallString<128> decl;
- raw_svector_ostream os(decl);
-
- // check if it is defined in some other translation unit
- if (gv->isDeclaration())
- os << ".extern ";
-
- // state space: e.g., .global
- os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';
-
- // alignment (optional)
- unsigned alignment = gv->getAlignment();
- if (alignment != 0)
- os << ".align " << gv->getAlignment() << ' ';
-
-
- if (PointerType::classof(gv->getType())) {
- PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
- Type* elementTy = pointerTy->getElementType();
-
- if (elementTy->isArrayTy()) {
- assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
-
- ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
- elementTy = arrayTy->getElementType();
-
- unsigned numElements = arrayTy->getNumElements();
-
- while (elementTy->isArrayTy()) {
- arrayTy = dyn_cast<ArrayType>(elementTy);
- elementTy = arrayTy->getElementType();
-
- numElements *= arrayTy->getNumElements();
- }
-
- // FIXME: isPrimitiveType() == false for i16?
- assert(elementTy->isSingleValueType() &&
- "Non-primitive types are not handled");
-
- // Find the size of the element in bits
- unsigned elementSize = elementTy->getPrimitiveSizeInBits();
-
- os << ".b" << elementSize << ' ' << gvsym->getName()
- << '[' << numElements << ']';
- } else {
- os << ".b8" << gvsym->getName() << "[]";
- }
-
- // handle string constants (assume ConstantArray means string)
- if (gv->hasInitializer()) {
- const Constant *C = gv->getInitializer();
- if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- os << " = {";
-
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
- if (i > 0)
- os << ',';
-
- os << "0x";
- os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
- }
-
- os << '}';
- }
- }
- } else {
- // Note: this is currently the fall-through case and most likely generates
- // incorrect code.
- os << getTypeName(gv->getType()) << ' ' << gvsym->getName();
-
- if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
- os << "[]";
- }
-
- os << ';';
-
- OutStreamer.EmitRawText(os.str());
- OutStreamer.AddBlankLine();
-}
-
-void PTXAsmPrinter::EmitFunctionEntryLabel() {
- // The function label could have already been emitted if two symbols end up
- // conflicting due to asm renaming. Detect this and emit an error.
- if (!CurrentFnSym->isUndefined())
- report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
- "' label emitted multiple times to assembly file");
-
- const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const PTXParamManager &PM = MFI->getParamManager();
- const bool isKernel = MFI->isKernel();
- const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
-
- SmallString<128> decl;
- raw_svector_ostream os(decl);
- os << (isKernel ? ".entry" : ".func");
-
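- // PTX .func syntax places the return-value list before the function name,
- // so for device functions emit that list here.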
- if (!isKernel) {
- os << " (";
- if (ST.useParamSpaceForDeviceArgs()) {
- for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
- b = i; i != e; ++i) {
- if (i != b)
- os << ", ";
-
- os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
- }
- } else {
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
- i != e; ++i) {
- if (i != b)
- os << ", ";
-
- os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
- << MFI->getRegisterName(*i);
- }
- }
- os << ')';
- }
-
- // Print function name
- os << ' ' << CurrentFnSym->getName() << " (";
-
- const Function *F = MF->getFunction();
-
- // Print parameters
- if (isKernel || ST.useParamSpaceForDeviceArgs()) {
- /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
- b = i; i != e; ++i) {
- if (i != b)
- os << ", ";
-
- os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
- }*/
- int Counter = 1;
- for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
- b = i; i != e; ++i) {
- if (i != b)
- os << ", ";
- const Type *ArgType = (*i).getType();
- os << ".param .b";
- if (ArgType->isPointerTy()) {
- if (ST.is64Bit())
- os << "64";
- else
- os << "32";
- } else {
- os << ArgType->getPrimitiveSizeInBits();
- }
- if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
- const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
- os << " .ptr";
- switch (PtrType->getAddressSpace()) {
- default:
- llvm_unreachable("Unknown address space in argument");
- case PTXStateSpace::Global:
- os << " .global";
- break;
- case PTXStateSpace::Shared:
- os << " .shared";
- break;
- }
- }
- os << " __param_" << Counter++;
- }
- } else {
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
- i != e; ++i) {
- if (i != b)
- os << ", ";
-
- os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
- << MFI->getRegisterName(*i);
- }
- }
- os << ')';
-
- OutStreamer.EmitRawText(os.str());
-}
-
-void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
-{
- const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
-
- std::string decl = "";
-
- // hard-coded emission of extern vprintf function
-
- if (func->getName() == "printf" || func->getName() == "puts") {
- decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
- if (ST.is64Bit())
- decl += "64";
- else
- decl += "32";
- decl += " __param_2, .param .b";
- if (ST.is64Bit())
- decl += "64";
- else
- decl += "32";
- decl += " __param_3)\n";
- }
-
- OutStreamer.EmitRawText(Twine(decl));
-}
-
-unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
- StringRef DirName) {
- // If FE did not provide a file name, then assume stdin.
- if (FileName.empty())
- return GetOrCreateSourceID("<stdin>", StringRef());
-
- // MCStreamer expects the full path name as the filename.
- if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
- SmallString<128> FullPathName = DirName;
- sys::path::append(FullPathName, FileName);
- // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
- return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
- }
-
- StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
- if (Entry.getValue())
- return Entry.getValue();
-
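- // size() already counts the entry just created, so the first file gets ID 1.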
- unsigned SrcId = SourceIdMap.size();
- Entry.setValue(SrcId);
-
- // Print out a .file directive to specify files for .loc directives.
- OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey());
-
- return SrcId;
-}
-
-MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
- const MCSymbol *Symbol) {
- const MCExpr *Expr;
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
- return MCOperand::CreateExpr(Expr);
-}
-
-MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
- MCOperand MCOp;
- const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- unsigned EncodedReg;
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unknown operand type");
- case MachineOperand::MO_Register:
- if (MO.getReg() > 0) {
- // Encode the register
- EncodedReg = MFI->getEncodedRegister(MO.getReg());
- } else {
- EncodedReg = 0;
- }
- MCOp = MCOperand::CreateReg(EncodedReg);
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), OutContext));
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
- break;
- case MachineOperand::MO_FPImmediate:
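- // Widen the FP immediate to IEEE double so it can be carried in the
- // MCOperand as a double value.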
- APFloat Val = MO.getFPImm()->getValueAPF();
- bool ignored;
- Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
- MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
- break;
- }
-
- return MCOp;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializePTXAsmPrinter() {
- RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
- RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
-}
diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h
deleted file mode 100644
index 74c8d58a3e9b..000000000000
--- a/lib/Target/PTX/PTXAsmPrinter.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PTX Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTXASMPRINTER_H
-#define PTXASMPRINTER_H
-
-#include "PTX.h"
-#include "PTXTargetMachine.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
-public:
- explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- const char *getPassName() const { return "PTX Assembly Printer"; }
-
- bool doFinalization(Module &M);
-
- virtual void EmitStartOfAsmFile(Module &M);
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- virtual void EmitFunctionEntryLabel();
- virtual void EmitInstruction(const MachineInstr *MI);
-
- unsigned GetOrCreateSourceID(StringRef FileName,
- StringRef DirName);
-
- MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
- MCOperand lowerOperand(const MachineOperand &MO);
-
-private:
- void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration(const Function* func);
-
- StringMap<unsigned> SourceIdMap;
-}; // class PTXAsmPrinter
-} // namespace llvm
-
-#endif
-
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
deleted file mode 100644
index a21d1728d81c..000000000000
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a machine function pass that sets appropriate FP rounding
-// modes for all relevant instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-fp-rounding-mode"
-
-#include "PTX.h"
-#include "PTXTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-// NOTE: PTXFPRoundingModePass should be executed just before emission.
-
-namespace {
- /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
- /// all FP instructions. Essentially, this pass just looks for all FP
- /// instructions that have a rounding mode set to RndDefault, and sets an
- /// appropriate rounding mode based on the target device.
- ///
- class PTXFPRoundingModePass : public MachineFunctionPass {
- private:
- static char ID;
-
- typedef std::pair<unsigned, unsigned> RndModeDesc;
-
- PTXTargetMachine& TargetMachine;
- DenseMap<unsigned, RndModeDesc> Instrs;
-
- public:
- PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : MachineFunctionPass(ID),
- TargetMachine(TM) {
- initializeMap();
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "PTX FP Rounding Mode Pass";
- }
-
- private:
-
- void initializeMap();
- void processInstruction(MachineInstr &MI);
- }; // class PTXFPRoundingModePass
-} // end anonymous namespace
-
-using namespace llvm;
-
-char PTXFPRoundingModePass::ID = 0;
-
-bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
- // Look at each basic block
- for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
- ++bbi) {
- MachineBasicBlock &MBB = *bbi;
- // Look at each instruction
- for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
- ii != ie; ++ii) {
- MachineInstr &MI = *ii;
- processInstruction(MI);
- }
- }
- return false;
-}
-
-void PTXFPRoundingModePass::initializeMap() {
- using namespace PTXRoundingMode;
- const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
-
- // Build a map of default rounding mode for all instructions that need a
- // rounding mode.
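- // Each entry maps an opcode to a (rounding-mode operand index, default
- // rounding mode) pair.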
- Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
-
- Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
- Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
- Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
- Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
-
- unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
- Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
- Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
- Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
- Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
-
- unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
- Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
- Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
- Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
- Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
- Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
- Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
-
- Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
-
- Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
- Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
-
- Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
- Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
-
- Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
- Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
-}
-
-void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
- // Is this an instruction that needs a rounding mode?
- if (Instrs.count(MI.getOpcode())) {
- const RndModeDesc &Desc = Instrs[MI.getOpcode()];
- // Get the rounding mode operand
- MachineOperand &Op = MI.getOperand(Desc.first);
- // Update the rounding mode if needed
- if (Op.getImm() == PTXRoundingMode::RndDefault) {
- Op.setImm(Desc.second);
- }
- }
-}
-
-FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new PTXFPRoundingModePass(TM, OptLevel);
-}
-
diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp
deleted file mode 100644
index e6e268e480f8..000000000000
--- a/lib/Target/PTX/PTXFrameLowering.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXFrameLowering.h"
-#include "llvm/CodeGen/MachineFunction.h"
-
-using namespace llvm;
-
-void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
-}
-
-void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
-}
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
deleted file mode 100644
index 831e81803df6..000000000000
--- a/lib/Target/PTX/PTXFrameLowering.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_FRAMEINFO_H
-#define PTX_FRAMEINFO_H
-
-#include "PTX.h"
-#include "PTXSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
- class PTXSubtarget;
-
-class PTXFrameLowering : public TargetFrameLowering {
-protected:
- const PTXSubtarget &STI;
-
-public:
- explicit PTXFrameLowering(const PTXSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
- STI(sti) {
- }
-
- /// emitPrologue/emitEpilogue - These methods insert prologue and epilogue code
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool hasFP(const MachineFunction &MF) const { return false; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
deleted file mode 100644
index 5c7ee298f31a..000000000000
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ /dev/null
@@ -1,356 +0,0 @@
-//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the PTX target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTX.h"
-#include "PTXMachineFunctionInfo.h"
-#include "PTXTargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-// PTXDAGToDAGISel - PTX specific code to select PTX machine
-// instructions for SelectionDAG operations.
-class PTXDAGToDAGISel : public SelectionDAGISel {
- public:
- PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
-
- virtual const char *getPassName() const {
- return "PTX DAG->DAG Pattern Instruction Selection";
- }
-
- SDNode *Select(SDNode *Node);
-
- // Complex Pattern Selectors.
- bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
- bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
-
- // Include the pieces auto'gened from the target description
-#include "PTXGenDAGISel.inc"
-
- private:
- // We need this only because we can't match the instruction BRAdp
- // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
- SDNode *SelectBRCOND(SDNode *Node);
-
- SDNode *SelectREADPARAM(SDNode *Node);
- SDNode *SelectWRITEPARAM(SDNode *Node);
- SDNode *SelectFrameIndex(SDNode *Node);
-
- bool isImm(const SDValue &operand);
- bool SelectImm(const SDValue &operand, SDValue &imm);
-
- const PTXSubtarget& getSubtarget() const;
-}; // class PTXDAGToDAGISel
-} // namespace
-
-// createPTXISelDag - This pass converts a legalized DAG into a
-// PTX-specific DAG, ready for instruction scheduling
-FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new PTXDAGToDAGISel(TM, OptLevel);
-}
-
-PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
-
-SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
- switch (Node->getOpcode()) {
- case ISD::BRCOND:
- return SelectBRCOND(Node);
- case PTXISD::READ_PARAM:
- return SelectREADPARAM(Node);
- case PTXISD::WRITE_PARAM:
- return SelectWRITEPARAM(Node);
- case ISD::FrameIndex:
- return SelectFrameIndex(Node);
- default:
- return SelectCode(Node);
- }
-}
-
-SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
- assert(Node->getNumOperands() >= 3);
-
- SDValue Chain = Node->getOperand(0);
- SDValue Pred = Node->getOperand(1);
- SDValue Target = Node->getOperand(2); // branch target
- SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
- DebugLoc dl = Node->getDebugLoc();
-
- assert(Target.getOpcode() == ISD::BasicBlock);
- assert(Pred.getValueType() == MVT::i1);
-
- // Emit BRAdp
- SDValue Ops[] = { Target, Pred, PredOp, Chain };
- return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
-}
-
-SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
- SDValue Chain = Node->getOperand(0);
- SDValue Index = Node->getOperand(1);
-
- int OpCode;
-
- // Get the type of parameter we are reading
- EVT VT = Node->getValueType(0);
- assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
-
- MVT Type = VT.getSimpleVT();
-
- if (Type == MVT::i1)
- OpCode = PTX::READPARAMPRED;
- else if (Type == MVT::i16)
- OpCode = PTX::READPARAMI16;
- else if (Type == MVT::i32)
- OpCode = PTX::READPARAMI32;
- else if (Type == MVT::i64)
- OpCode = PTX::READPARAMI64;
- else if (Type == MVT::f32)
- OpCode = PTX::READPARAMF32;
- else {
- assert(Type == MVT::f64 && "Unexpected type!");
- OpCode = PTX::READPARAMF64;
- }
-
- SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
- DebugLoc dl = Node->getDebugLoc();
-
- SDValue Ops[] = { Index, Pred, PredOp, Chain };
- return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
-}
-
-SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
-
- SDValue Chain = Node->getOperand(0);
- SDValue Value = Node->getOperand(1);
-
- int OpCode;
-
- //Node->dumpr(CurDAG);
-
- // Get the type of parameter we are writing
- EVT VT = Value->getValueType(0);
- assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
-
- MVT Type = VT.getSimpleVT();
-
- if (Type == MVT::i1)
- OpCode = PTX::WRITEPARAMPRED;
- else if (Type == MVT::i16)
- OpCode = PTX::WRITEPARAMI16;
- else if (Type == MVT::i32)
- OpCode = PTX::WRITEPARAMI32;
- else if (Type == MVT::i64)
- OpCode = PTX::WRITEPARAMI64;
- else if (Type == MVT::f32)
- OpCode = PTX::WRITEPARAMF32;
- else if (Type == MVT::f64)
- OpCode = PTX::WRITEPARAMF64;
- else
- llvm_unreachable("Invalid type in SelectWRITEPARAM");
-
- SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
- DebugLoc dl = Node->getDebugLoc();
-
- SDValue Ops[] = { Value, Pred, PredOp, Chain };
- SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
-
- //dbgs() << "SelectWRITEPARAM produced:\n\t";
- //Ret->dumpr(CurDAG);
-
- return Ret;
-}
-
-SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
- //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
-
- PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
-
- SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
- Node->getValueType(0));
-
- return FrameSymbol.getNode();
-}
-
-// Match memory operand of the form [reg+reg]
-bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
- if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
- isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
- return false;
-
- assert(Addr.getValueType().isSimple() && "Type must be simple");
-
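- // The whole ADD expression is kept as the first operand; the second operand
- // is simply a zero constant rather than a split-out second register.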
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
-
- return true;
-}
-
-// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
-bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
- SDValue &Offset) {
- // FrameIndex addresses are handled separately
- //errs() << "SelectADDRri: ";
- //Addr.getNode()->dumpr();
- if (isa<FrameIndexSDNode>(Addr)) {
- //errs() << "Failure\n";
- return false;
- }
-
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- Base = Addr.getOperand(0);
- if (isa<FrameIndexSDNode>(Base)) {
- //errs() << "Failure\n";
- return false;
- }
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
- //errs() << "Success\n";
- return true;
- }
-
- /*if (Addr.getNumOperands() == 1) {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
- errs() << "Success\n";
- return true;
- }*/
-
- //errs() << "SelectADDRri fails on: ";
- //Addr.getNode()->dumpr();
-
- if (isImm(Addr)) {
- //errs() << "Failure\n";
- return false;
- }
-
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
-
- //errs() << "Success\n";
- return true;
-
- /*if (Addr.getOpcode() != ISD::ADD) {
- // let SelectADDRii handle the [imm] case
- if (isImm(Addr))
- return false;
- // it is [reg]
-
- assert(Addr.getValueType().isSimple() && "Type must be simple");
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
-
- return true;
- }
-
- if (Addr.getNumOperands() < 2)
- return false;
-
- // let SelectADDRii handle the [imm+imm] case
- if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
- return false;
-
- // try [reg+imm] and [imm+reg]
- for (int i = 0; i < 2; i ++)
- if (SelectImm(Addr.getOperand(1-i), Offset)) {
- Base = Addr.getOperand(i);
- return true;
- }
-
- // neither [reg+imm] nor [imm+reg]
- return false;*/
-}
-
-// Match memory operand of the form [imm+imm] and [imm]
-bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
- SDValue &Offset) {
- // is [imm+imm]?
- if (Addr.getOpcode() == ISD::ADD) {
- return SelectImm(Addr.getOperand(0), Base) &&
- SelectImm(Addr.getOperand(1), Offset);
- }
-
- // is [imm]?
- if (SelectImm(Addr, Base)) {
- assert(Addr.getValueType().isSimple() && "Type must be simple");
-
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
-
- return true;
- }
-
- return false;
-}
-
-// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
-bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
- SDValue &Offset) {
- //errs() << "SelectADDRlocal: ";
- //Addr.getNode()->dumpr();
- if (isa<FrameIndexSDNode>(Addr)) {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
- //errs() << "Success\n";
- return true;
- }
-
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- Base = Addr.getOperand(0);
- if (!isa<FrameIndexSDNode>(Base)) {
- //errs() << "Failure\n";
- return false;
- }
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
- //errs() << "Offset: ";
- //Offset.getNode()->dumpr();
- //errs() << "Success\n";
- return true;
- }
-
- //errs() << "Failure\n";
- return false;
-}
-
-bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
- return ConstantSDNode::classof(operand.getNode());
-}
-
-bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
- SDNode *node = operand.getNode();
- if (!ConstantSDNode::classof(node))
- return false;
-
- ConstantSDNode *CN = cast<ConstantSDNode>(node);
- imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
- operand.getValueType());
- return true;
-}
-
-const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
-{
- return TM.getSubtarget<PTXSubtarget>();
-}
-
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
deleted file mode 100644
index ef4455b96bc3..000000000000
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ /dev/null
@@ -1,522 +0,0 @@
-//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PTXTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXISelLowering.h"
-#include "PTX.h"
-#include "PTXMachineFunctionInfo.h"
-#include "PTXRegisterInfo.h"
-#include "PTXSubtarget.h"
-#include "llvm/Function.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- // Set up the register classes.
- addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
- addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
- addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
- addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
- addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
- addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
-
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- setMinFunctionAlignment(2);
-
- // Let LLVM use loads/stores for all mem* operations
- maxStoresPerMemcpy = 4096;
- maxStoresPerMemmove = 4096;
- maxStoresPerMemset = 4096;
-
- ////////////////////////////////////
- /////////// Expansion //////////////
- ////////////////////////////////////
-
- // (any/zero/sign) extload => load + (any/zero/sign) extend
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
- // f32 extload => load + fextend
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- // f64 truncstore => trunc + store
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // sign_extend_inreg => sign_extend
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // br_cc => brcond
-
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // select_cc => setcc
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
- ////////////////////////////////////
- //////////// Legal /////////////////
- ////////////////////////////////////
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- ////////////////////////////////////
- //////////// Custom ////////////////
- ////////////////////////////////////
-
- // customise setcc to use bitwise logic if possible
-
- //setOperationAction(ISD::SETCC, MVT::i1, Custom);
- setOperationAction(ISD::SETCC, MVT::i1, Legal);
-
- // customize translation of memory addresses
-
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-
- // Compute derived properties from the register classes
- computeRegisterProperties();
-}
-
-EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i1;
-}
-
-SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unimplemented operand");
- case ISD::SETCC:
- return LowerSETCC(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- }
-}
-
-const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- llvm_unreachable("Unknown opcode");
- case PTXISD::COPY_ADDRESS:
- return "PTXISD::COPY_ADDRESS";
- case PTXISD::LOAD_PARAM:
- return "PTXISD::LOAD_PARAM";
- case PTXISD::STORE_PARAM:
- return "PTXISD::STORE_PARAM";
- case PTXISD::READ_PARAM:
- return "PTXISD::READ_PARAM";
- case PTXISD::WRITE_PARAM:
- return "PTXISD::WRITE_PARAM";
- case PTXISD::EXIT:
- return "PTXISD::EXIT";
- case PTXISD::RET:
- return "PTXISD::RET";
- case PTXISD::CALL:
- return "PTXISD::CALL";
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Custom Lower Operation
-//===----------------------------------------------------------------------===//
-
-SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
- //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- // Look for X == 0, X == 1, X != 0, or X != 1
- // We can simplify these to bitwise logic
-
- //if (Op1.getOpcode() == ISD::Constant &&
- // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- // cast<ConstantSDNode>(Op1)->isNullValue()) &&
- // (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- //
- // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
- //}
-
- //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1);
- //if(COp1 && COp1->getZExtValue() == 1) {
- // if(CC == ISD::SETNE) {
- // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0);
- // }
- //}
-
- llvm_unreachable("setcc was not matched by a pattern!");
-
- return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
-}
-
-SDValue PTXTargetLowering::
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-
- assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
-
- SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
- SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
- dl,
- PtrVT.getSimpleVT(),
- targetGlobal);
-
- return movInstr;
-}
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-SDValue PTXTargetLowering::
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- if (isVarArg) llvm_unreachable("PTX does not support varargs");
-
- MachineFunction &MF = DAG.getMachineFunction();
- const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- PTXParamManager &PM = MFI->getParamManager();
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention");
- case CallingConv::PTX_Kernel:
- MFI->setKernel(true);
- break;
- case CallingConv::PTX_Device:
- MFI->setKernel(false);
- break;
- }
-
- // We do one of two things here:
- // IsKernel || SM >= 2.0 -> Use param space for arguments
- // SM < 2.0 -> Use registers for arguments
- if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
- // We just need to emit the proper LOAD_PARAM ISDs
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
- "Kernels cannot take pred operands");
-
- unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
- unsigned Param = PM.addArgumentParam(ParamSize);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
- SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
- ParamValue);
- InVals.push_back(ArgValue);
- }
- }
- else {
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT RegVT = Ins[i].VT;
- const TargetRegisterClass* TRC = getRegClassFor(RegVT);
- unsigned RegType;
-
- // Determine which register class we need
- if (RegVT == MVT::i1)
- RegType = PTXRegisterType::Pred;
- else if (RegVT == MVT::i16)
- RegType = PTXRegisterType::B16;
- else if (RegVT == MVT::i32)
- RegType = PTXRegisterType::B32;
- else if (RegVT == MVT::i64)
- RegType = PTXRegisterType::B64;
- else if (RegVT == MVT::f32)
- RegType = PTXRegisterType::F32;
- else if (RegVT == MVT::f64)
- RegType = PTXRegisterType::F64;
- else
- llvm_unreachable("Unknown parameter type");
-
- // Use a unique index in the instruction to prevent instruction folding.
- // Yes, this is a hack.
- SDValue Index = DAG.getTargetConstant(i, MVT::i32);
- unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
- Index);
-
- InVals.push_back(ArgValue);
-
- MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
- }
- }
-
- return Chain;
-}
-
-SDValue PTXTargetLowering::
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl,
- SelectionDAG &DAG) const {
- if (isVarArg) llvm_unreachable("PTX does not support varargs");
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention.");
- case CallingConv::PTX_Kernel:
- assert(Outs.size() == 0 && "Kernel must return void.");
- return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
- case CallingConv::PTX_Device:
- assert(Outs.size() <= 1 && "Can at most return one value.");
- break;
- }
-
- MachineFunction& MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- PTXParamManager &PM = MFI->getParamManager();
-
- SDValue Flag;
- const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
-
- if (ST.useParamSpaceForDeviceArgs()) {
- assert(Outs.size() < 2 && "Device functions can return at most one value");
-
- if (Outs.size() == 1) {
- unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
- unsigned Param = PM.addReturnParam(ParamSize);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
- Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
- ParamValue, OutVals[0]);
- }
- } else {
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT RegVT = Outs[i].VT;
- const TargetRegisterClass* TRC;
- unsigned RegType;
-
- // Determine which register class we need
- if (RegVT == MVT::i1) {
- TRC = PTX::RegPredRegisterClass;
- RegType = PTXRegisterType::Pred;
- }
- else if (RegVT == MVT::i16) {
- TRC = PTX::RegI16RegisterClass;
- RegType = PTXRegisterType::B16;
- }
- else if (RegVT == MVT::i32) {
- TRC = PTX::RegI32RegisterClass;
- RegType = PTXRegisterType::B32;
- }
- else if (RegVT == MVT::i64) {
- TRC = PTX::RegI64RegisterClass;
- RegType = PTXRegisterType::B64;
- }
- else if (RegVT == MVT::f32) {
- TRC = PTX::RegF32RegisterClass;
- RegType = PTXRegisterType::F32;
- }
- else if (RegVT == MVT::f64) {
- TRC = PTX::RegF64RegisterClass;
- RegType = PTXRegisterType::F64;
- }
- else {
- llvm_unreachable("Unknown parameter type");
- }
-
- unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
-
- SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
- SDValue OutReg = DAG.getRegister(Reg, RegVT);
-
- Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
-
- MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
- }
- }
-
- if (Flag.getNode() == 0) {
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
- }
- else {
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
- }
-}
-
-SDValue
-PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- MachineFunction& MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
- PTXParamManager &PM = PTXMFI->getParamManager();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
- "Calls are not handled for the target device");
-
- // Identify the callee function
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- const Function *function = cast<Function>(GV);
-
- // allow non-device calls only for printf
- bool isPrintf = function->getName() == "printf" || function->getName() == "puts";
-
- assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
- "PTX function calls must be to PTX device functions");
-
- unsigned outSize = isPrintf ? 2 : Outs.size();
-
- std::vector<SDValue> Ops;
- // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
- Ops.resize(outSize + Ins.size() + 4);
-
- Ops[0] = Chain;
-
- // Identify the callee function
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
- Ops[Ins.size()+2] = Callee;
-
- // #Outs
- Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
-
- if (isPrintf) {
- // first argument is the address of the global string variable in memory
- unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
- SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
- MVT::Other);
- Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
- ParamValue0, OutVals[0]);
- Ops[Ins.size()+4] = ParamValue0;
-
- // alignment is the maximum size of all the arguments
- unsigned alignment = 0;
- for (unsigned i = 1; i < OutVals.size(); ++i) {
- alignment = std::max(alignment,
- OutVals[i].getValueType().getSizeInBits());
- }
-
- // size is the alignment multiplied by the number of arguments
- unsigned size = alignment * (OutVals.size() - 1);
-
- // second argument is the address of the stack object (unless no arguments)
- unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
- SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
- MVT::Other);
- Ops[Ins.size()+5] = ParamValue1;
-
- if (size > 0)
- {
- // create a local stack object to store the arguments
- unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
- SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
-
- // store each of the arguments to the stack in turn
- for (unsigned int i = 1; i != OutVals.size(); i++) {
- SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
- Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
- MachinePointerInfo(),
- false, false, 0);
- }
-
- // copy the address of the local frame index to get the address in non-local space
- SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
-
- // store this address in the second argument
- Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
- }
- }
- else
- {
- // Generate STORE_PARAM nodes for each function argument. In PTX, function
- // arguments are explicitly stored into .param variables and passed as
- // arguments. There is no register/stack-based calling convention in PTX.
- for (unsigned i = 0; i != OutVals.size(); ++i) {
- unsigned Size = OutVals[i].getValueType().getSizeInBits();
- unsigned Param = PM.addLocalParam(Size);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
- Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
- ParamValue, OutVals[i]);
- Ops[i+Ins.size()+4] = ParamValue;
- }
- }
-
- std::vector<SDValue> InParams;
-
- // Generate list of .param variables to hold the return value(s).
- Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
- for (unsigned i = 0; i < Ins.size(); ++i) {
- unsigned Size = Ins[i].VT.getStoreSizeInBits();
- unsigned Param = PM.addLocalParam(Size);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
- Ops[i+2] = ParamValue;
- InParams.push_back(ParamValue);
- }
-
- Ops[0] = Chain;
-
- // Create the CALL node.
- Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
-
- // Create the LOAD_PARAM nodes that retrieve the function return value(s).
- for (unsigned i = 0; i < Ins.size(); ++i) {
- SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
- InParams[i]);
- InVals.push_back(Load);
- }
-
- return Chain;
-}
-
-unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
- // All arguments consist of one "register," regardless of the type.
- return 1;
-}
-
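
A note on the removed LowerCall above: the PTXISD::CALL node's operands are laid
out as [Chain, #Ins, Ins..., Callee, #Outs, Outs...], which is why the code
indexes Ops with Ins.size()+2 (Callee), Ins.size()+3 (#Outs), and
i+Ins.size()+4 (the argument .param symbols). Below is a minimal sketch of that
index arithmetic in plain C++; the names are made up for illustration and are
not LLVM API.

#include <cstdio>

// Reproduces the operand-index arithmetic of the removed LowerCall for a call
// with two return values (Ins) and three arguments (Outs).
int main() {
  const unsigned NumIns = 2, NumOuts = 3;
  const unsigned ChainIdx    = 0;               // Ops[0] = Chain
  const unsigned NumInsIdx   = 1;               // Ops[1] = #Ins
  const unsigned FirstInIdx  = 2;               // Ops[2 .. 2+NumIns) = return .param symbols
  const unsigned CalleeIdx   = NumIns + 2;      // Ops[Ins.size()+2] = Callee
  const unsigned NumOutsIdx  = NumIns + 3;      // Ops[Ins.size()+3] = #Outs
  const unsigned FirstOutIdx = NumIns + 4;      // Ops[i+Ins.size()+4] = argument .param symbols
  const unsigned TotalOps    = NumOuts + NumIns + 4; // Ops.resize(outSize + Ins.size() + 4)
  std::printf("chain=%u #ins=%u ins@[%u..%u) callee=%u #outs=%u outs@[%u..%u) total=%u\n",
              ChainIdx, NumInsIdx, FirstInIdx, FirstInIdx + NumIns, CalleeIdx,
              NumOutsIdx, FirstOutIdx, FirstOutIdx + NumOuts, TotalOps);
  return 0;
}

For two return values and three arguments this yields indices 0, 1, [2,4), 4, 5,
[6,9) and a total of 9 operands, matching Ops.resize(outSize + Ins.size() + 4).
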
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
deleted file mode 100644
index 33220f4dc346..000000000000
--- a/lib/Target/PTX/PTXISelLowering.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PTX uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_ISEL_LOWERING_H
-#define PTX_ISEL_LOWERING_H
-
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-namespace PTXISD {
- enum NodeType {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- LOAD_PARAM,
- STORE_PARAM,
- READ_PARAM,
- WRITE_PARAM,
- EXIT,
- RET,
- COPY_ADDRESS,
- CALL
- };
-} // namespace PTXISD
-
-class PTXTargetLowering : public TargetLowering {
- public:
- explicit PTXTargetLowering(TargetMachine &TM);
-
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl,
- SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual EVT getSetCCResultType(EVT VT) const;
-
- virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
-
- private:
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-}; // class PTXTargetLowering
-} // namespace llvm
-
-#endif // PTX_ISEL_LOWERING_H
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
deleted file mode 100644
index 267e8341293a..000000000000
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Rounding Mode Specifier
-/*class RoundingMode<bits<3> val> {
- bits<3> Value = val;
-}
-
-def RndDefault : RoundingMode<0>;
-def RndNearestEven : RoundingMode<1>;
-def RndNearestZero : RoundingMode<2>;
-def RndNegInf : RoundingMode<3>;
-def RndPosInf : RoundingMode<4>;
-def RndApprox : RoundingMode<5>;*/
-
-
-// Rounding Mode Operand
-def RndMode : Operand<i32> {
- let PrintMethod = "printRoundingMode";
-}
-
-def RndDefault : PatLeaf<(i32 0)>;
-
- // PTX Predicate operand; the default (zero_reg, 2) = (no predicate register, none).
-// Leave PrintMethod empty; predicate printing is defined elsewhere.
-def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
- (ops (i1 zero_reg), (i32 2))>;
-
-def RndModeOperand : Operand<OtherVT> {
- let MIOperandInfo = (ops i32imm);
-}
-
-// Instruction Types
-let Namespace = "PTX" in {
-
- class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
- : Instruction {
- dag OutOperandList = oops;
- dag InOperandList = !con(iops, (ins pred:$_p));
- let AsmString = asmstr; // Predicate printing is defined elsewhere.
- let Pattern = pattern;
- let isPredicable = 1;
- }
-}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
deleted file mode 100644
index 443cd54906c2..000000000000
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ /dev/null
@@ -1,359 +0,0 @@
-//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-instrinfo"
-
-#include "PTXInstrInfo.h"
-#include "PTX.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_INSTRINFO_CTOR
-#include "PTXGenInstrInfo.inc"
-
-using namespace llvm;
-
-PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
- : PTXGenInstrInfo(),
- RI(_TM, *this), TM(_TM) {}
-
-static const struct map_entry {
- const TargetRegisterClass *cls;
- const int opcode;
-} map[] = {
- { &PTX::RegI16RegClass, PTX::MOVU16rr },
- { &PTX::RegI32RegClass, PTX::MOVU32rr },
- { &PTX::RegI64RegClass, PTX::MOVU64rr },
- { &PTX::RegF32RegClass, PTX::MOVF32rr },
- { &PTX::RegF64RegClass, PTX::MOVF64rr },
- { &PTX::RegPredRegClass, PTX::MOVPREDrr }
-};
-
-void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg, unsigned SrcReg,
- bool KillSrc) const {
-
- const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
- //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
- // "Invalid register copy between two register classes");
-
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
- if (map[i].cls == MRI.getRegClass(DstReg)) {
- const MCInstrDesc &MCID = get(map[i].opcode);
- MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
- addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPredicate(MI);
- return;
- }
- }
-
- llvm_unreachable("Impossible reg-to-reg copy");
-}
-
-bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DstRC != SrcRC)
- return false;
-
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
- if (DstRC == map[i].cls) {
- const MCInstrDesc &MCID = get(map[i].opcode);
- MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
- AddDefaultPredicate(MI);
- return true;
- }
-
- return false;
-}
-
-bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case PTX::MOVU16rr:
- case PTX::MOVU32rr:
- case PTX::MOVU64rr:
- case PTX::MOVF32rr:
- case PTX::MOVF64rr:
- case PTX::MOVPREDrr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
- "Invalid register-register move instruction");
- SrcSubIdx = DstSubIdx = 0; // No sub-registers
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-}
-
-// predicate support
-
-bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
- int i = MI->findFirstPredOperandIdx();
- return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
-}
-
-bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && MI->isTerminator();
-}
-
-bool PTXInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
- if (Pred.size() < 2)
- llvm_unreachable("fewer than 2 predicate operands provided");
-
- int i = MI->findFirstPredOperandIdx();
- if (i == -1)
- llvm_unreachable("missing predicate operand");
-
- MI->getOperand(i).setReg(Pred[0].getReg());
- MI->getOperand(i+1).setImm(Pred[1].getImm());
-
- return true;
-}
-
-bool PTXInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
- const MachineOperand &PredReg1 = Pred1[0];
- const MachineOperand &PredReg2 = Pred2[0];
- if (PredReg1.getReg() != PredReg2.getReg())
- return false;
-
- const MachineOperand &PredOp1 = Pred1[1];
- const MachineOperand &PredOp2 = Pred2[1];
- if (PredOp1.getImm() != PredOp2.getImm())
- return false;
-
- return true;
-}
-
-bool PTXInstrInfo::
-DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- // If an instruction sets a predicate register, it defines a predicate.
-
- // TODO: support the 5-operand format of the setp instruction
-
- if (MI->getNumOperands() < 1)
- return false;
-
- const MachineOperand &MO = MI->getOperand(0);
-
- if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
- return false;
-
- Pred.push_back(MO);
- Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
- return true;
-}
-
-// branch support
-
-bool PTXInstrInfo::
-AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // TODO implement cases when AllowModify is true
-
- if (MBB.empty())
- return true;
-
- MachineBasicBlock::iterator iter = MBB.end();
- const MachineInstr& instLast1 = *--iter;
- // Handle the special case where MBB has only one instruction.
- const bool IsSizeOne = MBB.size() == 1;
- // If IsSizeOne is true, *--iter would be invalid, so instLast2 is given a
- // dummy value (it is still referenced below).
- const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
-
- DEBUG(dbgs() << "\n");
- DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
- DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
- DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
- DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
-
- // this block ends with no branches
- if (!IsAnyKindOfBranch(instLast1)) {
- DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
- return false;
- }
-
- // this block ends with only an unconditional branch
- if (instLast1.isUnconditionalBranch() &&
- // when IsSizeOne is true, short-circuiting avoids evaluating instLast2
- (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
- DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
- TBB = GetBranchTarget(instLast1);
- return false;
- }
-
- // this block ends with a conditional branch and
- // it falls through to a successor block
- if (instLast1.isConditionalBranch() &&
- IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
- DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
- TBB = GetBranchTarget(instLast1);
- int i = instLast1.findFirstPredOperandIdx();
- Cond.push_back(instLast1.getOperand(i));
- Cond.push_back(instLast1.getOperand(i+1));
- return false;
- }
-
- // when IsSizeOne is true, we are done
- if (IsSizeOne)
- return true;
-
- // this block ends with a conditional branch
- // followed by an unconditional branch
- if (instLast2.isConditionalBranch() &&
- instLast1.isUnconditionalBranch()) {
- DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
- TBB = GetBranchTarget(instLast2);
- FBB = GetBranchTarget(instLast1);
- int i = instLast2.findFirstPredOperandIdx();
- Cond.push_back(instLast2.getOperand(i));
- Cond.push_back(instLast2.getOperand(i+1));
- return false;
- }
-
- // branch cannot be understood
- DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
- return true;
-}
-
-unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- unsigned count = 0;
- while (!MBB.empty())
- if (IsAnyKindOfBranch(MBB.back())) {
- MBB.pop_back();
- ++count;
- } else
- break;
- DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
- DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
- return count;
-}
-
-unsigned PTXInstrInfo::
-InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
- DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
- << "\n";
- else dbgs() << "InsertBranch: TBB: (NULL)\n");
- DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
- << "\n";
- else dbgs() << "InsertBranch: FBB: (NULL)\n");
- DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
-
- assert(TBB && "TBB is NULL");
-
- if (FBB) {
- BuildMI(&MBB, DL, get(PTX::BRAdp))
- .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
- return 2;
- } else if (Cond.size()) {
- BuildMI(&MBB, DL, get(PTX::BRAdp))
- .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
- return 1;
- } else {
- BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
- return 1;
- }
-}
-
-// Memory operand folding for spills
-void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MII,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- llvm_unreachable("storeRegToStackSlot should not be called for PTX");
-}
-
-void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MII,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
-}
-
-// static helper routines
-
-MachineSDNode *PTXInstrInfo::
-GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT, SDValue Op1) {
- SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
- SDValue ops[] = { Op1, predReg, predOp };
- return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
-}
-
-MachineSDNode *PTXInstrInfo::
-GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
- SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
- SDValue ops[] = { Op1, Op2, predReg, predOp };
- return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
-}
-
-void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
- if (MI->findFirstPredOperandIdx() == -1) {
- MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
- }
-}
-
-bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
-}
-
-bool PTXInstrInfo::
-IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
- for (MachineBasicBlock::const_succ_iterator
- i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
- if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
- return true;
- return false;
-}
-
-MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
- // FIXME: So far, all branch instructions put the destination in the first operand.
- const MachineOperand& target = inst.getOperand(0);
- assert(target.isMBB() && "FIXME: detect branch target operand");
- return target.getMBB();
-}
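
The predication convention the removed PTXInstrInfo relies on: every instruction
carries a trailing pair of operands, a predicate register and an immediate
predicate mode, and the pair (PTX::NoRegister, PTXPredicate::None) means the
instruction is unpredicated. AddDefaultPredicate appends exactly that pair and
isPredicated tests the register half; AnalyzeBranch and InsertBranch pass the
same two operands around as the branch condition (Cond[0] = predicate register,
Cond[1] = predicate mode). A small self-contained sketch of the convention,
using stand-in types and enum values rather than the LLVM classes:

#include <cassert>

// Stand-ins for the real PTX entities; the names and values are illustrative.
enum PredMode { Normal, Negate, None };
constexpr unsigned NoRegister = 0;

struct PredOperands {
  unsigned Reg;  // predicate register, or NoRegister
  PredMode Mode; // how the predicate is applied
};

// Mirrors PTXInstrInfo::isPredicated: an instruction is predicated iff its
// predicate-register operand names a real register.
bool isPredicated(const PredOperands &P) { return P.Reg != NoRegister; }

// Mirrors PTXInstrInfo::AddDefaultPredicate: default to "no predicate".
PredOperands defaultPredicate() { return {NoRegister, None}; }

int main() {
  assert(!isPredicated(defaultPredicate()));
  assert(isPredicated({/*Reg=*/7, Normal}));
  return 0;
}
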
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
deleted file mode 100644
index fba89c09394c..000000000000
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_INSTR_INFO_H
-#define PTX_INSTR_INFO_H
-
-#include "PTXRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "PTXGenInstrInfo.inc"
-
-namespace llvm {
-class PTXTargetMachine;
-
-class MachineSDNode;
-class SDValue;
-class SelectionDAG;
-
-class PTXInstrInfo : public PTXGenInstrInfo {
-private:
- const PTXRegisterInfo RI;
- PTXTargetMachine &TM;
-
-public:
- explicit PTXInstrInfo(PTXTargetMachine &_TM);
-
- virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
-
- virtual bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
- // predicate support
-
- virtual bool isPredicated(const MachineInstr *MI) const;
-
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
-
- virtual
- bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
-
- virtual
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
-
- virtual bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
-
- // PTX is fully predicable
- virtual bool isPredicable(MachineInstr *MI) const { return true; }
-
- // branch support
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify = false) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- // Memory operand folding for spills
- // TODO: Implement this eventually and get rid of storeRegToStackSlot and
- // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
- // we currently use to spill, though I doubt the overall effect on ptxas
- // output will be large; I have yet to see a case where ptxas fails to see
- // through the "stack" register usage, so it generates efficient code
- // anyway.
- // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- // MachineInstr* MI,
- // const SmallVectorImpl<unsigned> &Ops,
- // int FrameIndex) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
- MachineBasicBlock::iterator MII,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass* RC,
- const TargetRegisterInfo* TRI) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MII,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- // static helper routines
-
- static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT,
- SDValue Op1);
-
- static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT,
- SDValue Op1, SDValue Op2);
-
- static void AddDefaultPredicate(MachineInstr *MI);
-
- static bool IsAnyKindOfBranch(const MachineInstr& inst);
-
- static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
-
- static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
-}; // class PTXInstrInfo
-} // namespace llvm
-
-#endif // PTX_INSTR_INFO_H
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
deleted file mode 100644
index bead4286dfd8..000000000000
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ /dev/null
@@ -1,1031 +0,0 @@
-//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the PTX instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "PTXInstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// Code Generation Predicates
-//===----------------------------------------------------------------------===//
-
-// Shader Model Support
-def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
-def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
-def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
-def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
-
-// PTX Version Support
-def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
-def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
-def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
-def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
-def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
-def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
-
-// Fused-Multiply Add
-def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
-def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
-
-
-
-// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
-// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
-// [SDNPHasChain, SDNPOutGlue]>;
-// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
-// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
- [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
-
-
-// Branch & call targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
-
-//===----------------------------------------------------------------------===//
-// PTX Specific Node Definitions
-//===----------------------------------------------------------------------===//
-
- // PTX allows generic 3-register shifts like shl r0, r1, r2
-def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
-def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
-def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
-
-def PTXexit
- : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
-def PTXret
- : SDNode<"PTXISD::RET", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def PTXcopyaddress
- : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
-
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Class Templates
-//===----------------------------------------------------------------------===//
-
- // For floating-point instructions, we cannot simply embed the pattern in the
- // instruction definition, since the rounding-mode operand has to be filled in
- // separately and there is no obvious way to insert constants into instructions
- // directly from pattern matches.
-
-//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
-multiclass PTX_FLOAT_2OP<string opcstr> {
- def rr32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a),
- !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
- def ri32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, f32imm:$a),
- !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
- def rr64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a),
- !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
- def ri64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, f64imm:$a),
- !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
-}
-
-//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
-multiclass PTX_FLOAT_3OP<string opcstr> {
- def rr32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a, RegF32:$b),
- !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
- def ri32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a, f32imm:$b),
- !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
- def rr64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a, RegF64:$b),
- !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
- def ri64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a, f64imm:$b),
- !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
-}
-
-//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
-multiclass PTX_FLOAT_4OP<string opcstr> {
- def rrr32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
- !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
- def rri32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
- !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
- def rii32 : InstPTX<(outs RegF32:$d),
- (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
- !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
- def rrr64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
- !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
- def rri64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
- !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
- def rii64 : InstPTX<(outs RegF64:$d),
- (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
- !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
-}
-
-//===- Integer Instructions - 3 Operand Form ------------------------------===//
-multiclass PTX_INT3<string opcstr, SDNode opnode> {
- def rr16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, RegI16:$b),
- !strconcat(opcstr, ".u16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
- def ri16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, i16imm:$b),
- !strconcat(opcstr, ".u16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
- def rr32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, RegI32:$b),
- !strconcat(opcstr, ".u32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
- def ri32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, i32imm:$b),
- !strconcat(opcstr, ".u32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
- def rr64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, RegI64:$b),
- !strconcat(opcstr, ".u64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
- def ri64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, i64imm:$b),
- !strconcat(opcstr, ".u64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
-}
-
-//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
-multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
- def rr16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, RegI16:$b),
- !strconcat(opcstr, ".s16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
- def ri16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, i16imm:$b),
- !strconcat(opcstr, ".s16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
- def rr32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, RegI32:$b),
- !strconcat(opcstr, ".s32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
- def ri32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, i32imm:$b),
- !strconcat(opcstr, ".s32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
- def rr64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, RegI64:$b),
- !strconcat(opcstr, ".s64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
- def ri64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, i64imm:$b),
- !strconcat(opcstr, ".s64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
-}
-
-//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
-multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
- def ripreds : InstPTX<(outs RegPred:$d),
- (ins RegPred:$a, i1imm:$b),
- !strconcat(opcstr, ".pred\t$d, $a, $b"),
- [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
- def rrpreds : InstPTX<(outs RegPred:$d),
- (ins RegPred:$a, RegPred:$b),
- !strconcat(opcstr, ".pred\t$d, $a, $b"),
- [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
- def rr16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, RegI16:$b),
- !strconcat(opcstr, ".b16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
- def ri16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, i16imm:$b),
- !strconcat(opcstr, ".b16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
- def rr32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, RegI32:$b),
- !strconcat(opcstr, ".b32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
- def ri32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, i32imm:$b),
- !strconcat(opcstr, ".b32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
- def rr64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, RegI64:$b),
- !strconcat(opcstr, ".b64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
- def ri64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, i64imm:$b),
- !strconcat(opcstr, ".b64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
-}
-
-//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
-multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
- def rr16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, RegI16:$b),
- !strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
- def rr32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, RegI32:$b),
- !strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
- def rr64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, RegI64:$b),
- !strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
- def ri16 : InstPTX<(outs RegI16:$d),
- (ins RegI16:$a, i16imm:$b),
- !strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
- def ri32 : InstPTX<(outs RegI32:$d),
- (ins RegI32:$a, i32imm:$b),
- !strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
- def ri64 : InstPTX<(outs RegI64:$d),
- (ins RegI64:$a, i64imm:$b),
- !strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
- def ir16 : InstPTX<(outs RegI16:$d),
- (ins i16imm:$a, RegI16:$b),
- !strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
- def ir32 : InstPTX<(outs RegI32:$d),
- (ins i32imm:$a, RegI32:$b),
- !strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
- def ir64 : InstPTX<(outs RegI64:$d),
- (ins i64imm:$a, RegI64:$b),
- !strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
-}
-
-//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
-multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
- CondCode cmp, string cmpstr> {
- // TODO support 5-operand format: p|q, a, b, c
-
- def rr
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
- def ri
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
-
- def rr_and_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
- def ri_and_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
- RegPred:$c))]>;
- def rr_or_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
- def ri_or_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
- def rr_xor_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
- def ri_xor_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
- RegPred:$c))]>;
-
- def rr_and_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
- (not RegPred:$c)))]>;
- def ri_and_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
- (not RegPred:$c)))]>;
- def rr_or_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
- (not RegPred:$c)))]>;
- def ri_or_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
- (not RegPred:$c)))]>;
- def rr_xor_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
- (not RegPred:$c)))]>;
- def ri_xor_not_r
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
- (not RegPred:$c)))]>;
-}
-
-//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
-multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
- CondCode ucmp, CondCode ocmp, string cmpstr> {
- // TODO support 5-operand format: p|q, a, b, c
-
- def rr_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
- !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
- def rr_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
-
- def ri_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
- !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
- def ri_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
- !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
-
- def rr_and_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
- RegPred:$c))]>;
- def rr_and_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
- RegPred:$c))]>;
-
- def rr_or_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
- def rr_or_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
-
- def rr_xor_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
- RegPred:$c))]>;
- def rr_xor_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
- RegPred:$c))]>;
-
- def rr_and_not_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
- (not RegPred:$c)))]>;
- def rr_and_not_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
- (not RegPred:$c)))]>;
-
- def rr_or_not_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
- (not RegPred:$c)))]>;
- def rr_or_not_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
- (not RegPred:$c)))]>;
-
- def rr_xor_not_r_u
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
- (not RegPred:$c)))]>;
- def rr_xor_not_r_o
- : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname,
- "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
- (not RegPred:$c)))]>;
-}
-
-//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
-multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
- SDNode immnode> {
- def rr
- : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
- !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
- [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
- def ri
- : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
- !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
- [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
- def ii
- : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
- !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
- [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// Instructions
-//===----------------------------------------------------------------------===//
-
-///===- Integer Arithmetic Instructions -----------------------------------===//
-
-defm ADD : PTX_INT3<"add", add>;
-defm SUB : PTX_INT3<"sub", sub>;
-defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
-defm DIV : PTX_INT3<"div", udiv>;
-defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
-defm REM : PTX_INT3<"rem", urem>;
-
-///===- Floating-Point Arithmetic Instructions ----------------------------===//
-
-// FNEG
-defm FNEG : PTX_FLOAT_2OP<"neg">;
-
-// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add">;
-defm FSUB : PTX_FLOAT_3OP<"sub">;
-defm FMUL : PTX_FLOAT_3OP<"mul">;
-defm FDIV : PTX_FLOAT_3OP<"div">;
-
-// Multi-operation hybrid instructions
-defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
-
-
-///===- Floating-Point Intrinsic Instructions -----------------------------===//
-
-// SQRT
-def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
- "sqrt$r.f32\t$d, $a", []>;
-def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
- "sqrt$r.f32\t$d, $a", []>;
-def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
- "sqrt$r.f64\t$d, $a", []>;
-def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
- "sqrt$r.f64\t$d, $a", []>;
-
-// SIN
-def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
- "sin$r.f32\t$d, $a", []>;
-def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
- "sin$r.f32\t$d, $a", []>;
-def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
- "sin$r.f64\t$d, $a", []>;
-def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
- "sin$r.f64\t$d, $a", []>;
-
-// COS
-def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
- "cos$r.f32\t$d, $a", []>;
-def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
- "cos$r.f32\t$d, $a", []>;
-def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
- "cos$r.f64\t$d, $a", []>;
-def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
- "cos$r.f64\t$d, $a", []>;
-
-
-
-
-///===- Comparison and Selection Instructions -----------------------------===//
-
-// .setp
-
-// Compare u16
-
-defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
-defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
-defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
-defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
-defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
-defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
-defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
-defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
-defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
-defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
-
-// Compare u32
-
-defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
-defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
-defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
-defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
-defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
-defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
-defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
-defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
-defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
-defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
-
-// Compare u64
-
-defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
-defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
-defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
-defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
-defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
-defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
-defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
-defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
-defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
-defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
-
-// Compare f32
-
-defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
-
-// Compare f64
-
-defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
-
-// .selp
-
-defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
-defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
-defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
-defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
-defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
-
-///===- Logic and Shift Instructions --------------------------------------===//
-
-defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
-defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
-defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
-
-defm AND : PTX_LOGIC<"and", and>;
-defm OR : PTX_LOGIC<"or", or>;
-defm XOR : PTX_LOGIC<"xor", xor>;
-
-///===- Data Movement and Conversion Instructions -------------------------===//
-
-// any_extend
-// Implement the anyext instruction in terms of the PTX cvt instructions.
-//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
-//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
-//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
-
-// bitconvert
-// These instructions implement the bit-wise conversion between integer and
-// floating-point types.
-def MOVi32f32
- : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
-def MOVf32i32
- : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
-def MOVi64f64
- : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
-def MOVf64i64
- : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
-
-let neverHasSideEffects = 1 in {
- def MOVPREDrr
- : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
- def MOVU16rr
- : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
- def MOVU32rr
- : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
- def MOVU64rr
- : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
- def MOVF32rr
- : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
- def MOVF64rr
- : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
-}
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def MOVPREDri
- : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
- [(set RegPred:$d, imm:$a)]>;
- def MOVU16ri
- : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
- [(set RegI16:$d, imm:$a)]>;
- def MOVU32ri
- : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
- [(set RegI32:$d, imm:$a)]>;
- def MOVU64ri
- : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
- [(set RegI64:$d, imm:$a)]>;
- def MOVF32ri
- : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
- [(set RegF32:$d, fpimm:$a)]>;
- def MOVF64ri
- : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
- [(set RegF64:$d, fpimm:$a)]>;
-}
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def MOVaddr32
- : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
- [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
- def MOVaddr64
- : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
- [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
- def MOVframe32
- : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a",
- [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>;
- def MOVframe64
- : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a",
- [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>;
-}
-
-// PTX cvt instructions
- // Note that all of these may actually be used; we just define all possible
- // patterns here (that make sense).
-// FIXME: Can we collapse this somehow into a multiclass def?
-
-// To i16
-def CVTu16u32
- : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
-def CVTu16u64
- : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
-def CVTu16f32
- : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.u16.f32\t$d, $a", []>;
-def CVTs16f32
- : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.s16.f32\t$d, $a", []>;
-def CVTu16f64
- : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.u16.f64\t$d, $a", []>;
-def CVTs16f64
- : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.s16.f64\t$d, $a", []>;
-
-// To i32
-def CVTu32u16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
-def CVTs32s16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
-def CVTu32u64
- : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
-def CVTu32f32
- : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.u32.f32\t$d, $a", []>;
-def CVTs32f32
- : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.s32.f32\t$d, $a", []>;
-def CVTu32f64
- : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.u32.f64\t$d, $a", []>;
-def CVTs32f64
- : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.s32.f64\t$d, $a", []>;
-
-// To i64
-def CVTu64u16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
-def CVTs64s16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
-def CVTu64u32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
-def CVTs64s32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
-def CVTu64f32
- : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.u64.f32\t$d, $a", []>;
-def CVTs64f32
- : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
- "cvt$r.s64.f32\t$d, $a", []>;
-def CVTu64f64
- : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.u64.f64\t$d, $a", []>;
-def CVTs64f64
- : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.s64.f64\t$d, $a", []>;
-
-// To f32
-def CVTf32u16
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
- "cvt$r.f32.u16\t$d, $a", []>;
-def CVTf32s16
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
- "cvt$r.f32.s16\t$d, $a", []>;
-def CVTf32u32
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
- "cvt$r.f32.u32\t$d, $a", []>;
-def CVTf32s32
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
- "cvt$r.f32.s32\t$d, $a", []>;
-def CVTf32u64
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
- "cvt$r.f32.u64\t$d, $a", []>;
-def CVTf32s64
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
- "cvt$r.f32.s64\t$d, $a", []>;
-def CVTf32f64
- : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
- "cvt$r.f32.f64\t$d, $a", []>;
-
-// To f64
-def CVTf64u16
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
- "cvt$r.f64.u16\t$d, $a", []>;
-def CVTf64s16
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
- "cvt$r.f64.s16\t$d, $a", []>;
-def CVTf64u32
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
- "cvt$r.f64.u32\t$d, $a", []>;
-def CVTf64s32
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
- "cvt$r.f64.s32\t$d, $a", []>;
-def CVTf64u64
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
- "cvt$r.f64.u64\t$d, $a", []>;
-def CVTf64s64
- : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
- "cvt$r.f64.s64\t$d, $a", []>;
-def CVTf64f32
- : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
- def BRAd
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- // FIXME: The pattern part is blank because it is not yet clear how to use the
- // first operand of PredicateOperand (a RegPred register) here.
- // When this is revisited, make sure to also look at LowerSETCC and try to
- // fold it into negated predicates, if possible.
- def BRAdp
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
- [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
- def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
-}
-
-let hasSideEffects = 1 in {
- def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
-}
-
-///===- Parameter Passing Pseudo-Instructions -----------------------------===//
-
-def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %arg$b", []>;
-def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %arg$b", []>;
-def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %arg$b", []>;
-def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %arg$b", []>;
-def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %arg$b", []>;
-def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %arg$b", []>;
-
-def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
-def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
-def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
-def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
-def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
-def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Selection Patterns
-//===----------------------------------------------------------------------===//
-
-// FADD
-def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
- (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
- (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
- (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
- (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FSUB
-def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
- (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
- (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
- (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
- (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FMUL
-def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
- (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
- (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
- (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
- (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FDIV
-def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
- (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
- (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
- (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
- (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FMUL+FADD
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
- (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>,
- Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>,
- Requires<[SupportsFMA]>;
-
-// FNEG
-def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
-
-// FSQRT
-def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
-
-// FSIN
-def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
-
-// FCOS
-def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
-
-// Type conversion notes:
-// - PTX does not directly support converting a predicate to a value, so we
-// use a select instruction to select either 0 or 1 (integer or fp) based
-// on the truth value of the predicate.
-// - PTX does not directly support converting to a predicate type, so we fake it
-// by performing a greater-than test between the value and zero. This follows
-// the C convention that any non-zero value is equivalent to 'true'.
-
-// Conversion to pred
-def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
-def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
-def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
-def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
-def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
-
-// Conversion to u16
-def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
-def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
-def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
-def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
-def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
-def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
-def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
-def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
-def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
-
-// Conversion to u32
-def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
-def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
-def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
-def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
-def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
-def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
-def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
-def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
-def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
-def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
-def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
-def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
-
-// Conversion to u64
-def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
-def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
- 0xFFFFFFFFFFFFFFFF, 0)>;
-def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
-def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
-def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
-def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
-def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
-def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
-def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
-def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
-def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
-def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
-def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
-def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
-
-// Conversion to f32
-def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
- (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
-def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
-def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
-def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
-def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
-def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
-def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
-def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
-def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
-
-// Conversion to f64
-def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
- (MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>;
-def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
-def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
-def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
-def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
-def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
-def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
-def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
-def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
-
-// setcc - predicate inversion for branch conditions
-def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)),
- (XORripreds RegPred:$a, imm:$b)>;
-
-//===- Intrinsic Instructions ---------------------------------------------===//
-include "PTXIntrinsicInstrInfo.td"
-
-//===- Load/Store Instructions --------------------------------------------===//
-include "PTXInstrLoadStore.td"
-
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
deleted file mode 100644
index 7a62684b91b9..000000000000
--- a/lib/Target/PTX/PTXInstrLoadStore.td
+++ /dev/null
@@ -1,278 +0,0 @@
-//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the PTX load/store instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Addressing Predicates
-// We have to differentiate between 32- and 64-bit pointer types
-def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
-def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
-
-//===----------------------------------------------------------------------===//
-// Pattern Fragments for Loads/Stores
-//===----------------------------------------------------------------------===//
-
-def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTXStateSpace::Global;
- return false;
-}]>;
-
-def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTXStateSpace::Constant;
- return false;
-}]>;
-
-def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTXStateSpace::Shared;
- return false;
-}]>;
-
-def store_global
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTXStateSpace::Global;
- return false;
-}]>;
-
-def store_shared
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTXStateSpace::Shared;
- return false;
-}]>;
-
-// Addressing modes.
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
-def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
-def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
-def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
-def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
-def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
-
-// Address operands
-def MEMri32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI32, i32imm);
-}
-def MEMri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI64, i64imm);
-}
-def LOCALri32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-def LOCALri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i64imm, i64imm);
-}
-def MEMii32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-def MEMii64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i64imm, i64imm);
-}
-// The operand here does not correspond to an actual address, so we
-// can use i32 in 64-bit address modes.
-def MEMpi : Operand<i32> {
- let PrintMethod = "printParamOperand";
- let MIOperandInfo = (ops i32imm);
-}
-def MEMret : Operand<i32> {
- let PrintMethod = "printReturnOperand";
- let MIOperandInfo = (ops i32imm);
-}
-
-
-// Load/store .param space
-def PTXloadparam
- : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXstoreparam
- : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-
-def PTXreadparam
- : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXwriteparam
- : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-
-
-
-//===----------------------------------------------------------------------===//
-// Classes for loads/stores
-//===----------------------------------------------------------------------===//
-multiclass PTX_LD<string opstr, string typestr,
- RegisterClass RC, PatFrag pat_load> {
- def rr32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr32:$a))]>,
- Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr64:$a))]>,
- Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri32:$a))]>,
- Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri64:$a))]>,
- Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs RC:$d),
- (ins MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii32:$a))]>,
- Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs RC:$d),
- (ins MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii64:$a))]>,
- Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
- PatFrag pat_store> {
- def rr32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr32:$a)]>,
- Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr64:$a)]>,
- Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri32:$a)]>,
- Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri64:$a)]>,
- Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs),
- (ins RC:$d, MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii32:$a)]>,
- Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs),
- (ins RC:$d, MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii64:$a)]>,
- Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
- def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
- !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (load_global ADDRlocal32:$a))]>;
- def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
- !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (load_global ADDRlocal64:$a))]>;
- def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
- !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
- [(store_global RC:$d, ADDRlocal32:$a)]>;
- def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
- !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
- [(store_global RC:$d, ADDRlocal64:$a)]>;
-}
-
-multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
- let hasSideEffects = 1 in {
- def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
- !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
- def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
- !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
- [(PTXstoreparam texternalsym:$d, RC:$a)]>;
- }
-}
-
-multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
- defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
- defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
- defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
- defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
- defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
-}
-
-multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
- defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
- defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
- defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
- defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
- defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// Instruction definitions for loads/stores
-//===----------------------------------------------------------------------===//
-
-// Global/shared stores
-defm STg : PTX_ST_ALL<"st.global", store_global>;
-defm STs : PTX_ST_ALL<"st.shared", store_shared>;
-
-// Global/shared/constant loads
-defm LDg : PTX_LD_ALL<"ld.global", load_global>;
-defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
-defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
-
-// Param loads/stores
-defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
-defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
-defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
-defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
-defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
-defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
-
-// Local loads/stores
-defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
-defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
-defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
-defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
-defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
-defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
-
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
deleted file mode 100644
index 3416f1cca96d..000000000000
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ /dev/null
@@ -1,110 +0,0 @@
-//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the PTX-specific intrinsic instructions.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Special Purpose Register Accessor Intrinsics
-
-class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
- : InstPTX<(outs RegI64:$d), (ins),
- !strconcat("mov.u64\t$d, %", regname),
- [(set RegI64:$d, (intop))]>;
-
-class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
- : InstPTX<(outs RegI32:$d), (ins),
- !strconcat("mov.u32\t$d, %", regname),
- [(set RegI32:$d, (intop))]>;
-
-// TODO: Add vector versions of the special-register reads.
-
-//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
-// int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
- int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
- int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
- int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
- int_ptx_read_tid_w>;
-
-//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
-// int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
- int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
- int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
- int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
- int_ptx_read_ntid_w>;
-
-def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
- int_ptx_read_laneid>;
-def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
- int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
- int_ptx_read_nwarpid>;
-
-//def PTX_READ_CTAID_R64 :
-//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
- int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
- int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
- int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
- int_ptx_read_ctaid_w>;
-
-//def PTX_READ_NCTAID_R64 :
-//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
- int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
- int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
- int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
- int_ptx_read_nctaid_w>;
-
-def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
- int_ptx_read_smid>;
-def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
- int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
- int_ptx_read_gridid>;
-
-def PTX_READ_LANEMASK_EQ
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
-def PTX_READ_LANEMASK_LE
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
-def PTX_READ_LANEMASK_LT
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
-def PTX_READ_LANEMASK_GE
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
-def PTX_READ_LANEMASK_GT
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
-
-def PTX_READ_CLOCK
- : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
-def PTX_READ_CLOCK64
- : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
-
-def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
-def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
-def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
-def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
-
-// PTX Parallel Synchronization and Communication Intrinsics
-
-def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
- [(int_ptx_bar_sync imm:$i)]>;
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
deleted file mode 100644
index 3ed67a6a9b47..000000000000
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/PathV2.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-class PTXMCAsmStreamer : public MCStreamer {
- formatted_raw_ostream &OS;
- const MCAsmInfo &MAI;
- OwningPtr<MCInstPrinter> InstPrinter;
- OwningPtr<MCCodeEmitter> Emitter;
-
- SmallString<128> CommentToEmit;
- raw_svector_ostream CommentStream;
-
- unsigned IsVerboseAsm : 1;
- unsigned ShowInst : 1;
-
-public:
- PTXMCAsmStreamer(MCContext &Context,
- formatted_raw_ostream &os,
- bool isVerboseAsm, bool useLoc,
- MCInstPrinter *printer,
- MCCodeEmitter *emitter,
- bool showInst)
- : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
- InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
- IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst) {
- if (InstPrinter && IsVerboseAsm)
- InstPrinter->setCommentStream(CommentStream);
- }
-
- ~PTXMCAsmStreamer() {}
-
- inline void EmitEOL() {
- // If we don't have any comments, just emit a \n.
- if (!IsVerboseAsm) {
- OS << '\n';
- return;
- }
- EmitCommentsAndEOL();
- }
- void EmitCommentsAndEOL();
-
- /// isVerboseAsm - Return true if this streamer supports verbose assembly at
- /// all.
- virtual bool isVerboseAsm() const { return IsVerboseAsm; }
-
- /// hasRawTextSupport - We support EmitRawText.
- virtual bool hasRawTextSupport() const { return true; }
-
- /// AddComment - Add a comment that can be emitted to the generated .s
- /// file if applicable as a QoI issue to make the output of the compiler
- /// more readable. This only affects the MCAsmStreamer, and only when
- /// verbose assembly output is enabled.
- virtual void AddComment(const Twine &T);
-
- /// AddEncodingComment - Add a comment showing the encoding of an instruction.
- virtual void AddEncodingComment(const MCInst &Inst);
-
- /// GetCommentOS - Return a raw_ostream that comments can be written to.
- /// Unlike AddComment, you are required to terminate comments with \n if you
- /// use this method.
- virtual raw_ostream &GetCommentOS() {
- if (!IsVerboseAsm)
- return nulls(); // Discard comments unless in verbose asm mode.
- return CommentStream;
- }
-
- /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
- virtual void AddBlankLine() {
- EmitEOL();
- }
-
- /// @name MCStreamer Interface
- /// @{
-
- virtual void ChangeSection(const MCSection *Section);
- virtual void InitSections() { /* PTX does not use sections */ }
-
- virtual void EmitLabel(MCSymbol *Symbol);
-
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-
- virtual void EmitThumbFunc(MCSymbol *Func);
-
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-
- virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize);
-
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
- virtual void EmitCOFFSymbolStorageClass(int StorageClass);
- virtual void EmitCOFFSymbolType(int Type);
- virtual void EndCOFFSymbolDef();
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
-
- /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
- ///
- /// @param Symbol - The common symbol to emit.
- /// @param Size - The size of the common symbol.
- /// @param ByteAlignment - The alignment of the common symbol in bytes.
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0);
-
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0);
-
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace);
- virtual void EmitULEB128Value(const MCExpr *Value);
- virtual void EmitSLEB128Value(const MCExpr *Value);
- virtual void EmitGPRel32Value(const MCExpr *Value);
-
-
- virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace);
-
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0);
-
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0);
-
- virtual bool EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0);
-
- virtual void EmitFileDirective(StringRef Filename);
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename);
-
- virtual void EmitInstruction(const MCInst &Inst);
-
- /// EmitRawText - If this file is backed by an assembly streamer, this dumps
- /// the specified string in the output .s file. This capability is
- /// indicated by the hasRawTextSupport() predicate.
- virtual void EmitRawText(StringRef String);
-
- virtual void FinishImpl();
-
- /// @}
-
-}; // class PTXMCAsmStreamer
-
-}
-
-/// TODO: Add appropriate implementation of Emit*() methods when needed
-
-void PTXMCAsmStreamer::AddComment(const Twine &T) {
- if (!IsVerboseAsm) return;
-
- // Make sure that CommentStream is flushed.
- CommentStream.flush();
-
- T.toVector(CommentToEmit);
- // Each comment goes on its own line.
- CommentToEmit.push_back('\n');
-
- // Tell the comment stream that the vector changed underneath it.
- CommentStream.resync();
-}
-
-void PTXMCAsmStreamer::EmitCommentsAndEOL() {
- if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
- OS << '\n';
- return;
- }
-
- CommentStream.flush();
- StringRef Comments = CommentToEmit.str();
-
- assert(Comments.back() == '\n' &&
- "Comment array not newline terminated");
- do {
- // Emit a line of comments.
- OS.PadToColumn(MAI.getCommentColumn());
- size_t Position = Comments.find('\n');
- OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
-
- Comments = Comments.substr(Position+1);
- } while (!Comments.empty());
-
- CommentToEmit.clear();
- // Tell the comment stream that the vector changed underneath it.
- CommentStream.resync();
-}
-
-static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
- assert(Bytes && "Invalid size!");
- return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
-}
-
-void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
- assert(Section && "Cannot switch to a null section!");
-}
-
-void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
-
- OS << *Symbol << MAI.getLabelSuffix();
- EmitEOL();
- Symbol->setSection(*getCurrentSection());
-}
-
-void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
-
-void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
-
-void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- OS << *Symbol << " = " << *Value;
- EmitEOL();
-
- // FIXME: Lift context changes into super class.
- Symbol->setVariableValue(Value);
-}
-
-void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
- const MCSymbol *Symbol) {
- OS << ".weakref " << *Alias << ", " << *Symbol;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) {
- report_fatal_error("Unimplemented.");
-}
-
-void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
- MCSymbolAttr Attribute) {}
-
-void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
-
-void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
-
-void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
-
-void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
-
-void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
-
-void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
-
-void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- unsigned Size, unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
- MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment) {}
-
-static inline char toOctal(int X) { return (X&7)+'0'; }
-
-static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
- OS << '"';
-
- for (unsigned i = 0, e = Data.size(); i != e; ++i) {
- unsigned char C = Data[i];
- if (C == '"' || C == '\\') {
- OS << '\\' << (char)C;
- continue;
- }
-
- if (isprint((unsigned char)C)) {
- OS << (char)C;
- continue;
- }
-
- switch (C) {
- case '\b': OS << "\\b"; break;
- case '\f': OS << "\\f"; break;
- case '\n': OS << "\\n"; break;
- case '\r': OS << "\\r"; break;
- case '\t': OS << "\\t"; break;
- default:
- OS << '\\';
- OS << toOctal(C >> 6);
- OS << toOctal(C >> 3);
- OS << toOctal(C >> 0);
- break;
- }
- }
-
- OS << '"';
-}
-
-void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
- if (Data.empty()) return;
-
- if (Data.size() == 1) {
- OS << MAI.getData8bitsDirective(AddrSpace);
- OS << (unsigned)(unsigned char)Data[0];
- EmitEOL();
- return;
- }
-
- // If the data ends with 0 and the target supports .asciz, use it, otherwise
- // use .ascii
- if (MAI.getAscizDirective() && Data.back() == 0) {
- OS << MAI.getAscizDirective();
- Data = Data.substr(0, Data.size()-1);
- } else {
- OS << MAI.getAsciiDirective();
- }
-
- OS << ' ';
- PrintQuotedString(Data, OS);
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
- const char *Directive = 0;
- switch (Size) {
- default: break;
- case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
- case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
- case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
- case 8:
- Directive = MAI.getData64bitsDirective(AddrSpace);
- // If the target doesn't support 64-bit data, emit as two 32-bit halves.
- if (Directive) break;
- int64_t IntValue;
- if (!Value->EvaluateAsAbsolute(IntValue))
- report_fatal_error("Don't know how to emit this value.");
- if (getContext().getAsmInfo().isLittleEndian()) {
- EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
- EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
- } else {
- EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
- EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
- }
- return;
- }
-
- assert(Directive && "Invalid size for machine code value!");
- OS << Directive << *Value;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
- assert(MAI.hasLEB128() && "Cannot print a .uleb");
- OS << ".uleb128 " << *Value;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
- assert(MAI.hasLEB128() && "Cannot print a .sleb");
- OS << ".sleb128 " << *Value;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
- assert(MAI.getGPRel32Directive() != 0);
- OS << MAI.getGPRel32Directive() << *Value;
- EmitEOL();
-}
-
-
-/// EmitFill - Emit NumBytes bytes worth of the value specified by
-/// FillValue. This implements directives such as '.space'.
-void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace) {
- if (NumBytes == 0) return;
-
- if (AddrSpace == 0)
- if (const char *ZeroDirective = MAI.getZeroDirective()) {
- OS << ZeroDirective << NumBytes;
- if (FillValue != 0)
- OS << ',' << (int)FillValue;
- EmitEOL();
- return;
- }
-
- // Emit a byte at a time.
- MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
-}
-
-void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
- int64_t Value,
- unsigned ValueSize,
- unsigned MaxBytesToEmit) {
- // Some assemblers don't support non-power of two alignments, so we always
- // emit alignments as a power of two if possible.
- if (isPowerOf2_32(ByteAlignment)) {
- switch (ValueSize) {
- default: llvm_unreachable("Invalid size for machine code value!");
- case 1: OS << MAI.getAlignDirective(); break;
- // FIXME: use MAI for this!
- case 2: OS << ".p2alignw "; break;
- case 4: OS << ".p2alignl "; break;
- case 8: llvm_unreachable("Unsupported alignment size!");
- }
-
- if (MAI.getAlignmentIsInBytes())
- OS << ByteAlignment;
- else
- OS << Log2_32(ByteAlignment);
-
- if (Value || MaxBytesToEmit) {
- OS << ", 0x";
- OS.write_hex(truncateToSize(Value, ValueSize));
-
- if (MaxBytesToEmit)
- OS << ", " << MaxBytesToEmit;
- }
- EmitEOL();
- return;
- }
-
- // Non-power of two alignment. This is not widely supported by assemblers.
- // FIXME: Parameterize this based on MAI.
- switch (ValueSize) {
- default: llvm_unreachable("Invalid size for machine code value!");
- case 1: OS << ".balign"; break;
- case 2: OS << ".balignw"; break;
- case 4: OS << ".balignl"; break;
- case 8: llvm_unreachable("Unsupported alignment size!");
- }
-
- OS << ' ' << ByteAlignment;
- OS << ", " << truncateToSize(Value, ValueSize);
- if (MaxBytesToEmit)
- OS << ", " << MaxBytesToEmit;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit) {}
-
-bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {return false;}
-
-
-void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
- assert(MAI.hasSingleParameterDotFile());
- OS << "\t.file\t";
- PrintQuotedString(Filename, OS);
- EmitEOL();
-}
-
-// FIXME: should we inherit from MCAsmStreamer?
-bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Directory,
- StringRef Filename) {
- if (!Directory.empty()) {
- if (sys::path::is_absolute(Filename))
- return EmitDwarfFileDirective(FileNo, "", Filename);
- SmallString<128> FullPathName = Directory;
- sys::path::append(FullPathName, Filename);
- return EmitDwarfFileDirective(FileNo, "", FullPathName);
- }
-
- OS << "\t.file\t" << FileNo << ' ';
- PrintQuotedString(Filename, OS);
- EmitEOL();
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
-}
-
-void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
-
-void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
-
- // Show the encoding in a comment if we have a code emitter.
- if (Emitter)
- AddEncodingComment(Inst);
-
- // Show the MCInst if enabled.
- if (ShowInst) {
- Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
- GetCommentOS() << "\n";
- }
-
- // If we have an InstPrinter, use that to print, otherwise print the MCInst.
- if (InstPrinter)
- InstPrinter->printInst(&Inst, OS, "");
- else
- Inst.print(OS, &MAI);
- EmitEOL();
-}
-
-/// EmitRawText - If this file is backed by an assembly streamer, this dumps
-/// the specified string in the output .s file. This capability is
-/// indicated by the hasRawTextSupport() predicate.
-void PTXMCAsmStreamer::EmitRawText(StringRef String) {
- if (!String.empty() && String.back() == '\n')
- String = String.substr(0, String.size()-1);
- OS << String;
- EmitEOL();
-}
-
-void PTXMCAsmStreamer::FinishImpl() {}
-
-namespace llvm {
- MCStreamer *createPTXAsmStreamer(MCContext &Context,
- formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc, bool useCFI,
- bool useDwarfDirectory,
- MCInstPrinter *IP,
- MCCodeEmitter *CE, MCAsmBackend *MAB,
- bool ShowInst) {
- return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
- IP, CE, ShowInst);
- }
-}
diff --git a/lib/Target/PTX/PTXMCInstLower.cpp b/lib/Target/PTX/PTXMCInstLower.cpp
deleted file mode 100644
index 142e639b3ef7..000000000000
--- a/lib/Target/PTX/PTXMCInstLower.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower PTX MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTX.h"
-#include "PTXAsmPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-
-void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- PTXAsmPrinter &AP) {
- OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp;
- OutMI.addOperand(AP.lowerOperand(MO));
- }
-}
-
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
deleted file mode 100644
index 172a0e031356..000000000000
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an information extractor for PTX machine functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-mf-info-extract"
-
-#include "PTX.h"
-#include "PTXTargetMachine.h"
-#include "PTXMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-// NOTE: PTXMFInfoExtract must be run after register allocation!
-
-namespace {
- /// PTXMFInfoExtract - PTX-specific code to extract PTX machine
- /// function information for the PTXAsmPrinter.
- ///
- class PTXMFInfoExtract : public MachineFunctionPass {
- private:
- static char ID;
-
- public:
- PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "PTX Machine Function Info Extractor";
- }
- }; // class PTXMFInfoExtract
-} // end anonymous namespace
-
-using namespace llvm;
-
-char PTXMFInfoExtract::ID = 0;
-
-bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Generate list of all virtual registers used in this function
- for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- unsigned RegType;
- if (TRC == PTX::RegPredRegisterClass)
- RegType = PTXRegisterType::Pred;
- else if (TRC == PTX::RegI16RegisterClass)
- RegType = PTXRegisterType::B16;
- else if (TRC == PTX::RegI32RegisterClass)
- RegType = PTXRegisterType::B32;
- else if (TRC == PTX::RegI64RegisterClass)
- RegType = PTXRegisterType::B64;
- else if (TRC == PTX::RegF32RegisterClass)
- RegType = PTXRegisterType::F32;
- else if (TRC == PTX::RegF64RegisterClass)
- RegType = PTXRegisterType::F64;
- else
- llvm_unreachable("Unkown register class.");
- MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
- }
-
- return false;
-}
-
-FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new PTXMFInfoExtract(TM, OptLevel);
-}
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
deleted file mode 100644
index bb7574cbcd71..000000000000
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares PTX-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_MACHINE_FUNCTION_INFO_H
-#define PTX_MACHINE_FUNCTION_INFO_H
-
-#include "PTX.h"
-#include "PTXParamManager.h"
-#include "PTXRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
-/// contains private PTX target-specific information for each MachineFunction.
-///
-class PTXMachineFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
- bool IsKernel;
- DenseSet<unsigned> RegArgs;
- DenseSet<unsigned> RegRets;
-
- typedef DenseMap<int, std::string> FrameMap;
-
- FrameMap FrameSymbols;
-
- struct RegisterInfo {
- unsigned Reg;
- unsigned Type;
- unsigned Space;
- unsigned Offset;
- unsigned Encoded;
- };
-
- typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
-
- RegisterInfoMap RegInfo;
-
- PTXParamManager ParamManager;
-
-public:
- typedef DenseSet<unsigned>::const_iterator reg_iterator;
-
- PTXMachineFunctionInfo(MachineFunction &MF)
- : IsKernel(false) {
- }
-
- /// getParamManager - Returns the PTXParamManager instance for this function.
- PTXParamManager& getParamManager() { return ParamManager; }
- const PTXParamManager& getParamManager() const { return ParamManager; }
-
- /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
- /// a PTX kernel function.
- void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
- bool isKernel() const { return IsKernel; }
-
- /// argreg_begin/argreg_end - Returns iterators to the set of registers
- /// containing function arguments.
- reg_iterator argreg_begin() const { return RegArgs.begin(); }
- reg_iterator argreg_end() const { return RegArgs.end(); }
-
- /// retreg_begin/retreg_end - Returns iterators to the set of registers
- /// containing the function return values.
- reg_iterator retreg_begin() const { return RegRets.begin(); }
- reg_iterator retreg_end() const { return RegRets.end(); }
-
- /// addRegister - Adds a virtual register to the set of all used registers
- void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
- if (!RegInfo.count(Reg)) {
- RegisterInfo Info;
- Info.Reg = Reg;
- Info.Type = RegType;
- Info.Space = RegSpace;
-
- // Determine register offset
- Info.Offset = 0;
- for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
- e = RegInfo.end(); i != e; ++i) {
- const RegisterInfo& RI = i->second;
- if (RI.Space == RegSpace)
- if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
- Info.Offset++;
- }
-
- // Encode the register data into a single register number
- Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
-
- RegInfo[Reg] = Info;
-
- if (RegSpace == PTXRegisterSpace::Argument)
- RegArgs.insert(Reg);
- else if (RegSpace == PTXRegisterSpace::Return)
- RegRets.insert(Reg);
- }
- }
-
- /// countRegisters - Returns the number of registers of the given type and
- /// space.
- unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
- unsigned Count = 0;
- for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
- i != e; ++i) {
- const RegisterInfo& RI = i->second;
- if (RI.Type == RegType && RI.Space == RegSpace)
- Count++;
- }
- return Count;
- }
-
- /// getEncodedRegister - Returns the encoded value of the register.
- unsigned getEncodedRegister(unsigned Reg) const {
- return RegInfo.lookup(Reg).Encoded;
- }
-
- /// addRetReg - Adds a register to the set of return-value registers.
- void addRetReg(unsigned Reg) {
- if (!RegRets.count(Reg)) {
- RegRets.insert(Reg);
- }
- }
-
- /// addArgReg - Adds a register to the set of function argument registers.
- void addArgReg(unsigned Reg) {
- RegArgs.insert(Reg);
- }
-
- /// getRegisterName - Returns the name of the specified virtual register. This
- /// name is used during PTX emission.
- std::string getRegisterName(unsigned Reg) const {
- if (RegInfo.count(Reg)) {
- const RegisterInfo& RI = RegInfo.lookup(Reg);
- std::string Name;
- raw_string_ostream NameStr(Name);
- decodeRegisterName(NameStr, RI.Encoded);
- NameStr.flush();
- return Name;
- }
- else if (Reg == PTX::NoRegister)
- return "%noreg";
- else
- llvm_unreachable("Register not in register name map");
- }
-
- /// getEncodedRegisterName - Returns the name of the encoded register.
- std::string getEncodedRegisterName(unsigned EncodedReg) const {
- std::string Name;
- raw_string_ostream NameStr(Name);
- decodeRegisterName(NameStr, EncodedReg);
- NameStr.flush();
- return Name;
- }
-
- /// getRegisterType - Returns the type of the specified virtual register.
- unsigned getRegisterType(unsigned Reg) const {
- if (RegInfo.count(Reg))
- return RegInfo.lookup(Reg).Type;
- else
- llvm_unreachable("Unknown register");
- }
-
- /// getOffsetForRegister - Returns the offset of the virtual register
- unsigned getOffsetForRegister(unsigned Reg) const {
- if (RegInfo.count(Reg))
- return RegInfo.lookup(Reg).Offset;
- else
- return 0;
- }
-
- /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
- const char* getFrameSymbol(int FrameIndex) {
- if (FrameSymbols.count(FrameIndex)) {
- return FrameSymbols.lookup(FrameIndex).c_str();
- } else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
- // The whole point of caching this name is to ensure the pointer we pass
- // to any getExternalSymbol() calls will remain valid for the lifetime of
- // the back-end instance. This is to work around an issue in SelectionDAG
- // where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
- return FrameSymbols[FrameIndex].c_str();
- }
- }
-}; // class PTXMachineFunctionInfo
-} // namespace llvm
-
-#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
deleted file mode 100644
index cc1cc711c82e..000000000000
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PTXParamManager class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXParamManager.h"
-#include "PTX.h"
-#include "llvm/ADT/StringExtras.h"
-
-using namespace llvm;
-
-PTXParamManager::PTXParamManager() {
-}
-
-unsigned PTXParamManager::addArgumentParam(unsigned Size) {
- PTXParam Param;
- Param.Type = PTX_PARAM_TYPE_ARGUMENT;
- Param.Size = Size;
-
- std::string Name;
- Name = "__param_";
- Name += utostr(ArgumentParams.size()+1);
- Param.Name = Name;
-
- unsigned Index = AllParams.size();
- AllParams[Index] = Param;
- ArgumentParams.push_back(Index);
-
- return Index;
-}
-
-unsigned PTXParamManager::addReturnParam(unsigned Size) {
- PTXParam Param;
- Param.Type = PTX_PARAM_TYPE_RETURN;
- Param.Size = Size;
-
- std::string Name;
- Name = "__ret_";
- Name += utostr(ReturnParams.size()+1);
- Param.Name = Name;
-
- unsigned Index = AllParams.size();
- AllParams[Index] = Param;
- ReturnParams.push_back(Index);
-
- return Index;
-}
-
-unsigned PTXParamManager::addLocalParam(unsigned Size) {
- PTXParam Param;
- Param.Type = PTX_PARAM_TYPE_LOCAL;
- Param.Size = Size;
-
- std::string Name;
- Name = "__localparam_";
- Name += utostr(LocalParams.size()+1);
- Param.Name = Name;
-
- unsigned Index = AllParams.size();
- AllParams[Index] = Param;
- LocalParams.push_back(Index);
-
- return Index;
-}
-
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
deleted file mode 100644
index 92e7728b4f8b..000000000000
--- a/lib/Target/PTX/PTXParamManager.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PTXParamManager class, which manages all defined .param
-// variables for a particular function.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_PARAM_MANAGER_H
-#define PTX_PARAM_MANAGER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include <string>
-
-namespace llvm {
-
-/// PTXParamManager - This class manages all .param variables defined for a
-/// particular function.
-class PTXParamManager {
-private:
-
- /// PTXParamType - Type of a .param variable
- enum PTXParamType {
- PTX_PARAM_TYPE_ARGUMENT,
- PTX_PARAM_TYPE_RETURN,
- PTX_PARAM_TYPE_LOCAL
- };
-
- /// PTXParam - Definition of a PTX .param variable
- struct PTXParam {
- PTXParamType Type;
- unsigned Size;
- std::string Name;
- };
-
- DenseMap<unsigned, PTXParam> AllParams;
- SmallVector<unsigned, 4> ArgumentParams;
- SmallVector<unsigned, 4> ReturnParams;
- SmallVector<unsigned, 4> LocalParams;
-
-public:
-
- typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
-
- PTXParamManager();
-
- param_iterator arg_begin() const { return ArgumentParams.begin(); }
- param_iterator arg_end() const { return ArgumentParams.end(); }
- param_iterator ret_begin() const { return ReturnParams.begin(); }
- param_iterator ret_end() const { return ReturnParams.end(); }
- param_iterator local_begin() const { return LocalParams.begin(); }
- param_iterator local_end() const { return LocalParams.end(); }
-
- /// addArgumentParam - Returns a new .param used as an argument.
- unsigned addArgumentParam(unsigned Size);
-
- /// addReturnParam - Returns a new .param used as a return argument.
- unsigned addReturnParam(unsigned Size);
-
- /// addLocalParam - Returns a new .param used as a local .param variable.
- unsigned addLocalParam(unsigned Size);
-
- /// getParamName - Returns the name of the parameter as a string.
- const std::string &getParamName(unsigned Param) const {
- assert(AllParams.count(Param) == 1 && "Param has not been defined!");
- return AllParams.find(Param)->second.Name;
- }
-
- /// getParamSize - Returns the size of the parameter in bits.
- unsigned getParamSize(unsigned Param) const {
- assert(AllParams.count(Param) == 1 && "Param has not been defined!");
- return AllParams.find(Param)->second.Size;
- }
-
-};
-
-}
-
-#endif
-
diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp
deleted file mode 100644
index 7fd53752bf66..000000000000
--- a/lib/Target/PTX/PTXRegAlloc.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a register allocator for PTX code.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-reg-alloc"
-
-#include "PTX.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-
-using namespace llvm;
-
-namespace {
- // Special register allocator for PTX.
- class PTXRegAlloc : public MachineFunctionPass {
- public:
- static char ID;
- PTXRegAlloc() : MachineFunctionPass(ID) {}
-
- virtual const char* getPassName() const {
- return "PTX Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- // We do not actually do anything (at least not yet).
- return false;
- }
- };
-
- char PTXRegAlloc::ID = 0;
-
- static RegisterRegAlloc
- ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
-}
-
-FunctionPass *llvm::createPTXRegisterAllocator() {
- return new PTXRegAlloc();
-}
-
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
deleted file mode 100644
index b6ffd38232e3..000000000000
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXRegisterInfo.h"
-#include "PTX.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "PTXGenRegisterInfo.inc"
-
-using namespace llvm;
-
-PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &tii)
- // PTX does not have a return address register.
- : PTXGenRegisterInfo(0), TII(tii) {
-}
-
-void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
- int /*SPAdj*/,
- RegScavenger * /*RS*/) const {
- llvm_unreachable("FrameIndex should have been previously eliminated!");
-}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
deleted file mode 100644
index 5614ce793b90..000000000000
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the MRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_REGISTER_INFO_H
-#define PTX_REGISTER_INFO_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/BitVector.h"
-
-#define GET_REGINFO_HEADER
-#include "PTXGenRegisterInfo.inc"
-
-namespace llvm {
-class PTXTargetMachine;
-class MachineFunction;
-
-struct PTXRegisterInfo : public PTXGenRegisterInfo {
-private:
- const TargetInstrInfo &TII;
-
-public:
- PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &tii);
-
- virtual const uint16_t
- *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
- static const uint16_t CalleeSavedRegs[] = { 0 };
- return CalleeSavedRegs; // save nothing
- }
-
- virtual BitVector getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- return Reserved; // reserve no regs
- }
-
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
- RegScavenger *RS = NULL) const;
-
- virtual unsigned getFrameRegister(const MachineFunction &MF) const {
- llvm_unreachable("PTX does not have a frame register");
- }
-}; // struct PTXRegisterInfo
-} // namespace llvm
-
-#endif // PTX_REGISTER_INFO_H
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
deleted file mode 100644
index e8b262e48bde..000000000000
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ /dev/null
@@ -1,36 +0,0 @@
-//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class PTXReg<string n> : Register<n> {
- let Namespace = "PTX";
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// The generated register info code throws warnings for empty register classes
-// (e.g. zero-length arrays), so we use a dummy register here just to prevent
-// these warnings.
-def DUMMY_REG : PTXReg<"R0">;
-
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
-def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
-def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
-def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
-def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
-def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
-
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
deleted file mode 100644
index a116fabaa53b..000000000000
--- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PTXSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-selectiondag-info"
-#include "PTXTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-using namespace llvm;
-
-PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
-}
-
-PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
-}
-
-SDValue
-PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const {
- // Do repeated 4-byte loads and stores. To be improved.
- // This requires 4-byte alignment.
- if ((Align & 3) != 0)
- return SDValue();
- // This requires the copy size to be a constant, preferably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- // Always inline memcpys. In PTX, we do not have a C library that provides
- // a memcpy function.
- //if (!AlwaysInline)
- // return SDValue();
-
- unsigned BytesLeft = SizeVal & 3;
- unsigned NumMemOps = SizeVal >> 2;
- unsigned EmittedNumMemOps = 0;
- EVT VT = MVT::i32;
- unsigned VTSize = 4;
- unsigned i = 0;
- const unsigned MAX_LOADS_IN_LDM = 6;
- SDValue TFOps[MAX_LOADS_IN_LDM];
- SDValue Loads[MAX_LOADS_IN_LDM];
- uint64_t SrcOff = 0, DstOff = 0;
- EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
-
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, PointerType, Src,
- DAG.getConstant(SrcOff, PointerType)),
- SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, PointerType, Dst,
- DAG.getConstant(DstOff, PointerType)),
- DstPtrInfo.getWithOffset(DstOff),
- isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- EmittedNumMemOps += i;
- }
-
- if (BytesLeft == 0)
- return Chain;
-
- // Issue loads / stores for the trailing (1 - 3) bytes.
- unsigned BytesLeftSave = BytesLeft;
- i = 0;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, PointerType, Src,
- DAG.getConstant(SrcOff, PointerType)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false,
- false, 0);
- TFOps[i] = Loads[i].getValue(1);
- ++i;
- SrcOff += VTSize;
- BytesLeft -= VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- i = 0;
- BytesLeft = BytesLeftSave;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, PointerType, Dst,
- DAG.getConstant(DstOff, PointerType)),
- DstPtrInfo.getWithOffset(DstOff), false, false, 0);
- ++i;
- DstOff += VTSize;
- BytesLeft -= VTSize;
- }
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-}
-
-SDValue PTXSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const {
- llvm_unreachable("memset lowering not implemented for PTX yet");
-}
-
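Editor's note: the deleted EmitTargetCodeForMemcpy above lowered a constant-size memcpy as groups of up to MAX_LOADS_IN_LDM 4-byte loads, a TokenFactor barrier, the matching stores, and finally i16/i8 accesses for the 1-3 trailing bytes. The standalone C++ sketch below models only that size bookkeeping (chunk counts and tail widths), not the SelectionDAG node construction; planMemcpy and its output strings are illustrative names, not LLVM APIs.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Plan a constant-size copy the way the deleted routine above does: groups of
// up to six 4-byte load/store pairs, then an i16/i8 tail for the last bytes.
static void planMemcpy(uint64_t SizeVal) {
  const uint64_t MaxLoadsPerGroup = 6;   // mirrors MAX_LOADS_IN_LDM
  uint64_t NumMemOps = SizeVal >> 2;     // 4-byte load/store pairs
  unsigned BytesLeft = SizeVal & 3;      // 0-3 trailing bytes

  for (uint64_t Emitted = 0; Emitted < NumMemOps;) {
    uint64_t Group = std::min(MaxLoadsPerGroup, NumMemOps - Emitted);
    std::printf("%llu i32 loads, TokenFactor, %llu i32 stores\n",
                (unsigned long long)Group, (unsigned long long)Group);
    Emitted += Group;
  }
  while (BytesLeft) {                    // i16 while >= 2 bytes remain, else i8
    unsigned VTSize = (BytesLeft >= 2) ? 2 : 1;
    std::printf("one i%u load/store\n", VTSize * 8);
    BytesLeft -= VTSize;
  }
}

int main() {
  planMemcpy(27);  // one group of six i32 pairs, then an i16 and an i8 tail
  return 0;
}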
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.h b/lib/Target/PTX/PTXSelectionDAGInfo.h
deleted file mode 100644
index e0c716718f08..000000000000
--- a/lib/Target/PTX/PTXSelectionDAGInfo.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PTX subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTXSELECTIONDAGINFO_H
-#define PTXSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
-/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
-class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const PTXSubtarget *Subtarget;
-
-public:
- explicit PTXSelectionDAGInfo(const TargetMachine &TM);
- ~PTXSelectionDAGInfo();
-
- virtual
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const;
-
- virtual
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align,
- bool isVolatile,
- MachinePointerInfo DstPtrInfo) const;
-};
-
-}
-
-#endif
-
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
deleted file mode 100644
index 454f64e6bba3..000000000000
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PTX specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXSubtarget.h"
-#include "PTX.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "PTXGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-void PTXSubtarget::anchor() { }
-
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : PTXGenSubtargetInfo(TT, CPU, FS),
- PTXTarget(PTX_COMPUTE_1_0),
- PTXVersion(PTX_VERSION_2_0),
- SupportsDouble(false),
- SupportsFMA(true),
- Is64Bit(is64Bit) {
- std::string TARGET = CPU;
- if (TARGET.empty())
- TARGET = "generic";
- ParseSubtargetFeatures(TARGET, FS);
-}
-
-std::string PTXSubtarget::getTargetString() const {
- switch(PTXTarget) {
- default: llvm_unreachable("Unknown PTX target");
- case PTX_SM_1_0: return "sm_10";
- case PTX_SM_1_1: return "sm_11";
- case PTX_SM_1_2: return "sm_12";
- case PTX_SM_1_3: return "sm_13";
- case PTX_SM_2_0: return "sm_20";
- case PTX_SM_2_1: return "sm_21";
- case PTX_SM_2_2: return "sm_22";
- case PTX_SM_2_3: return "sm_23";
- case PTX_COMPUTE_1_0: return "compute_10";
- case PTX_COMPUTE_1_1: return "compute_11";
- case PTX_COMPUTE_1_2: return "compute_12";
- case PTX_COMPUTE_1_3: return "compute_13";
- case PTX_COMPUTE_2_0: return "compute_20";
- }
-}
-
-std::string PTXSubtarget::getPTXVersionString() const {
- switch(PTXVersion) {
- case PTX_VERSION_2_0: return "2.0";
- case PTX_VERSION_2_1: return "2.1";
- case PTX_VERSION_2_2: return "2.2";
- case PTX_VERSION_2_3: return "2.3";
- }
- llvm_unreachable("Invalid PTX version");
-}
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
deleted file mode 100644
index ce93fef02a11..000000000000
--- a/lib/Target/PTX/PTXSubtarget.h
+++ /dev/null
@@ -1,131 +0,0 @@
-//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PTX specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_SUBTARGET_H
-#define PTX_SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "PTXGenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
- class PTXSubtarget : public PTXGenSubtargetInfo {
- virtual void anchor();
- public:
-
- /**
- * Enumeration of Shader Models supported by the back-end.
- */
- enum PTXTargetEnum {
- PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
- PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
- PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
- PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
- PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
- PTX_LAST_COMPUTE,
-
- PTX_SM_1_0, /*< Shader Model 1.0 */
- PTX_SM_1_1, /*< Shader Model 1.1 */
- PTX_SM_1_2, /*< Shader Model 1.2 */
- PTX_SM_1_3, /*< Shader Model 1.3 */
- PTX_SM_2_0, /*< Shader Model 2.0 */
- PTX_SM_2_1, /*< Shader Model 2.1 */
- PTX_SM_2_2, /*< Shader Model 2.2 */
- PTX_SM_2_3, /*< Shader Model 2.3 */
- PTX_LAST_SM
- };
-
- /**
- * Enumeration of PTX versions supported by the back-end.
- *
- * Currently, PTX 2.0 is the minimum supported version.
- */
- enum PTXVersionEnum {
- PTX_VERSION_2_0, /*< PTX Version 2.0 */
- PTX_VERSION_2_1, /*< PTX Version 2.1 */
- PTX_VERSION_2_2, /*< PTX Version 2.2 */
- PTX_VERSION_2_3 /*< PTX Version 2.3 */
- };
-
- private:
-
- /// Shader Model supported on the target GPU.
- PTXTargetEnum PTXTarget;
-
- /// PTX Language Version.
- PTXVersionEnum PTXVersion;
-
- // The native .f64 type is supported on the hardware.
- bool SupportsDouble;
-
- // Support the fused-multiply add (FMA) and multiply-add (MAD)
- // instructions
- bool SupportsFMA;
-
- // Use .u64 instead of .u32 for addresses.
- bool Is64Bit;
-
- public:
-
- PTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit);
-
- // Target architecture accessors
- std::string getTargetString() const;
-
- std::string getPTXVersionString() const;
-
- bool supportsDouble() const { return SupportsDouble; }
-
- bool is64Bit() const { return Is64Bit; }
-
- bool supportsFMA() const { return SupportsFMA; }
-
- bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
-
- bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
-
- bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
-
- bool fdivNeedsRoundingMode() const {
- return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
- (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
- }
-
- bool fmadNeedsRoundingMode() const {
- return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
- (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
- }
-
- bool useParamSpaceForDeviceArgs() const {
- return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
- (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
- }
-
- bool callsAreHandled() const {
- return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
- (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
- }
-
- bool emitPtrAttribute() const {
- return PTXVersion >= PTX_VERSION_2_2;
- }
-
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- }; // class PTXSubtarget
-} // namespace llvm
-
-#endif // PTX_SUBTARGET_H
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
deleted file mode 100644
index 97b8de1a0b44..000000000000
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the PTX target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXTargetMachine.h"
-#include "PTX.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Scalar.h"
-
-
-using namespace llvm;
-
-namespace llvm {
- MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc,
- bool useCFI, bool useDwarfDirectory,
- MCInstPrinter *InstPrint,
- MCCodeEmitter *CE,
- MCAsmBackend *MAB,
- bool ShowInst);
-}
-
-extern "C" void LLVMInitializePTXTarget() {
-
- RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
- RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
-
- TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
-}
-
-namespace {
- const char* DataLayout32 =
- "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
- const char* DataLayout64 =
- "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-}
-
-// DataLayout and FrameLowering are filled with dummy data
-PTXTargetMachine::PTXTargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- DataLayout(is64Bit ? DataLayout64 : DataLayout32),
- Subtarget(TT, CPU, FS, is64Bit),
- FrameLowering(Subtarget),
- InstrInfo(*this),
- TSInfo(*this),
- TLInfo(*this) {
-}
-
-void PTX32TargetMachine::anchor() { }
-
-PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
-
-void PTX64TargetMachine::anchor() { }
-
-PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
-namespace llvm {
-/// PTX Code Generator Pass Configuration Options.
-class PTXPassConfig : public TargetPassConfig {
-public:
- PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- PTXTargetMachine &getPTXTargetMachine() const {
- return getTM<PTXTargetMachine>();
- }
-
- bool addInstSelector();
- FunctionPass *createTargetRegisterAllocator(bool);
- void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
- bool addPostRegAlloc();
- void addMachineLateOptimization();
- bool addPreEmitPass();
-};
-} // namespace
-
-TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
- PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
- PassConfig->disablePass(PrologEpilogCodeInserterID);
- return PassConfig;
-}
-
-bool PTXPassConfig::addInstSelector() {
- PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
- return false;
-}
-
-FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
- return createPTXRegisterAllocator();
-}
-
-// Modify the optimized compilation path to bypass optimized register allocation.
-void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- addFastRegAlloc(RegAllocPass);
-}
-
-bool PTXPassConfig::addPostRegAlloc() {
-  // PTXMFInfoExtract must run after register allocation!
- //PM->add(createPTXMFInfoExtract(getPTXTargetMachine()));
- return false;
-}
-
-/// Add passes that optimize machine instructions after register allocation.
-void PTXPassConfig::addMachineLateOptimization() {
- if (addPass(BranchFolderPassID) != &NoPassID)
- printAndVerify("After BranchFolding");
-
- if (addPass(TailDuplicateID) != &NoPassID)
- printAndVerify("After TailDuplicate");
-}
-
-bool PTXPassConfig::addPreEmitPass() {
- PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
- PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
- return true;
-}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
deleted file mode 100644
index 278d1555b00b..000000000000
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PTX specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_TARGET_MACHINE_H
-#define PTX_TARGET_MACHINE_H
-
-#include "PTXISelLowering.h"
-#include "PTXInstrInfo.h"
-#include "PTXFrameLowering.h"
-#include "PTXSelectionDAGInfo.h"
-#include "PTXSubtarget.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class PTXTargetMachine : public LLVMTargetMachine {
- private:
- const TargetData DataLayout;
- PTXSubtarget Subtarget; // has to be initialized before FrameLowering
- PTXFrameLowering FrameLowering;
- PTXInstrInfo InstrInfo;
- PTXSelectionDAGInfo TSInfo;
- PTXTargetLowering TLInfo;
-
- public:
- PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit);
-
- virtual const TargetData *getTargetData() const { return &DataLayout; }
-
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
-
- virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo(); }
-
- virtual const PTXTargetLowering *getTargetLowering() const {
- return &TLInfo; }
-
- virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
-
- // Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &,
- JITCodeEmitter &,
- bool = true) {
- return true;
- }
-
- // Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &,
- MCContext *&,
- raw_ostream &,
- bool = true) {
- return true;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-}; // class PTXTargetMachine
-
-
-class PTX32TargetMachine : public PTXTargetMachine {
- virtual void anchor();
-public:
-
- PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-}; // class PTX32TargetMachine
-
-class PTX64TargetMachine : public PTXTargetMachine {
- virtual void anchor();
-public:
-
- PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-}; // class PTX64TargetMachine
-
-} // namespace llvm
-
-#endif // PTX_TARGET_MACHINE_H
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
deleted file mode 100644
index d9a5da3a9082..000000000000
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPTXInfo
- PTXTargetInfo.cpp
- )
-
-add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
deleted file mode 100644
index 09a27358da9f..000000000000
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTX.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target llvm::ThePTX32Target;
-Target llvm::ThePTX64Target;
-
-extern "C" void LLVMInitializePTXTargetInfo() {
- // see llvm/ADT/Triple.h
- RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
- "PTX (32-bit) [Experimental]");
- RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
- "PTX (64-bit) [Experimental]");
-}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index bcd8bd291623..192d18d66440 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(PowerPCCodeGen
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
PPCCodeEmitter.cpp
+ PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
PPCInstrInfo.cpp
PPCISelDAGToDAG.cpp
@@ -28,6 +29,8 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
+add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 61d23ce06aa1..d175e3e79eb6 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
- assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!Modifier) {
+ unsigned CCReg = MI->getOperand(OpNo+1).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+
+ // Print the CR bit number. The Code is ((BI << 5) | BO) for a
+ // BCC, but we must have the positive form here (BO == 12)
+ unsigned BI = Code >> 5;
+ assert((Code & 0xF) == 12 &&
+ "BO in predicate bit must have the positive form");
+
+ unsigned Value = 4*RegNo + BI;
+ O << Value;
+ return;
+ }
+
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
case PPC::PRED_ALWAYS: return; // Don't print anything for always.
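Editor's note: the new no-modifier path in printPredicateOperand above turns the predicate immediate back into a CR bit number: the immediate packs (BI << 5) | BO, and with the positive form (BO == 12) the printed value is 4*CRField + BI. A small standalone sketch of that arithmetic, with made-up example values rather than a real MCInst:

#include <cassert>
#include <cstdio>

// Recover the CR bit number printed by the no-modifier path above.
static unsigned crBitNumber(unsigned Code, unsigned CRField) {
  assert((Code & 0xF) == 12 && "expects the positive BO form, as asserted above");
  unsigned BI = Code >> 5;        // bit index within the 4-bit CR field
  return 4 * CRField + BI;
}

int main() {
  // Made-up example: BI = 2 (the "eq" bit), BO = 12, condition register CR6.
  unsigned Code = (2u << 5) | 12u;
  std::printf("%u\n", crBitNumber(Code, /*CRField=*/6));  // prints 26
  return 0;
}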
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 73fd5342a165..8f1e211c3e96 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -42,7 +42,7 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier);
+ raw_ostream &O, const char *Modifier = 0);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 5a6827ffd8d3..f6524222fd79 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -77,6 +77,7 @@ public:
} // end anonymous namespace
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCMCCodeEmitter(MCII, STI, Ctx);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index b7fa0646288d..7162e158f033 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -22,6 +22,7 @@ class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
@@ -31,6 +32,7 @@ extern Target ThePPC32Target;
extern Target ThePPC64Target;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 24a7178d1ff9..9103e1232505 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -30,6 +30,7 @@ namespace llvm {
class AsmPrinter;
class MCInst;
+ FunctionPass *createPPCCTRLoops();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -50,21 +51,27 @@ namespace llvm {
/// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB = 1,
- /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 4, MO_HA16 = 8,
-
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 16,
+ MO_PIC_FLAG = 4,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 32,
+ MO_NLP_FLAG = 8,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 64
+ MO_NLP_HIDDEN_FLAG = 16,
+
+ /// The next are not flags but distinct values.
+ MO_ACCESS_MASK = 224,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 32, MO_HA16 = 64,
+
+ MO_TPREL16_HA = 96,
+ MO_TPREL16_LO = 128
};
} // end namespace PPCII
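Editor's note: the PPC.h hunk above repacks the target operand flags: MO_PIC_FLAG, MO_NLP_FLAG and MO_NLP_HIDDEN_FLAG stay independent OR-able bits, while the relocation access kind (lo16, ha16, and the new TLS forms) now lives as a single value in the bits covered by MO_ACCESS_MASK (224 == 0xE0). A hedged sketch of how a consumer could separate the two parts; the enum mirrors the values in the hunk, but the helpers are illustrative, not LLVM code:

// Enumerator values copied from the hunk above; helpers are illustrative.
enum {
  MO_DARWIN_STUB = 1,
  MO_PIC_FLAG = 4, MO_NLP_FLAG = 8, MO_NLP_HIDDEN_FLAG = 16,
  MO_ACCESS_MASK = 224,                 // 0xE0: holds one access-kind value
  MO_LO16 = 32, MO_HA16 = 64, MO_TPREL16_HA = 96, MO_TPREL16_LO = 128
};

// Independent flag: may be combined freely with an access kind.
static bool isPICReference(unsigned TargetFlags) {
  return (TargetFlags & MO_PIC_FLAG) != 0;
}

// Access kind: exactly one value under MO_ACCESS_MASK, e.g. MO_LO16.
static unsigned accessKind(unsigned TargetFlags) {
  return TargetFlags & MO_ACCESS_MASK;
}

// Example: lo16(symbol-picbase) would carry MO_LO16 | MO_PIC_FLAG, so
// accessKind() yields MO_LO16 and isPICReference() is true.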
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index c554d39434c8..b7f16884363a 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -35,6 +35,8 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -42,12 +44,14 @@ def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
-def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
- "Enable GPUL instructions">;
+def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
+ "Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
@@ -64,8 +68,10 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
-def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
-def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
@@ -74,28 +80,37 @@ def : Processor<"603ev", G3Itineraries, [Directive603]>;
def : Processor<"604", G3Itineraries, [Directive604]>;
def : Processor<"604e", G3Itineraries, [Directive604]>;
def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"750", G4Itineraries, [Directive750]>;
+def : Processor<"g3", G3Itineraries, [Directive750]>;
def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
-def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"970", G5Itineraries,
[Directive970, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit
- /*, Feature64BitRegs */]>;
+def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureSTFIWX, FeatureISEL,
+ Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : Processor<"pwr6", G5Itineraries,
+ [DirectivePwr6, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"pwr7", G5Itineraries,
+ [DirectivePwr7, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fb7aa71d98d3..f76b89c803ab 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -22,8 +22,8 @@
#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
@@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'c': // Don't print "$" before a global var name or constant.
break; // PPC never has a prefix.
case 'L': // Write second word of DImode reference.
@@ -451,11 +453,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc750",
"ppc970",
"ppcA2",
+ "power6",
+ "power7",
"ppc64"
};
unsigned Directive = Subtarget.getDarwinDirective();
- if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 5f775e16f1ca..21a0fb200f20 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -135,21 +135,33 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MBBStartOffset += 4;
continue;
}
-
+
// Otherwise, we have to expand it to a long branch.
- // The BCC operands are:
- // 0. PPC branch predicate
- // 1. CR register
- // 2. Target MBB
- PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
- unsigned CRReg = I->getOperand(1).getReg();
-
MachineInstr *OldBranch = I;
DebugLoc dl = OldBranch->getDebugLoc();
-
- // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
- BuildMI(MBB, I, dl, TII->get(PPC::BCC))
- .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ if (I->getOpcode() == PPC::BCC) {
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+ } else {
+ llvm_unreachable("Unhandled branch type!");
+ }
// Uncond branch to the real destination.
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
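Editor's note: the long-branch expansion above keeps its existing shape for BCC (invert the predicate, hop over an unconditional branch at $PC+8, then branch to the real target) and now also handles the CTR-decrement branches by swapping BDNZ/BDZ and their 64-bit forms. A standalone sketch of the opcode inversion and the resulting two-instruction pattern; the enum and helper are stand-ins, not the PPC:: definitions:

#include <cassert>

// Stand-in opcode names mirroring the PPC:: enumerators used above.
enum Opcode { BCC, BDNZ, BDNZ8, BDZ, BDZ8, B };

// The CTR-decrement branches invert by swapping to the opposite opcode;
// BCC instead inverts its predicate operand (not modeled here).
static Opcode invertedCTRBranch(Opcode Op) {
  switch (Op) {
  case BDNZ:  return BDZ;
  case BDNZ8: return BDZ8;
  case BDZ:   return BDNZ;
  case BDZ8:  return BDNZ8;
  default:    assert(false && "not a CTR-decrement branch"); return Op;
  }
}

// Expansion shape, in pseudo-assembly:
//   bdnz  far_target          ; displacement does not fit in the branch
// becomes
//   bdz   $+8                 ; inverted form skips the next instruction
//   b     far_target          ; unconditional branch reaches further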
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 000000000000..2a2abb171fb1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,724 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+// This pass is based on the HexagonHardwareLoops pass.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for CTR loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested CTR loops.
+// - No function calls in loops.
+//
+// Note: As with unconverted loops, PPCBranchSelector must be run after this
+// pass in order to convert long-displacement jumps into jump pairs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ctrloops"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+
+namespace {
+ class CountValue;
+ struct PPCCTRLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ PPCCTRLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "PPC CTR Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ void getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+    /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+    /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidOperation - Return true if the instruction is not valid within
+ /// a CTR loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+    /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the CTR loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+    /// convertToCTRLoop - Given a loop, check if we can convert it to a
+ /// CTR loop. If so, then perform the conversion and return true.
+ bool convertToCTRLoop(MachineLoop *L);
+
+ /// isDead - Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const;
+
+ /// removeIfDead - Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+ };
+
+ char PPCCTRLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+  // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+} // end anonymous namespace
+
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
+ if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ SignedCmp = false;
+ return true;
+ }
+
+ return false;
+}
+
+
+/// createPPCCTRLoops - Factory for creating
+/// the CTR loop phase.
+FunctionPass *llvm::createPPCCTRLoops() {
+ return new PPCCTRLoops();
+}
+
+
+bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
+
+ bool Changed = false;
+
+ // get the loop information
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // get the register information
+ MRI = &MF.getRegInfo();
+  // the target-specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToCTRLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+void
+PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return; // multiple backedges?
+
+ // make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+  //   - The definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+      if (L->contains(MBB)) { // operand comes from the backedge
+ // Check if the definition is an induction operation.
+ MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ IOps.push_back(DI);
+ IVars.push_back(MPhi);
+ }
+ }
+ }
+ }
+ return;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const {
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a CTR loop if the loop has more than one exit.
+ if (LastMBB == 0)
+ return 0;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI->getOpcode() != PPC::BCC)
+ return 0;
+
+ // We need to make sure that this compare is defining the condition
+ // register actually used by the terminating branch.
+
+ unsigned PredReg = LastI->getOperand(1).getReg();
+ DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
+
+ unsigned PredCond = LastI->getOperand(0).getImm();
+ if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
+ return 0;
+
+  // Check that the loop has an induction variable.
+ SmallVector<MachineInstr *, 4> IVars, IOps;
+ getCanonicalInductionVariable(L, IVars, IOps);
+ for (unsigned i = 0; i < IVars.size(); ++i) {
+ MachineInstr *IOp = IOps[i];
+ MachineInstr *IV_Inst = IVars[i];
+
+ // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
+ // if Imm is 0, get the count from the PHI opnd
+    //  if Imm is -M, then M is the count
+ // Otherwise, Imm is the count
+ MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ DEBUG(dbgs() << "Considering:\n");
+ DEBUG(dbgs() << " induction operation: " << *IOp);
+ DEBUG(dbgs() << " induction variable: " << *IV_Inst);
+ DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
+ bool SignedCmp;
+ MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ MI->getOperand(0).getReg() == PredReg) {
+
+ OldInsts.push_back(MI);
+ OldInsts.push_back(IOp);
+
+ DEBUG(dbgs() << " compare: " << *MI);
+
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+
+ int64_t ImmVal;
+ if (SignedCmp)
+ ImmVal = (short) MO.getImm();
+ else
+ ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
+
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ unsigned InitialValueReg = InitialValue->getReg();
+
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+
+ // Here we need to look for an immediate load (an li or lis/ori pair).
+ if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
+ DefInstr->getOpcode() == PPC::ORI)) {
+ int64_t start = (short) DefInstr->getOperand(2).getImm();
+ const MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
+ DefInstr2->getOpcode() == PPC::LIS)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+ DEBUG(dbgs() << " initial constant: " << *DefInstr2);
+
+ start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
+
+ int64_t count = ImmVal - start;
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
+ DefInstr->getOpcode() == PPC::LI)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+
+ int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ } else if (iv_value == 1 || iv_value == -1) {
+ // We can't determine a constant starting value.
+ if (ImmVal == 0) {
+ return new CountValue(InitialValueReg, iv_value > 0);
+ }
+ // FIXME: handle non-zero end value.
+ }
+ // FIXME: handle non-unit increments (we might not want to introduce division
+ // but we can handle some 2^n cases with shifts).
+
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+/// addi iv, c
+///
+bool
+PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
+ MI->getOperand(1).isReg() && // could be a frame index instead
+ MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidOperation - Return true if the operation is invalid within
+/// CTR loop.
+bool
+PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // call is not allowed because the callee may use a CTR loop
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // check if the instruction defines a CTR loop register
+ // (this will also catch nested CTR loops)
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the CTR loop function.
+///
+bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isDead returns true if the instruction is dead
+/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
+/// with special cases for inline asm, physical registers and instructions with
+/// side effects removed)
+bool PPCCTRLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!MRI->use_nodbg_empty(Reg)) {
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+ // this instruction is dead: both it and the phi node can be removed.
+ MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
+ if (llvm::next(I) == MRI->use_end() &&
+ I.getOperand().getParent()->isPHI()) {
+ MachineInstr *OnePhi = I.getOperand().getParent();
+
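+ // Check that every use of the phi's defs is MI itself; any other user
+ // means MI is not dead.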
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (OPO.isReg() && OPO.isDef()) {
+ unsigned OPReg = OPO.getReg();
+
+ MachineRegisterInfo::use_iterator nextJ;
+ for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
+ E = MRI->use_end(); J!=E; J=nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand& Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ if (MI != UseMI) {
+ // The phi node has a user that is not MI, bail...
+ return false;
+ }
+ }
+ }
+ }
+
+ DeadPhis.push_back(OnePhi);
+ } else {
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim
+
+ SmallVector<MachineInstr *, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ if (Use.isDebug()) // this might also be an instr -> phi -> instr case
+ // which can also be removed.
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i) {
+ DeadPhis[i]->eraseFromParent();
+ }
+ }
+}
+
+/// convertToCTRLoop - Check if the loop is a candidate for
+/// converting to a CTR loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if its trip count can be determined, either as a
+/// register value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in LLVM.
+bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToCTRLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+
+ SmallVector<MachineInstr *, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L, OldInsts);
+ if (TripCount == 0) {
+ DEBUG(dbgs() << "failed to get trip count!\n");
+ return false;
+ }
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's no place for the loop instruction.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
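+ // Take the debug location from the insertion point, if there is an
+ // instruction there.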
+ DebugLoc dl;
+ if (InsertPos != Preheader->end())
+ dl = InsertPos->getDebugLoc();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a CTR loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the 1st iteration, and the loop
+ // latch may contain instrs. that need to be executed after the 1st iter.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a CTR loop
+ DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
+
+ MachineFunction *MF = LastMBB->getParent();
+ const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
+ bool isPPC64 = Subtarget.isPPC64();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+
+ unsigned CountReg;
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ const TargetRegisterClass *SrcRC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
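+ // On PPC64 a 32-bit count register must be sign-extended into a 64-bit
+ // register; otherwise a plain copy suffices.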
+ unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ (unsigned) PPC::EXTSW_32_64 :
+ (unsigned) TargetOpcode::COPY;
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
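+ // The register may hold the negated trip count; if so, negate it here.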
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
+ CountReg).addReg(CountReg1);
+ }
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Put the trip count in a register for transfer into the count register.
+
+ int64_t CountImm = TripCount->getImm();
+ assert(!TripCount->isNeg() && "Constant trip count must be positive");
+
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
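+ // Values too large for a single li are materialized with a lis/ori pair.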
+ if (CountImm > 0xFFFF) {
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
+ CountReg).addImm(CountImm >> 16);
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+ CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
+ } else {
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
+ CountReg).addImm(CountImm);
+ }
+ }
+
+ // Add the mtctr instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, dl,
+ TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
+ TripCount->isImm() ? RegState::Kill : 0);
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand to get this mechanism to work; both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with a bdnz instruction.
+ dl = LastI->getDebugLoc();
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
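+ // Mark the CTR register as live into each block of the loop.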
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ if (MBB != Preheader)
+ MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
+ }
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ assert(LastI->getOpcode() == PPC::BCC &&
+ "loop end must start with a BCC instruction");
+ // Either the BCC branches to the beginning of the loop, or it
+ // branches out of the loop and there is an unconditional branch
+ // to the start of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
+ BuildMI(*LastMBB, LastI, dl,
+ TII->get((BranchTarget == LoopStart) ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
+
+ // Conditional branch; just delete it.
+ DEBUG(dbgs() << "Removing old branch: " << *LastI);
+ LastMBB->erase(LastI);
+
+ delete TripCount;
+
+ // The induction operation (add) and the comparison (cmpwi) may now be
+ // unneeded; if so, remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
+ ++NumCTRLoops;
+ return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b77a80bbf30d..c24afa908d69 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -330,6 +330,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
if (HasFP)
+ // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
+ // offsets of R1 is not allowed.
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
.addReg(PPC::R31)
.addImm(FPOffset)
@@ -366,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
@@ -381,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
}
} else { // PPC64.
@@ -399,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
.addReg(PPC::X0)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
@@ -414,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
.addReg(PPC::X0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
}
}
@@ -492,7 +494,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
// subregisters of CR2. We just need to emit a move of CR2.
- if (PPC::CRBITRCRegisterClass->contains(Reg))
+ if (PPC::CRBITRCRegClass.contains(Reg))
continue;
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
@@ -817,7 +819,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::GPRCRegisterClass->contains(Reg)) {
+ if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -825,7 +827,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
- } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ } else if (PPC::G8RCRegClass.contains(Reg)) {
HasG8SaveArea = true;
G8Regs.push_back(CSI[i]);
@@ -833,7 +835,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinG8R) {
MinG8R = Reg;
}
- } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ } else if (PPC::F8RCRegClass.contains(Reg)) {
HasFPSaveArea = true;
FPRegs.push_back(CSI[i]);
@@ -842,12 +844,12 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
MinFPR = Reg;
}
// FIXME SVR4: Disable CR save area for now.
- } else if (PPC::CRBITRCRegisterClass->contains(Reg)
- || PPC::CRRCRegisterClass->contains(Reg)) {
+ } else if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
// HasCRSaveArea = true;
- } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
HasVRSAVESaveArea = true;
- } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ } else if (PPC::VRRCRegClass.contains(Reg)) {
HasVRSaveArea = true;
VRegs.push_back(CSI[i]);
@@ -932,8 +934,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::CRBITRCRegisterClass->contains(Reg) ||
- PPC::CRRCRegisterClass->contains(Reg)) {
+ if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -950,7 +952,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ if (PPC::VRSAVERCRegClass.contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 5a04888dd45b..a00f686adce1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -111,6 +111,23 @@ namespace {
/// immediate field. Because preinc imms have already been validated, just
/// accept it.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
+ /// index field. Operands already handled as immediate offsets (constants,
+ /// PPCISD::Lo, and target global addresses) are rejected; anything else is
+ /// accepted as the index.
+ bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false;
+
Out = N;
return true;
}
@@ -238,11 +255,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
@@ -697,7 +714,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
- if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1)
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
else
@@ -833,7 +850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::MFCR: {
SDValue InFlag = N->getOperand(1);
// Use MFOCRF if supported.
- if (PPCSubTarget.isGigaProcessor())
+ if (PPCSubTarget.hasMFOCRF())
return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag);
else
@@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- // FIXME: PPC64
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
} else {
- llvm_unreachable("R+R preindex loads not supported yet!");
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3b24951d1dc9..aa819eeb30a2 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -51,9 +51,11 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
-cl::desc("enable preincrement load/store generation on PPC (experimental)"),
- cl::Hidden);
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
@@ -64,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
setPow2DivIsCheap();
@@ -73,12 +76,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+ bool isPPC64 = Subtarget->isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
- addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
- addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
- addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -130,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ if (!Subtarget->hasFSQRT()) {
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
}
@@ -226,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (Subtarget->isSVR4ABI()) {
+ if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
@@ -271,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (Subtarget->has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -290,9 +294,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
- if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
- addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
@@ -306,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ if (Subtarget->hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -370,12 +374,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
- addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -389,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+ if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -398,7 +405,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
setExceptionSelectorRegister(PPC::X4);
@@ -415,7 +422,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BSWAP);
// Darwin long double math library functions have $LDBL128 appended.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -432,6 +439,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
+ if (isPPC64 && Subtarget->isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
setInsertFencesForAtomic(true);
setSchedulingPreference(Sched::Hybrid);
@@ -902,10 +914,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
@@ -1006,7 +1019,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
} else {
@@ -1015,7 +1028,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
@@ -1084,8 +1097,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
- // Disabled by default for now.
- if (!EnablePPCPreinc) return false;
+ if (DisablePPCPreinc) return false;
SDValue Ptr;
EVT VT;
@@ -1103,7 +1115,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- // TODO: Check reg+reg first.
+ if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
// LDU/STU use reg+imm*4, others use reg+imm.
if (VT != MVT::i64) {
@@ -1222,6 +1237,30 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+ bool is64bit = PPCSubTarget.isPPC64();
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+
+ if (model != TLSModel::LocalExec)
+ llvm_unreachable("only local-exec TLS mode supported");
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+}
+
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@@ -1440,13 +1479,16 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C,
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C,
/*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -1702,7 +1744,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -1721,19 +1763,19 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i32:
- RC = PPC::GPRCRegisterClass;
+ RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
- RC = PPC::F4RCRegisterClass;
+ RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = PPC::F8RCRegisterClass;
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
- RC = PPC::VRRCRegisterClass;
+ RC = &PPC::VRRCRegClass;
break;
}
@@ -1763,7 +1805,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2743,7 +2785,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
@@ -2800,7 +2842,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
for (unsigned i = 0; i != RVLocs.size(); ++i)
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
@@ -2864,14 +2906,19 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
}
SDValue
-PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
@@ -2921,7 +2968,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -2961,7 +3008,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -3485,7 +3532,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
// If this is the first return lowered for this function, add the regs to the
@@ -4559,7 +4606,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
@@ -4899,11 +4946,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_CC_F4 ||
- MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+ if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
+ PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // The SelectPred is ((BI << 5) | BO) for a BCC
+ unsigned BO = SelectPred & 0xF;
+ assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
+
+ unsigned TrueOpNo, FalseOpNo;
+ if (BO == 12) {
+ TrueOpNo = 2;
+ FalseOpNo = 3;
+ } else {
+ TrueOpNo = 3;
+ FalseOpNo = 2;
+ SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ }
+
+ BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(TrueOpNo).getReg())
+ .addReg(MI->getOperand(FalseOpNo).getReg())
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
@@ -5612,18 +5685,18 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'b': // R1-R31
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
- return std::make_pair(0U, PPC::G8RCRegisterClass);
- return std::make_pair(0U, PPC::GPRCRegisterClass);
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, PPC::F4RCRegisterClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, PPC::F8RCRegisterClass);
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
- return std::make_pair(0U, PPC::VRRCRegisterClass);
+ return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
- return std::make_pair(0U, PPC::CRRCRegisterClass);
+ return std::make_pair(0U, &PPC::CRRCRegClass);
}
}
@@ -5839,11 +5912,30 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+/// is expanded to mul + add.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- unsigned Directive = PPCSubTarget.getDarwinDirective();
- if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
- return Sched::ILP;
+ if (DisableILPPref)
+ return TargetLowering::getSchedulingPreference(N);
- return TargetLowering::getSchedulingPreference(N);
+ return Sched::ILP;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 18eb07200307..b0a013b4b4cf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -366,6 +366,12 @@ namespace llvm {
bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -389,6 +395,7 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -439,12 +446,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual bool
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 7f67a4159dfe..39778a5dc1e1 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -68,15 +68,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
}
@@ -88,27 +88,27 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
let isCodeGenOnly = 1 in
def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
let isCodeGenOnly = 1 in
def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
[(PPCcall_nop_SVR4 (i64 imm:$func))]>;
}
let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
}
@@ -180,17 +180,17 @@ def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi8 :Pseudo< (outs),
- (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ (ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -229,6 +229,15 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst", BrB, []>;
+ def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst", BrB, []>;
+ }
+}
+
// 64-but CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
@@ -256,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+ "mfspr $rT, 268", SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
[(set G8RC:$result,
@@ -278,45 +296,37 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
-// Copies, extends, truncates.
-def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
- []>;
-def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
- []>;
-
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
- "li $rD, $imm", IntGeneral,
+ "li $rD, $imm", IntSimple,
[(set G8RC:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
- "lis $rD, $imm", IntGeneral,
+ "lis $rD, $imm", IntSimple,
[(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nand $rA, $rS, $rB", IntGeneral,
+ "nand $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "and $rA, $rS, $rB", IntGeneral,
+ "and $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "andc $rA, $rS, $rB", IntGeneral,
+ "andc $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
+ "or $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nor $rA, $rS, $rB", IntGeneral,
+ "nor $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "orc $rA, $rS, $rB", IntGeneral,
+ "orc $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "eqv $rA, $rS, $rB", IntGeneral,
+ "eqv $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "xor $rA, $rS, $rB", IntGeneral,
+ "xor $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
// Logical ops with immediate.
@@ -329,20 +339,20 @@ def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
[(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntGeneral,
+ "ori $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntGeneral,
+ "oris $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntGeneral,
+ "xori $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntGeneral,
+ "xoris $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "add $rT, $rA, $rB", IntGeneral,
+ "add $rT, $rA, $rB", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
let Defs = [CARRY] in {
@@ -355,10 +365,13 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
[(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
}
def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntGeneral,
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
[(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
- "addis $rD, $rA, $imm", IntGeneral,
+ "addis $rD, $rA, $imm", IntSimple,
[(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
@@ -374,7 +387,7 @@ def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
[(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "neg $rT, $rA", IntGeneral,
+ "neg $rT, $rA", IntSimple,
[(set G8RC:$rT, (ineg G8RC:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@@ -427,21 +440,21 @@ def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsb $rA, $rS", IntGeneral,
+ "extsb $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsh $rA, $rS", IntGeneral,
+ "extsh $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
let Defs = [CARRY] in {
@@ -493,6 +506,10 @@ def RLWINM8 : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>;
+def ISEL8 : AForm_1<31, 15,
+ (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
} // End FXU Operations.
@@ -529,6 +546,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp
NoEncode<"$ea_result">;
// NO LWAU!
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
// Zero extending loads.
@@ -568,6 +595,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -603,6 +646,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
def : Pat<(PPCload ixaddr:$src),
@@ -660,6 +708,14 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
"stdu $rS, $ptroff($ptrreg)", LdStSTD,
@@ -668,10 +724,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
-let mayStore = 1 in
-def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
- "stdux $rS, $dst", LdStSTD,
- []>, isPPC64;
+
+def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti32 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
@@ -706,11 +793,12 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
// Extensions and truncates to/from 32-bit regs.
def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+ 0, 32)>;
def : Pat<(i64 (anyext GPRC:$in)),
- (OR4To8 GPRC:$in, GPRC:$in)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
def : Pat<(i32 (trunc G8RC:$in)),
- (OR8To4 G8RC:$in, G8RC:$in)>;
+ (EXTRACT_SUBREG G8RC:$in, sub_32)>;
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -765,6 +853,10 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
+ (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
+ (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
(ADDIS8 G8RC:$in, tglobaladdr:$g)>;
def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 6c0f3d3f06e5..b0b842328196 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fsub V_immneg0,
- (fsub (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB)))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
+ (fneg VRRC:$vB))))]>;
def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index d8e4b2bdf34a..a41a0279d215 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -94,6 +94,12 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
let Inst{31} = lk;
}
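+// An I-Form branch with a fixed BO value folded into the low five bits of the
+// LI field; used by the BDZ/BDNZ definitions in PPCInstrInfo.td.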
+class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> {
+ let LI{0-4} = bo;
+}
+
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, BrB> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b45ada9db32a..47f09dca77d3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -40,6 +40,10 @@ extern cl::opt<bool> DisablePPC64RS;
using namespace llvm;
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+ cl::desc("Disable analysis for CTR loops"));
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -75,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
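+// For EXTSW and EXTSW_32_64 this reports the source, the destination, and
+// sub_32 as the sub-register index, so a later pass (presumably the peephole
+// optimizer's extension folding; that consumer is an assumption, not part of
+// this patch) can reuse the low half of the result directly.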
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -186,10 +206,14 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
// Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
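+// For illustration: a counter-based loop back-edge comes back as
+// Cond = { CreateImm(1), CreateReg(CTR8) }, which InsertBranch below maps
+// to BDNZ8 (or BDNZ in 32-bit mode).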
bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
@@ -221,7 +245,30 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
+ } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
+ LastInst->getOpcode() == PPC::BDNZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDZ8 ||
+ LastInst->getOpcode() == PPC::BDZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
}
+
// Otherwise, don't know what this is.
return true;
}
@@ -245,6 +292,34 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDNZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
}
// If the block ends with two PPC:Bs, handle it. The second one is not
@@ -273,7 +348,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 0;
--I;
}
- if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 0;
// Remove the branch.
@@ -283,7 +360,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin()) return 1;
--I;
- if (I->getOpcode() != PPC::BCC)
+ if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 1;
// Remove the branch.
@@ -301,10 +380,16 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
@@ -312,8 +397,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -354,7 +444,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const{
DebugLoc DL;
- if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
@@ -370,7 +460,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
@@ -386,17 +476,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
@@ -438,7 +528,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
// not cause any bug. If we need other uses of CR bits, the following
@@ -470,9 +560,9 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- PPC::CRRCRegisterClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs);
- } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// STVX VAL, 0, R0
@@ -522,7 +612,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs)const{
- if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
@@ -531,7 +621,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
}
- } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
@@ -540,13 +630,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::X11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
- } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
- } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
- } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
@@ -578,7 +668,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::MTCRF8 : PPC::MTCRF), DestReg)
.addReg(ScratchReg));
}
- } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
@@ -607,9 +697,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- PPC::CRRCRegisterClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs);
- } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// Dest = LVX 0, R0
@@ -665,8 +755,11 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
- // Leave the CR# the same, but invert the condition.
- Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
+ Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
+ else
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
return false;
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7d49aa129e36..374213ea435b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -92,6 +92,9 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 748486c1ca26..f57f0c975ad6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -323,7 +323,7 @@ def memri : Operand<iPTR> {
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
+ let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
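+  // Naming the sub-operands lets the update-form loads below tie the offset
+  // register, e.g. RegConstraint<"$addr.offreg = $ea_result">.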
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
@@ -349,10 +349,10 @@ def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -438,6 +438,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc} ${cond:reg}, $dst"
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+
+ let Defs = [CTR], Uses = [CTR] in {
+ def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst", BrB, []>;
+ def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst", BrB, []>;
+ }
}
// Darwin ABI Calls.
@@ -445,15 +452,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
}
@@ -464,16 +471,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB,
[(PPCcall_SVR4 (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
}
@@ -482,18 +489,18 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
- (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ (ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -704,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lfd $rD, $addr", LdStLFD,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -815,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-
-let mayStore = 1 in {
-def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
- "stwux $rS, $rA, $rB", LdStStore,
- []>;
-}
+
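+// Update-form (pre-increment) stores. The incremented effective address is
+// modeled as an extra output, $ea_res, tied to $ptroff via RegConstraint and
+// kept out of the encoding by NoEncode, mirroring the 64-bit forms above.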
+def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
+ (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
+ (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
@@ -852,7 +934,10 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntGeneral,
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
[(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
@@ -864,7 +949,7 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
[]>;
}
def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
- "addis $rD, $rA, $imm", IntGeneral,
+ "addis $rD, $rA, $imm", IntSimple,
[(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
@@ -881,10 +966,10 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
let isReMaterializable = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
- "li $rD, $imm", IntGeneral,
+ "li $rD, $imm", IntSimple,
[(set GPRC:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
- "lis $rD, $imm", IntGeneral,
+ "lis $rD, $imm", IntSimple,
[(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
}
}
@@ -899,18 +984,18 @@ def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
[(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntGeneral,
+ "ori $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntGeneral,
+ "oris $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntGeneral,
+ "xori $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntGeneral,
+ "xoris $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
-def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IntCompare>;
@@ -921,28 +1006,28 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nand $rA, $rS, $rB", IntGeneral,
+ "nand $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "and $rA, $rS, $rB", IntGeneral,
+ "and $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "andc $rA, $rS, $rB", IntGeneral,
+ "andc $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
+ "or $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nor $rA, $rS, $rB", IntGeneral,
+ "nor $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "orc $rA, $rS, $rB", IntGeneral,
+ "orc $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "eqv $rA, $rS, $rB", IntGeneral,
+ "eqv $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "xor $rA, $rS, $rB", IntGeneral,
+ "xor $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
@@ -967,10 +1052,10 @@ def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
[(set GPRC:$rA, (ctlz GPRC:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsb $rA, $rS", IntGeneral,
+ "extsb $rA, $rS", IntSimple,
[(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsh $rA, $rS", IntGeneral,
+ "extsh $rA, $rS", IntSimple,
[(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
@@ -1115,7 +1200,7 @@ def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
PPC970_MicroCode, PPC970_Unit_CRU;
def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
- "mfcr $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Instructions to manipulate FPSCR. Only long double handling uses these.
@@ -1159,7 +1244,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "add $rT, $rA, $rB", IntGeneral,
+ "add $rT, $rA, $rB", IntSimple,
[(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
@@ -1194,7 +1279,7 @@ def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "neg $rT, $rA", IntGeneral,
+ "neg $rT, $rA", IntSimple,
[(set GPRC:$rT, (ineg GPRC:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
@@ -1226,51 +1311,43 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
+ (fneg F8RC:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
+ (fneg F4RC:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1321,6 +1398,13 @@ let Uses = [RM] in {
}
let PPC970_Unit = 1 in { // FXU Operations.
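+ // isel selects $rA into $rT when the tested CR bit is set and $rB otherwise
+ // (per the ISA; stated here only for context). No pattern is attached yet,
+ // so the instruction is not selected automatically.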
+ def ISEL : AForm_1<31, 15,
+ (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
// M-Form instructions. rotate and mask instructions.
//
let isCommutable = 1 in {
@@ -1418,6 +1502,10 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
+ (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
+ (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
(ADDIS GPRC:$in, tglobaladdr:$g)>;
def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
@@ -1427,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS GPRC:$in, tblockaddress:$g)>;
-// Fused negative multiply subtract, alternate pattern
-def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
- (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
- Requires<[FPContractions]>;
-def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
- (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
- Requires<[FPContractions]>;
-
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index a6528c0d7030..aba27399d6da 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -210,7 +210,7 @@ asm(
".text\n"
".align 2\n"
".globl PPC64CompilationCallback\n"
- ".section \".opd\",\"aw\"\n"
+ ".section \".opd\",\"aw\",@progbits\n"
".align 3\n"
"PPC64CompilationCallback:\n"
".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 276edcb69d19..19ec993ba00f 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -99,10 +99,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
- if (MO.getTargetFlags() & PPCII::MO_LO16)
- RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16;
- else if (MO.getTargetFlags() & PPCII::MO_HA16)
- RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16;
+ unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+
+ switch (access) {
+ case PPCII::MO_HA16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_HA16 :
+ MCSymbolRefExpr::VK_PPC_GAS_HA16;
+ break;
+ case PPCII::MO_LO16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_LO16 :
+ MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ break;
+ case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
+ break;
+ case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
+ break;
+ }
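+ // For instance (an assumed end result, not exercised by this change): an
+ // operand carrying MO_TPREL16_HA prints as sym@tprel@ha, and MO_TPREL16_LO
+ // as sym@tprel@l.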
// FIXME: This isn't right, but we don't have a good way to express this in
// the MC Level, see below.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ef1357137def..ab8bf1f93a37 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -89,10 +89,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
}
+bool
+PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return requiresRegisterScavenging(MF);
+}
+
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
-PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
+PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -192,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
+bool
+PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ switch (RC->getID()) {
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID:
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return true;
+ default:
+ return false;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -321,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// address of new allocated space.
if (LP64) {
if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -342,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
.addImm(maxCallFrameSize)
.addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
} else {
- BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index b1e6a7218ee7..152c36d699ec 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -35,7 +35,8 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+ virtual const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
@@ -46,10 +47,14 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
/// requiresRegisterScavenging - We require a register scavenger.
/// FIXME (64-bit): Should be inlined.
bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e55313b135f..5ca387629b6c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -314,12 +314,18 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
}
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
- CR7, CR2, CR3, CR4)> {
- let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
+ CR7, CR2, CR3, CR4)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+ let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+ let isAllocatable = 0;
}
-def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
-def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 8c0a8589052a..6a6ccb9d9852 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -25,6 +25,7 @@ def VFPU : FuncUnit; // vector floating point unit
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//
+def IntSimple : InstrItinClass;
def IntGeneral : InstrItinClass;
def IntCompare : InstrItinClass;
def IntDivD : InstrItinClass;
@@ -117,17 +118,17 @@ include "PPCScheduleA2.td"
//
// opcode itinerary class
// ====== ===============
-// add IntGeneral
+// add IntSimple
// addc IntGeneral
// adde IntGeneral
-// addi IntGeneral
+// addi IntSimple
// addic IntGeneral
// addic. IntGeneral
-// addis IntGeneral
+// addis IntSimple
// addme IntGeneral
// addze IntGeneral
-// and IntGeneral
-// andc IntGeneral
+// and IntSimple
+// andc IntSimple
// andi. IntGeneral
// andis. IntGeneral
// b BrB
@@ -165,10 +166,10 @@ include "PPCScheduleA2.td"
// eciwx LdStLoad
// ecowx LdStLoad
// eieio LdStLoad
-// eqv IntGeneral
-// extsb IntGeneral
-// extsh IntGeneral
-// extsw IntRotateD
+// eqv IntSimple
+// extsb IntSimple
+// extsh IntSimple
+// extsw IntSimple
// fabs FPGeneral
// fadd FPGeneral
// fadds FPGeneral
@@ -280,13 +281,13 @@ include "PPCScheduleA2.td"
// mulld IntMulHD
// mulli IntMulLI
// mullw IntMulHW
-// nand IntGeneral
-// neg IntGeneral
-// nor IntGeneral
-// or IntGeneral
-// orc IntGeneral
-// ori IntGeneral
-// oris IntGeneral
+// nand IntSimple
+// neg IntSimple
+// nor IntSimple
+// or IntSimple
+// orc IntSimple
+// ori IntSimple
+// oris IntSimple
// rfi SprRFI
// rfid IntRFID
// rldcl IntRotateD
@@ -502,7 +503,7 @@ include "PPCScheduleA2.td"
// vupklsb VecPerm
// vupklsh VecPerm
// vxor VecGeneral
-// xor IntGeneral
-// xori IntGeneral
-// xoris IntGeneral
+// xor IntSimple
+// xori IntSimple
+// xoris IntSimple
//
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 419faea30220..cd0fb70a24bd 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -108,6 +108,15 @@ def PPC440Itineraries : ProcessorItineraries<
IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
[GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -373,26 +382,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 857ba40ff622..4d4a5d0e1b2f 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -60,6 +60,17 @@ def PPCA2Itineraries : ProcessorItineraries<
IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntGeneral , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -159,6 +170,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -181,6 +203,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<BrB , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -269,6 +302,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStStore , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -379,28 +423,6 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[26, 7],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index bc926f7bb2b6..61e89ed32c20 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -14,6 +14,7 @@
def G3Itineraries : ProcessorItineraries<
[IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index f7ec1e01333e..e19ddfa80ea3 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -13,6 +13,7 @@
def G4Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 37ebfc59880b..e7446cb028a3 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -16,6 +16,7 @@ def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def G4PlusItineraries : ProcessorItineraries<
[IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index d1e40cef9639..137149972680 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -13,6 +13,7 @@
def G5Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index f405b4711a52..bb193ac3d9ef 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -16,6 +16,7 @@
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdlib>
@@ -25,56 +26,19 @@
using namespace llvm;
-#if defined(__APPLE__)
-#include <mach/mach.h>
-#include <mach/mach_host.h>
-#include <mach/host_info.h>
-#include <mach/machine.h>
-
-/// GetCurrentPowerPCFeatures - Returns the current CPUs features.
-static const char *GetCurrentPowerPCCPU() {
- host_basic_info_data_t hostInfo;
- mach_msg_type_number_t infoCount;
-
- infoCount = HOST_BASIC_INFO_COUNT;
- host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
- &infoCount);
-
- if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
-
- switch(hostInfo.cpu_subtype) {
- case CPU_SUBTYPE_POWERPC_601: return "601";
- case CPU_SUBTYPE_POWERPC_602: return "602";
- case CPU_SUBTYPE_POWERPC_603: return "603";
- case CPU_SUBTYPE_POWERPC_603e: return "603e";
- case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
- case CPU_SUBTYPE_POWERPC_604: return "604";
- case CPU_SUBTYPE_POWERPC_604e: return "604e";
- case CPU_SUBTYPE_POWERPC_620: return "620";
- case CPU_SUBTYPE_POWERPC_750: return "750";
- case CPU_SUBTYPE_POWERPC_7400: return "7400";
- case CPU_SUBTYPE_POWERPC_7450: return "7450";
- case CPU_SUBTYPE_POWERPC_970: return "970";
- default: ;
- }
-
- return "generic";
-}
-#endif
-
-
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PPCGenSubtargetInfo(TT, CPU, FS)
, StackAlignment(16)
, DarwinDirective(PPC::DIR_NONE)
- , IsGigaProcessor(false)
+ , HasMFOCRF(false)
, Has64BitSupport(false)
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , HasISEL(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
@@ -84,9 +48,10 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__APPLE__)
+#if (defined(__APPLE__) || defined(__linux__)) && \
+ (defined(__ppc__) || defined(__powerpc__))
if (CPUName == "generic")
- CPUName = GetCurrentPowerPCCPU();
+ CPUName = sys::getHostCPUName();
#endif
// Parse features string.
@@ -146,10 +111,14 @@ bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2)
- Mode = TargetSubtargetInfo::ANTIDEP_ALL;
- else
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
+ // but we can't because we can't reassign the cr registers. There is a
+ // dependence between the cr register and the RLWINM instruction used
+ // to extract its value which the anti-dependency breaker can't currently
+ // see. Maybe we should make a late-expanded pseudo to encode this dependency.
+ // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
@@ -157,6 +126,9 @@ bool PPCSubtarget::enablePostRAScheduler(
CriticalPathRCs.push_back(&PPC::G8RCRegClass);
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ CriticalPathRCs.push_back(&PPC::F8RCRegClass);
+ CriticalPathRCs.push_back(&PPC::VRRCRegClass);
return OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index a275029d3e5d..0207c833938b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -41,6 +41,8 @@ namespace PPC {
DIR_750,
DIR_970,
DIR_A2,
+ DIR_PWR6,
+ DIR_PWR7,
DIR_64
};
}
@@ -61,13 +63,14 @@ protected:
unsigned DarwinDirective;
/// Used by the ISel to turn on optimizations for POWER4-derived architectures
- bool IsGigaProcessor;
+ bool HasMFOCRF;
bool Has64BitSupport;
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool HasISEL;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -138,7 +141,8 @@ public:
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
- bool isGigaProcessor() const { return IsGigaProcessor; }
+ bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 50f3db8b27fd..980511268a31 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -17,10 +17,15 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+static cl::
+opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
+ cl::desc("Disable CTR loops for PPC"));
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -81,41 +86,37 @@ public:
return getTM<PPCTargetMachine>();
}
+ virtual bool addPreRegAlloc();
virtual bool addInstSelector();
virtual bool addPreEmitPass();
};
} // namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
- TargetPassConfig *PassConfig = new PPCPassConfig(this, PM);
+ return new PPCPassConfig(this, PM);
+}
- // Override this for PowerPC. Tail merging happily breaks up instruction issue
- // groups, which typically degrades performance.
- PassConfig->setEnableTailMerge(false);
+bool PPCPassConfig::addPreRegAlloc() {
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoops());
- return PassConfig;
+ return false;
}
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine()));
return false;
}
bool PPCPassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- PM->add(createPPCBranchSelectionPass());
+ addPass(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
- // FIXME: This should be moved to TargetJITInfo!!
- if (Subtarget.isPPC64())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- Options.DisableJumpTables = true;
-
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 349cd890d5ee..b6763aa73802 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -2,7 +2,6 @@
TODO:
* gpr0 allocation
-* implement do-loop -> bdnz transform
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile
index a101aa4a4495..2d0560d275f9 100644
--- a/lib/Target/PowerPC/TargetInfo/Makefile
+++ b/lib/Target/PowerPC/TargetInfo/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
LIBRARYNAME = LLVMPowerPCInfo
# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 093255e6af2d..cbfa4cf35ba2 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -964,6 +964,12 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
+unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
+The & 15 part should be optimized away; it doesn't change the result. Currently
+not optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index ae4af0f44250..efb10db4c0a7 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -23,5 +23,7 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
+add_dependencies(LLVMSparcCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 883aa3a497c4..7bf8c3f85eca 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -279,14 +279,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
// Returns true if Reg or one of its aliases is in the RegSet.
bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
{
- if (RegSet.count(Reg))
- return true;
- // check Aliased Registers
- for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
- *Alias; ++ Alias)
- if (RegSet.count(*Alias))
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
+ AI.isValid(); ++AI)
+ if (RegSet.count(*AI))
return true;
-
return false;
}
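The rewritten IsRegInSet above folds the explicit self-check and the alias walk into one loop over MCRegAliasIterator. A minimal sketch of the same pattern, assuming LLVM's MCRegAliasIterator from MCRegisterInfo.h (the third constructor argument makes the iterator visit Reg itself first); regOrAliasInSet is a made-up name:

#include "llvm/ADT/SmallSet.h"
#include "llvm/MC/MCRegisterInfo.h"

// Returns true if Reg, or any register aliasing Reg, is in RegSet.
static bool regOrAliasInSet(const llvm::SmallSet<unsigned, 32> &RegSet,
                            unsigned Reg, const llvm::MCRegisterInfo *MRI) {
  for (llvm::MCRegAliasIterator AI(Reg, MRI, /*IncludeSelf=*/true);
       AI.isValid(); ++AI)
    if (RegSet.count(*AI))
      return true;
  return false;
}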
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index c14b3d4a0065..25548625e760 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -187,7 +187,9 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'r':
break;
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 210705e2d47a..6b593c95bb10 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -22,10 +22,9 @@ namespace llvm {
class SparcSubtarget;
class SparcFrameLowering : public TargetFrameLowering {
- const SparcSubtarget &STI;
public:
- explicit SparcFrameLowering(const SparcSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+ explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index c3e6f1606794..79f7ebd82dee 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -90,7 +90,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
@@ -160,7 +160,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
@@ -345,21 +345,26 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
}
SDValue
-SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
// Sparc target does not yet support tail call optimization.
isTailCall = false;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
// Get the size of the outgoing arguments stack space requirement.
@@ -590,7 +595,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
@@ -689,9 +694,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
- addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
- addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
- addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+ addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
+ addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
+ addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
// Turn FP extload into load/fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -1259,7 +1264,7 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, SP::IntRegsRegisterClass);
+ return std::make_pair(0U, &SP::IntRegsRegClass);
}
}
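The LowerCall change above replaces a long argument list with a single CallLoweringInfo object that each target unpacks for itself. A simplified illustration of that parameter-object shape, using a hypothetical trimmed-down struct rather than the real TargetLowering definition:

#include <cstdio>

// Stand-in for the handful of fields the Sparc code actually unpacks.
struct CallLoweringInfoLite {
  bool IsVarArg;
  bool IsTailCall;     // taken by reference below so the target can clear it
  unsigned CallConv;
};

static bool lowerCallLite(CallLoweringInfoLite &CLI) {
  bool &isTailCall = CLI.IsTailCall;
  isTailCall = false;  // Sparc does not support tail call optimization yet
  return false;
}

int main() {
  CallLoweringInfoLite CLI = { false, true, 0 };
  lowerCallLite(CLI);
  std::printf("IsTailCall after lowering: %d\n", (int)CLI.IsTailCall);
  return 0;
}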
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index cf430485cfec..09148ea54027 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -76,12 +76,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index faff468a587d..f8674d0bd660 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -303,13 +303,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == SP::IntRegsRegisterClass)
+ if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill));
- else if (RC == SP::FPRegsRegisterClass)
+ else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill));
- else if (RC == SP::DFPRegsRegisterClass)
+ else if (RC == &SP::DFPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill));
else
@@ -324,11 +324,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == SP::IntRegsRegisterClass)
+ if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
- else if (RC == SP::FPRegsRegisterClass)
+ else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
- else if (RC == SP::DFPRegsRegisterClass)
+ else if (RC == &SP::DFPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
else
llvm_unreachable("Can't load this register from stack slot");
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 63574681b085..ff8d3c533f3d 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void SparcRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index cc253077a980..9ee12ed7f579 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -34,7 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
- TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+ InstrInfo(Subtarget),
+ TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) {
}
@@ -59,7 +60,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool SparcPassConfig::addInstSelector() {
- PM->add(createSparcISelDag(getSparcTargetMachine()));
+ addPass(createSparcISelDag(getSparcTargetMachine()));
return false;
}
@@ -67,8 +68,8 @@ bool SparcPassConfig::addInstSelector() {
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
bool SparcPassConfig::addPreEmitPass(){
- PM->add(createSparcFPMoverPass(getSparcTargetMachine()));
- PM->add(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
+ addPass(createSparcFPMoverPass(getSparcTargetMachine()));
+ addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
return true;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index b203dfa48921..b2cc624e454c 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -28,9 +28,9 @@ namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
+ SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
- SparcInstrInfo InstrInfo;
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index acb74765c193..cc6dc1e25998 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -117,8 +117,8 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
&& TypeBitWidth == rhs.TypeBitWidth);
}
-const TargetAlignElem TargetData::InvalidAlignmentElem =
- TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+const TargetAlignElem
+TargetData::InvalidAlignmentElem = { (AlignTypeEnum)0xFF, 0, 0, 0 };
//===----------------------------------------------------------------------===//
// TargetData Class Implementation
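The InvalidAlignmentElem change above swaps a call to TargetAlignElem::get for a braced aggregate initializer and an in-range sentinel value. A plausible motivation, shown with hypothetical types, is that an aggregate of constants can be emitted as static data, while initializing from a function call generally requires a dynamic initializer to run before main():

#include <cstdint>
#include <cstdio>

struct AlignElemLite {
  uint8_t AlignType;
  unsigned ABIAlign, PrefAlign;
  uint32_t TypeBitWidth;
};

// Constant-initialized: the compiler can place this directly in read-only
// data, with no global constructor.
static const AlignElemLite InvalidElemLite = { 0xFF, 0, 0, 0 };

// By contrast, something like
//   static const AlignElemLite Other = makeElem(0xFF, 0, 0, 0);
// would normally need startup-time initialization code.

int main() {
  std::printf("sentinel type: %u\n", (unsigned)InvalidElemLite.AlignType);
  return 0;
}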
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 440f9ad00de9..f1d1d07c38ae 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -21,20 +21,25 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// TargetInstrInfo
-//===----------------------------------------------------------------------===//
+//
+// Methods that depend on CodeGen are implemented in
+// TargetInstrInfoImpl.cpp. Invoking them without linking libCodeGen raises a
+// link error.
+// ===----------------------------------------------------------------------===//
TargetInstrInfo::~TargetInstrInfo() {
}
const TargetRegisterClass*
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
if (OpNum >= MCID.getNumOperands())
return 0;
short RegClass = MCID.OpInfo[OpNum].RegClass;
if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
- return TRI->getPointerRegClass(RegClass);
+ return TRI->getPointerRegClass(MF, RegClass);
// Instructions like INSERT_SUBREG do not have fixed register classes.
if (RegClass < 0)
@@ -44,54 +49,6 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
return TRI->getRegClass(RegClass);
}
-unsigned
-TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- unsigned Class = MI->getDesc().getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
-
- // The # of u-ops is dynamically determined. The specific target should
- // override this function to return the right number.
- return 1;
-}
-
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- unsigned UseClass = UseMI->getDesc().getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- return ItinData->getStageLatency(MI->getDesc().getSchedClass());
-}
-
-bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI,
- unsigned DefIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return false;
-
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
- return (DefCycle != -1 && DefCycle <= 1);
-}
-
/// insertNoop - Insert a noop into the instruction stream at the specified
/// point.
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -99,7 +56,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index ec95ad4deeab..8e215a763721 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release",
+ "__memcpy_chk",
"acos",
- "acosl",
"acosf",
+ "acosl",
"asin",
- "asinl",
"asinf",
+ "asinl",
"atan",
- "atanl",
- "atanf",
"atan2",
- "atan2l",
"atan2f",
+ "atan2l",
+ "atanf",
+ "atanl",
"ceil",
- "ceill",
"ceilf",
+ "ceill",
"copysign",
"copysignf",
"copysignl",
"cos",
- "cosl",
"cosf",
"cosh",
- "coshl",
"coshf",
+ "coshl",
+ "cosl",
"exp",
- "expl",
- "expf",
"exp2",
- "exp2l",
"exp2f",
+ "exp2l",
+ "expf",
+ "expl",
"expm1",
- "expm1l",
"expm1f",
+ "expm1l",
"fabs",
- "fabsl",
"fabsf",
+ "fabsl",
+ "fiprintf",
"floor",
- "floorl",
"floorf",
- "fiprintf",
+ "floorl",
"fmod",
- "fmodl",
"fmodf",
+ "fmodl",
+ "fputc",
"fputs",
"fwrite",
"iprintf",
"log",
- "logl",
- "logf",
- "log2",
- "log2l",
- "log2f",
"log10",
- "log10l",
"log10f",
+ "log10l",
"log1p",
- "log1pl",
"log1pf",
+ "log1pl",
+ "log2",
+ "log2f",
+ "log2l",
+ "logf",
+ "logl",
+ "memchr",
+ "memcmp",
"memcpy",
"memmove",
"memset",
@@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"pow",
"powf",
"powl",
+ "putchar",
+ "puts",
"rint",
"rintf",
"rintl",
@@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"roundf",
"roundl",
"sin",
- "sinl",
"sinf",
"sinh",
- "sinhl",
"sinhf",
+ "sinhl",
+ "sinl",
"siprintf",
"sqrt",
- "sqrtl",
"sqrtf",
+ "sqrtl",
+ "strcat",
+ "strchr",
+ "strcpy",
+ "strlen",
+ "strncat",
+ "strncmp",
+ "strncpy",
+ "strnlen",
"tan",
- "tanl",
"tanf",
"tanh",
- "tanhl",
"tanhf",
+ "tanhl",
+ "tanl",
"trunc",
"truncf",
- "truncl",
- "__cxa_atexit",
- "__cxa_guard_abort",
- "__cxa_guard_acquire",
- "__cxa_guard_release"
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
-static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+static void initialize(TargetLibraryInfo &TLI, const Triple &T,
+ const char **StandardNames) {
initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfo function names must be sorted");
+ }
+#endif // !NDEBUG
// memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
if (T.isMacOSX()) {
@@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, Triple());
+ initialize(*this, Triple(), StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, T);
+ initialize(*this, T, StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
@@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+bool TargetLibraryInfo::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char **Start = &StandardNames[0];
+ const char **End = &StandardNames[LibFunc::NumLibFuncs];
+ const char **I = std::lower_bound(Start, End, funcName);
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
/// disableAllFunctions - This disables all builtins, which is used for options
/// like -fno-builtin.
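Two related changes above: StandardNames is now kept in strict alphabetical order (the NDEBUG block verifies that invariant), and the new getLibFunc exploits the order with std::lower_bound instead of a linear scan. A small stand-alone demo of the same lookup, using plain C strings rather than the LLVM types:

#include <algorithm>
#include <cstdio>
#include <cstring>

static const char *Names[] = { "acos", "asin", "memcpy", "puts", "sqrt" };
static const unsigned NumNames = sizeof(Names) / sizeof(Names[0]);

// Binary search over the sorted table; returns the index on success.
static bool lookup(const char *Fn, unsigned &Index) {
  const char **Start = &Names[0];
  const char **End = &Names[NumNames];
  const char **I = std::lower_bound(Start, End, Fn,
      [](const char *A, const char *B) { return std::strcmp(A, B) < 0; });
  if (I != End && std::strcmp(*I, Fn) == 0) {
    Index = static_cast<unsigned>(I - Start);
    return true;
  }
  return false;
}

int main() {
  unsigned Idx;
  if (lookup("puts", Idx))
    std::printf("puts -> index %u\n", Idx);
  return 0;
}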
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 2570e0d0972b..b74a0bd25d72 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -152,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// a mergable string section, or general .data if it contains relocations.
if (GVar->isConstant()) {
// If the initializer for the global contains something that requires a
- // relocation, then we may have to drop this into a wriable data section
+ // relocation, then we may have to drop this into a writable data section
// even though it is marked const.
switch (C->getRelocationInfo()) {
case Constant::NoRelocation:
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index b9b2526876fd..382571982b96 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,7 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -75,25 +77,58 @@ CodeModel::Model TargetMachine::getCodeModel() const {
return CodeGenInfo->getCodeModel();
}
+/// Get the IR-specified TLS model for Var.
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
+ switch (Var->getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal:
+ llvm_unreachable("getSelectedTLSModel for non-TLS variable");
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ return TLSModel::GeneralDynamic;
+ case GlobalVariable::LocalDynamicTLSModel:
+ return TLSModel::LocalDynamic;
+ case GlobalVariable::InitialExecTLSModel:
+ return TLSModel::InitialExec;
+ case GlobalVariable::LocalExecTLSModel:
+ return TLSModel::LocalExec;
+ }
+ llvm_unreachable("invalid TLS model");
+}
+
TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ const GlobalVariable *Var = cast<GlobalVariable>(GV);
+
+ bool isLocal = Var->hasLocalLinkage();
+ bool isDeclaration = Var->isDeclaration();
+ bool isPIC = getRelocationModel() == Reloc::PIC_;
+ bool isPIE = Options.PositionIndependentExecutable;
// FIXME: what should we do for protected and internal visibility?
// For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
+ bool isHidden = Var->hasHiddenVisibility();
- if (getRelocationModel() == Reloc::PIC_ &&
- !Options.PositionIndependentExecutable) {
+ TLSModel::Model Model;
+ if (isPIC && !isPIE) {
if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
+ Model = TLSModel::LocalDynamic;
else
- return TLSModel::GeneralDynamic;
+ Model = TLSModel::GeneralDynamic;
} else {
if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
+ Model = TLSModel::LocalExec;
else
- return TLSModel::InitialExec;
+ Model = TLSModel::InitialExec;
}
+
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
+ if (SelectedModel > Model)
+ return SelectedModel;
+
+ return Model;
}
/// getOptLevel - Returns the optimization level: None, Less,
@@ -127,4 +162,3 @@ void TargetMachine::setFunctionSections(bool V) {
void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-
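getTLSModel above now computes a default model from the relocation mode and the variable's linkage, then lets the IR-level thread_local mode override it when that mode is more specific. The SelectedModel > Model comparison assumes the TLSModel enumerators are ordered from most general (GeneralDynamic) to most constrained (LocalExec). A compact sketch of the same decision under that assumption, with made-up names:

#include <cstdio>

// Ordered from most general to most constrained (assumed ordering).
enum TLSModelLite { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

static TLSModelLite pickTLSModel(bool isPIC, bool isPIE, bool isLocal,
                                 bool isHidden, bool isDeclaration,
                                 TLSModelLite IRModel, bool hasIRModel) {
  TLSModelLite Model;
  if (isPIC && !isPIE)
    Model = (isLocal || isHidden) ? LocalDynamic : GeneralDynamic;
  else
    Model = (!isDeclaration || isHidden) ? LocalExec : InitialExec;

  // A more specific model requested on the IR variable wins.
  if (hasIRModel && IRModel > Model)
    return IRModel;
  return Model;
}

int main() {
  // PIC shared object, external non-hidden definition, IR asks for
  // initialexec: the default GeneralDynamic gets upgraded.
  TLSModelLite M = pickTLSModel(true, false, false, false, false,
                                InitialExec, true);
  std::printf("model: %d\n", (int)M);  // prints 2 (InitialExec)
  return 0;
}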
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 1716423eeeac..2395f2ba12ac 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -46,6 +46,50 @@ void PrintReg::print(raw_ostream &OS) const {
}
}
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
+
/// getMinimalPhysRegClass - Returns the Register Class of a physical
/// register of the given type, picking the most sub register class of
/// the right type that contains this physreg.
@@ -71,6 +115,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
for (unsigned i = 0; i != Order.size(); ++i)
R.set(Order[i]);
@@ -80,7 +125,10 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC) const {
BitVector Allocatable(getNumRegs());
if (RC) {
- getAllocatableSetForRC(MF, RC, Allocatable);
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
} else {
for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
E = regclass_end(); I != E; ++I)
@@ -95,6 +143,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
return Allocatable;
}
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
const TargetRegisterClass *
TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
const TargetRegisterClass *B) const {
@@ -106,15 +164,83 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
// Register classes are ordered topologically, so the largest common
// sub-class it the common sub-class with the smallest ID.
- const unsigned *SubA = A->getSubClassMask();
- const unsigned *SubB = B->getSubClassMask();
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
- // We could start the search from max(A.ID, B.ID), but we are only going to
- // execute 2-3 iterations anyway.
- for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32)
- if (unsigned Common = *SubA++ & *SubB++)
- return getRegClass(Base + CountTrailingZeros_32(Common));
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
- // No common sub-class exists.
- return NULL;
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
}
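The helpers added above (getAllocatableClass, firstCommonClass, getMatchingSuperRegClass, getCommonSuperRegClass) all lean on one representation: every register class carries a bit vector of its sub-classes stored as 32-bit words, and classes are numbered topologically, so the lowest set bit of an intersection names the largest common sub-class. A toy illustration with plain integers, using __builtin_ctz in place of LLVM's CountTrailingZeros_32:

#include <cstdint>
#include <cstdio>

int main() {
  // Bit i set <=> the class with ID i is a sub-class of the given class.
  uint32_t SubA = 0x16;                      // A's sub-classes: IDs 1, 2, 4
  uint32_t SubB = 0x1C;                      // B's sub-classes: IDs 2, 3, 4
  uint32_t Common = SubA & SubB;             // IDs 2 and 4 remain
  unsigned First = __builtin_ctz(Common);    // lowest ID = largest class
  std::printf("largest common sub-class ID: %u\n", First);  // prints 2
  return 0;
}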
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 08c732c3886e..fbbaa9500c99 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -65,6 +65,10 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+ bool MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts);
+
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
@@ -117,7 +121,7 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
-static bool isImmSExti16i8Value(uint64_t Value) {
+static bool isImmSExti16i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
@@ -135,12 +139,12 @@ static bool isImmZExtu32u8Value(uint64_t Value) {
static bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
static bool isImmSExti64i32Value(uint64_t Value) {
return (( Value <= 0x000000007FFFFFFFULL)||
- (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
namespace {
@@ -187,7 +191,7 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@@ -309,28 +313,45 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const { return Kind == Memory; }
- bool isMem8() const {
+ bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
- bool isMem16() const {
+ bool isMem16() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 16);
}
- bool isMem32() const {
+ bool isMem32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32);
}
- bool isMem64() const {
+ bool isMem64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64);
}
- bool isMem80() const {
+ bool isMem80() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 80);
}
- bool isMem128() const {
+ bool isMem128() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 128);
}
- bool isMem256() const {
+ bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
+ bool isMemVX32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
+ }
+ bool isMemVY32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
+ }
+ bool isMemVX64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
+ }
+ bool isMemVY64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
+ }
+
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
@@ -356,26 +377,38 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addMem8Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem16Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem80Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMemVX32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem128Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMemVY32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem256Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMemVX64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMemVY64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -467,7 +500,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() &&
+ return Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
@@ -611,7 +644,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
-
+
if (getLexer().is(AsmToken::Identifier)) {
// Parse BaseReg
if (ParseRegister(BaseReg, Start, End)) {
@@ -638,11 +671,11 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
// Handle '[' Scale*IndexReg ']'
Parser.Lex();
SMLoc IdxRegLoc = Parser.getTok().getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
} else
- return ErrorOperand(Loc, "Unepxeted token");
+ return ErrorOperand(Loc, "Unexpected token");
}
if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
@@ -655,8 +688,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().is(AsmToken::Star)) {
Parser.Lex();
SMLoc IdxRegLoc = Parser.getTok().getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
Scale = Val;
} else if (getLexer().is(AsmToken::RBrac)) {
const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
@@ -668,7 +701,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
End = Parser.getTok().getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ else if (getParser().ParseExpression(Disp, End)) return 0;
}
}
@@ -881,7 +914,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getParser().ParseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
return 0;
- }
+ }
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -916,15 +949,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
+ // To support VSIB, IndexReg can be a 128-bit or 256-bit register.
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
IndexReg != X86::RIZ) {
Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
return 0;
}
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
IndexReg != X86::EIZ){
Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
return 0;
@@ -944,7 +980,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
-
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -1204,20 +1240,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Intel syntax
X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[2];
- Operands.pop_back();
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[2];
+ Operands.pop_back();
}
} else {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
}
}
}
-
+
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
@@ -1476,6 +1512,18 @@ bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
+ SmallVector<MCInst, 2> Insts;
+ bool Error = MatchInstruction(IDLoc, Operands, Insts);
+ if (!Error)
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+ Out.EmitInstruction(Insts[i]);
+ return Error;
+}
+
+bool X86AsmParser::
+MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
@@ -1491,7 +1539,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
const char *Repl =
StringSwitch<const char*>(Op->getToken())
@@ -1520,12 +1568,12 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other.
+ // individual transformations can chain off each other.
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled");
@@ -1558,12 +1606,12 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
-
+
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
-
+
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
@@ -1583,7 +1631,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
}
@@ -1673,10 +1721,10 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if(Parser.getTok().getString() == "noprefix") {
- // FIXME : Handle noprefix
- Parser.Lex();
+ // FIXME : Handle noprefix
+ Parser.Lex();
} else
- return true;
+ return true;
}
return false;
}
@@ -1691,19 +1739,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
const MCExpr *Value;
if (getParser().ParseExpression(Value))
return true;
-
+
getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
+
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
Parser.Lex();
}
}
-
+
Parser.Lex();
return false;
}
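MatchAndEmitInstruction above is split so that matching collects instructions into a SmallVector<MCInst> and nothing reaches the streamer unless the whole match succeeds. The shape of that refactor, reduced to a sketch with hypothetical names and a plain std::vector:

#include <cstdio>
#include <vector>

struct InstLite { int Opcode; };

// Stand-in for MatchInstruction: fill Insts, return false on success.
static bool matchOnly(std::vector<InstLite> &Insts) {
  InstLite Wait = { 1 };    // e.g. the implicit WAIT emitted for fwait forms
  InstLite Real = { 2 };    // the instruction that actually matched
  Insts.push_back(Wait);
  Insts.push_back(Real);
  return false;
}

// Stand-in for MatchAndEmitInstruction: emit only if matching succeeded.
static bool matchAndEmit() {
  std::vector<InstLite> Insts;
  bool Error = matchOnly(Insts);
  if (!Error)
    for (unsigned i = 0, e = Insts.size(); i != e; ++i)
      std::printf("emit opcode %d\n", Insts[i].Opcode);
  return Error;
}

int main() { return matchAndEmit() ? 1 : 0; }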
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index f612e2365ec3..b886d46501b4 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -52,6 +52,8 @@ endif()
add_llvm_target(X86CodeGen ${sources})
+add_dependencies(LLVMX86CodeGen intrinsics_gen)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 8278bde7c218..5039887e1a2e 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
OperandType type = (OperandType)operand.type;
+ bool isBranch = false;
+ uint64_t pcrel = 0;
if (type == TYPE_RELv) {
+ isBranch = true;
+ pcrel = insn.startLocation +
+ insn.immediateOffset + insn.immediateSize;
switch (insn.displacementSize) {
default:
break;
@@ -351,15 +356,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
// Special case those X86 instructions that use the imm8 as a set of
// bits, bit count, etc. and are not sign-extend.
if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
- Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
- Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
- Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
- Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
- Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
- Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
- Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
- Opcode != X86::VINSERTPSrr)
- type = TYPE_MOFFS8;
+ Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
+ Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
+ Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
+ Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
+ Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
+ Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
+ Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
+ Opcode != X86::VINSERTPSrr)
+ type = TYPE_MOFFS8;
break;
case ENCODING_IW:
type = TYPE_MOFFS16;
@@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
}
- bool isBranch = false;
- uint64_t pcrel = 0;
switch (type) {
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
@@ -495,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
} else {
baseReg = MCOperand::CreateReg(0);
}
-
+
+ // Check whether we are handling VSIB addressing mode for GATHER.
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+ // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+ // I don't see a way to get the correct IndexReg in readSIB:
+ // We can tell whether it is VSIB or SIB after instruction ID is decoded,
+ // but instruction ID may not be decoded yet when calling readSIB.
+ uint32_t Opcode = mcInst.getOpcode();
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ if (IndexIs128 || IndexIs256) {
+ unsigned IndexOffset = insn.sibIndex -
+ (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
+ SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ insn.sibIndex = (SIBIndex)(IndexBase +
+ (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+ }
+
if (insn.sibIndex != SIB_INDEX_NONE) {
switch (insn.sibIndex) {
default:
@@ -506,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
indexReg = MCOperand::CreateReg(X86::x); break;
EA_BASES_32BIT
EA_BASES_64BIT
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
}
} else {
@@ -726,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.vvvv);
return false;
case ENCODING_DUP:
- return translateOperand(mcInst,
- insn.spec->operands[operand.type - TYPE_DUP0],
+ return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
insn, Dis);
}
}
@@ -753,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst,
insn.numImmediatesTranslated = 0;
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- if (insn.spec->operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
+ if (insn.operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
return true;
}
}
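The gather/VSIB block above rewrites insn.sibIndex once the opcode is known: the SIB index field was decoded relative to the general-purpose bases, so it is re-based into the XMM or YMM range, and the "no index" encoding becomes index register 4, which VSIB treats as a real index. A toy version of just that arithmetic, with made-up constants rather than the decoder's enums:

#include <cstdio>

enum { INDEX_NONE = -1, GPR_BASE = 0, XMM_BASE = 100, YMM_BASE = 200 };

static int remapVSIBIndex(int SibIndex, bool IndexIs256) {
  int Base = IndexIs256 ? YMM_BASE : XMM_BASE;
  int Offset = (SibIndex == INDEX_NONE) ? 4 : SibIndex - GPR_BASE;
  return Base + Offset;
}

int main() {
  std::printf("%d\n", remapVSIBIndex(3, false));          // 103: "XMM3"
  std::printf("%d\n", remapVSIBIndex(INDEX_NONE, true));  // 204: "YMM4"
  return 0;
}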
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index c11f51c6a9ac..0dbfa260014b 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -20,7 +20,7 @@
// 2. Read the opcode, and determine what kind of opcode it is. The
// disassembler distinguishes four kinds of opcodes, which are enumerated in
// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
-// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
@@ -74,8 +74,8 @@
#ifndef X86DISASSEMBLER_H
#define X86DISASSEMBLER_H
-#define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name;
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
@@ -88,7 +88,7 @@
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
-
+
class MCInst;
class MCInstrInfo;
class MCSubtargetInfo;
@@ -96,7 +96,7 @@ class MemoryObject;
class raw_ostream;
struct EDInstInfo;
-
+
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 602087756b23..0c929122aeee 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- switch (insn->spec->operands[index].encoding) {
+ switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
break;
case ENCODING_REG:
case ENCODING_RM:
if (readModRM(insn))
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_CB:
@@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 31)
return -1;
- if (insn->spec->operands[index].type == TYPE_XMM128 ||
- insn->spec->operands[index].type == TYPE_XMM256)
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+ x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
sawRegImm = 1;
break;
case ENCODING_IW:
@@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_DUP:
@@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->instructionID == 0 ||
readOperands(insn))
return -1;
+
+ insn->operands = &x86OperandSets[insn->spec->operands][0];
insn->length = insn->readerCursor - insn->startLocation;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index fae309b45d02..797703f80335 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -19,17 +19,18 @@
#ifdef __cplusplus
extern "C" {
#endif
-
-#define INSTRUCTION_SPECIFIER_FIELDS
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
-
+
#undef INSTRUCTION_SPECIFIER_FIELDS
#undef INSTRUCTION_IDS
-
+
/*
* Accessor functions for various fields of an Intel instruction
*/
@@ -43,7 +44,7 @@ extern "C" {
#define rFromREX(rex) (((rex) & 0x4) >> 2)
#define xFromREX(rex) (((rex) & 0x2) >> 1)
#define bFromREX(rex) ((rex) & 0x1)
-
+
#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
@@ -237,7 +238,7 @@ extern "C" {
ENTRY(YMM13) \
ENTRY(YMM14) \
ENTRY(YMM15)
-
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -245,7 +246,7 @@ extern "C" {
ENTRY(DS) \
ENTRY(FS) \
ENTRY(GS)
-
+
#define REGS_DEBUG \
ENTRY(DR0) \
ENTRY(DR1) \
@@ -266,12 +267,12 @@ extern "C" {
ENTRY(CR6) \
ENTRY(CR7) \
ENTRY(CR8)
-
+
#define ALL_EA_BASES \
EA_BASES_16BIT \
EA_BASES_32BIT \
EA_BASES_64BIT
-
+
#define ALL_SIB_BASES \
REGS_32BIT \
REGS_64BIT
@@ -290,7 +291,7 @@ extern "C" {
ENTRY(RIP)
/*
- * EABase - All possible values of the base field for effective-address
+ * EABase - All possible values of the base field for effective-address
* computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
* distinguish between bases (EA_BASE_*) and registers that just happen to be
* referred to when Mod == 0b11 (EA_REG_*).
@@ -305,20 +306,23 @@ typedef enum {
#undef ENTRY
EA_max
} EABase;
-
-/*
+
+/*
* SIBIndex - All possible values of the SIB index field.
* Borrows entries from ALL_EA_BASES with the special case that
* sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
*/
typedef enum {
SIB_INDEX_NONE,
#define ENTRY(x) SIB_INDEX_##x,
ALL_EA_BASES
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
SIB_INDEX_max
} SIBIndex;
-
+
/*
* SIBBase - All possible values of the SIB base field.
*/
@@ -350,7 +354,7 @@ typedef enum {
#undef ENTRY
MODRM_REG_max
} Reg;
-
+
/*
* SegmentOverride - All possible segment overrides.
*/
@@ -364,7 +368,7 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
-
+
/*
* VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
*/
@@ -428,16 +432,16 @@ struct InternalInstruction {
void* dlogArg;
/* General instruction information */
-
+
/* The mode to disassemble for (64-bit, protected, real) */
DisassemblerMode mode;
/* The start of the instruction, usable with the reader */
uint64_t startLocation;
/* The length of the instruction, in bytes */
size_t length;
-
+
/* Prefix state */
-
+
/* 1 if the prefix byte corresponding to the entry is present; 0 if not */
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
@@ -453,7 +457,7 @@ struct InternalInstruction {
uint64_t necessaryPrefixLocation;
/* The segment override type */
SegmentOverride segmentOverride;
-
+
/* Sizes of various critical pieces of data, in bytes */
uint8_t registerSize;
uint8_t addressSize;
@@ -464,9 +468,9 @@ struct InternalInstruction {
needed to find relocation entries for adding symbolic operands */
uint8_t displacementOffset;
uint8_t immediateOffset;
-
+
/* opcode state */
-
+
/* The value of the two-byte escape prefix (usually 0x0f) */
uint8_t twoByteEscape;
/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
@@ -475,16 +479,16 @@ struct InternalInstruction {
uint8_t opcode;
/* The ModR/M byte of the instruction, if it is an opcode extension */
uint8_t modRMExtension;
-
+
/* decode state */
-
+
/* The type of opcode, used for indexing into the array of decode tables */
OpcodeType opcodeType;
/* The instruction ID, extracted from the decode table */
uint16_t instructionID;
/* The specifier for the instruction, from the instruction info table */
const struct InstructionSpecifier *spec;
-
+
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
@@ -492,12 +496,12 @@ struct InternalInstruction {
/* The VEX.vvvv field, which contains a third register operand for some AVX
instructions */
Reg vvvv;
-
+
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */
BOOL consumedModRM;
uint8_t modRM;
-
+
/* The SIB byte, used for more complex 32- or 64-bit memory operands */
BOOL consumedSIB;
uint8_t sib;
@@ -505,19 +509,19 @@ struct InternalInstruction {
/* The displacement, used for memory operands */
BOOL consumedDisplacement;
int32_t displacement;
-
+
/* Immediates. There can be two in some cases */
uint8_t numImmediatesConsumed;
uint8_t numImmediatesTranslated;
uint64_t immediates[2];
-
+
/* A register or immediate operand encoded into the opcode */
BOOL consumedOpcodeModifier;
uint8_t opcodeModifier;
Reg opcodeRegister;
-
+
/* Portions of the ModR/M byte */
-
+
/* These fields determine the allowable values for the ModR/M fields, which
depend on operand and address widths */
EABase eaBaseBase;
@@ -530,11 +534,13 @@ struct InternalInstruction {
EADisplacement eaDisplacement;
/* The reg field always encodes a register */
Reg reg;
-
+
/* SIB state */
SIBIndex sibIndex;
uint8_t sibScale;
SIBBase sibBase;
+
+ const struct OperandSpecifier *operands;
};
/* decodeInstruction - Decode one instruction and store the decoding results in
@@ -568,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn,
* @param line - The line number that printed the debug message.
* @param s - The message to print.
*/
-
+
void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-
+
#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 13e113609bf3..b0a0e1e78ef7 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -119,7 +119,7 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
-#define ENUM_ENTRY(n, r, d) n,
+#define ENUM_ENTRY(n, r, d) n,
typedef enum {
INSTRUCTION_CONTEXTS
IC_max
@@ -148,11 +148,11 @@ typedef enum {
* If a ModR/M byte is not required, "required" is left unset, and the values
* for each instructionID are identical.
*/
-
+
typedef uint16_t InstrUID;
/*
- * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
* consumer to determine the number of entries in it.
*
* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
@@ -172,7 +172,7 @@ typedef uint16_t InstrUID;
ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
-#define ENUM_ENTRY(n) n,
+#define ENUM_ENTRY(n) n,
typedef enum {
MODRMTYPES
MODRM_max
@@ -180,13 +180,13 @@ typedef enum {
#undef ENUM_ENTRY
/*
- * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
* instruction each possible value of the ModR/M byte corresponds to. Once
* this information is known, we have narrowed down to a single instruction.
*/
struct ModRMDecision {
uint8_t modrm_type;
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_IDS
};
@@ -210,7 +210,7 @@ struct ContextDecision {
struct OpcodeDecision opcodeDecisions[IC_max];
};
-/*
+/*
* Physical encodings of instruction operands.
*/
@@ -244,14 +244,14 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
"in type")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
ENCODINGS
ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
-/*
+/*
* Semantic interpretations of instruction operands.
*/
@@ -332,14 +332,14 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
TYPES
TYPE_max
} OperandType;
#undef ENUM_ENTRY
-/*
+/*
* OperandSpecifier - The specification for how to extract and interpret one
* operand.
*/
@@ -374,8 +374,7 @@ typedef enum {
struct InstructionSpecifier {
uint8_t modifierType;
uint8_t modifierBase;
- struct OperandSpecifier operands[X86_MAX_OPERANDS];
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_SPECIFIER_FIELDS
};
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index f532019acdff..64ac5e685f76 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -96,7 +96,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSHUFHWmi:
case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFHWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFHWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFHWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
@@ -106,7 +116,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSHUFLWmi:
case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFLWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFLWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFLWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
@@ -487,6 +507,16 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VPERMQYri:
+ case X86::VPERMPDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMQYmi:
+ case X86::VPERMPDYmi:
+ DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index a0bb6dc6d60f..db597fbfca9f 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -94,40 +94,83 @@ namespace X86II {
MO_PLT,
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
+ /// the offset of the GOT entry with the TLS index structure that contains
+ /// the module number and variable offset for the symbol. Used in the
+ /// general dynamic TLS access model.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
+ /// MO_TLSLD - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// __tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the x86-64 local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLD
+ MO_TLSLD,
+
+ /// MO_TLSLDM - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS index for the module that
+ /// contains the symbol. When this index is passed to a call to
+ /// ___tls_get_addr, the function will return the base address of the TLS
+ /// block for the symbol. Used in the IA32 local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSLDM
+ MO_TLSLDM,
+
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
+ /// the offset of the GOT entry with the thread-pointer offset for the
+ /// symbol. Used in the x86-64 initial exec TLS access model.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
MO_GOTTPOFF,
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
+ /// the absolute address of the GOT entry with the negative thread-pointer
+ /// offset for the symbol. Used in the non-PIC IA32 initial exec TLS access
+ /// model.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
MO_INDNTPOFF,
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
+ /// the thread-pointer offset for the symbol. Used in the x86-64 local
+ /// exec TLS access model.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
+ /// MO_DTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the TLS offset of the symbol. Used
+ /// in the local dynamic TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @DTPOFF
+ MO_DTPOFF,
+
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
+ /// the negative thread-pointer offset for the symbol. Used in the IA32
+ /// local exec TLS access model.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
MO_NTPOFF,
+ /// MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is
+ /// the offset of the GOT entry with the negative thread-pointer offset for
+ /// the symbol. Used in the PIC IA32 initial exec TLS access model.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTNTPOFF
+ MO_GOTNTPOFF,
+
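The doc comments above amount to one operand flag per TLS access model and
pointer size. As a hedged illustration only (this helper and its name are
distilled from those comments, not code from this patch, and it assumes the
IR-level GlobalValue::ThreadLocalMode enum), a backend might choose the flag
like so; the asm-printer hunk further below renders these operands with the
@TLSLD, @TLSLDM, @DTPOFF and @GOTNTPOFF suffixes.

  // Hypothetical sketch; assumes llvm/GlobalValue.h and X86BaseInfo.h.
  static unsigned char selectTLSOperandFlag(GlobalValue::ThreadLocalMode M,
                                            bool Is64Bit, bool IsPIC) {
    switch (M) {
    case GlobalValue::GeneralDynamicTLSModel:
      return X86II::MO_TLSGD;
    case GlobalValue::LocalDynamicTLSModel:   // module base via __tls_get_addr
      return Is64Bit ? X86II::MO_TLSLD : X86II::MO_TLSLDM;
    case GlobalValue::InitialExecTLSModel:    // thread-pointer offset via the GOT
      if (Is64Bit) return X86II::MO_GOTTPOFF;
      return IsPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
    case GlobalValue::LocalExecTLSModel:      // offset known at static link time
      return Is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
    default:
      return 0; // not thread-local
    }
  }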
/// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "__imp_FOO" symbol. This is used for
/// dllimport linkage on windows.
@@ -438,17 +481,17 @@ namespace X86II {
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
- static inline bool hasImm(uint64_t TSFlags) {
+ inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
- static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
@@ -463,7 +506,7 @@ namespace X86II {
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
@@ -486,9 +529,11 @@ namespace X86II {
/// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
/// counted as one operand.
///
- static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+ inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this form");
+ case X86II::MRMInitReg:
+ // FIXME: Remove this form.
+ return -1;
default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
case X86II::RawFrm:
@@ -546,7 +591,7 @@ namespace X86II {
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
- static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ inline bool isX86_64ExtendedReg(unsigned RegNo) {
switch (RegNo) {
default: break;
case X86::R8: case X86::R9: case X86::R10: case X86::R11:
@@ -568,7 +613,7 @@ namespace X86II {
return false;
}
- static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ inline bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index afa545cbb314..b0acd7d5a101 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -35,19 +35,6 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
clEnumValEnd));
-static const char *const x86_asm_table[] = {
- "{si}", "S",
- "{di}", "D",
- "{ax}", "a",
- "{cx}", "c",
- "{memory}", "memory",
- "{flags}", "",
- "{dirflag}", "",
- "{fpsr}", "",
- "{fpcr}", "",
- "{cc}", "cc",
- 0,0};
-
void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
@@ -55,7 +42,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
if (is64Bit)
PointerSize = 8;
- AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
@@ -88,7 +74,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
if (T.getArch() == Triple::x86_64)
PointerSize = 8;
- AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
@@ -106,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
- // .words.
- if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split
+ // into two .words.
+ if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
+ T.getArch() == Triple::x86)
Data64bitsDirective = 0;
}
@@ -137,7 +123,6 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
PrivateGlobalPrefix = ".L";
}
- AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
@@ -151,7 +136,6 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
PrivateGlobalPrefix = ".L";
}
- AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 80990e5822bd..4a38324d08e1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -139,6 +139,7 @@ public:
MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new X86MCCodeEmitter(MCII, STI, Ctx);
@@ -569,7 +570,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
case X86II::MRMDestMem: {
@@ -602,11 +613,11 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
// dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp++).getReg()))
VEX_R = 0x0;
if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, 1);
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
if (X86II::isX86_64ExtendedReg(
MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
@@ -616,7 +627,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), MemAddr, src3(VEX_4V)
+ // CurOp points to the start of the MemoryOperand;
+ // it skips TIED_TO operands if they exist, then increments past src1.
+ // CurOp + X86::AddrNumOperands will point to src3.
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
break;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
@@ -961,11 +977,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// FIXME: This should be handled during MCInst lowering.
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
- if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
++CurOp;
- else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
+ else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
// Keep track of the current byte being emitted.
unsigned CurByte = 0;
@@ -1037,7 +1056,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SrcRegNum = CurOp + X86::AddrNumOperands;
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
+ ++SrcRegNum;
EmitMemModRMByte(MI, CurOp,
GetX86RegNum(MI.getOperand(SrcRegNum)),
@@ -1050,15 +1069,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SrcRegNum = CurOp + 1;
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
+ ++SrcRegNum;
- if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
- SrcRegNum++;
+ if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ ++SrcRegNum;
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- // 2 operands skipped with HasMemOp4, comensate accordingly
+ // 2 operands skipped with HasMemOp4, compensate accordingly
CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
if (HasVEX_4VOp3)
++CurOp;
@@ -1071,7 +1090,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
- if(HasMemOp4) // Skip second register source (encoded in I8IMM)
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
@@ -1089,7 +1108,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- CurOp++;
+ ++CurOp;
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp++),
(TSFlags & X86II::FormMask)-X86II::MRM0r,
@@ -1100,7 +1119,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- CurOp++;
+ ++CurOp;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
@@ -1149,22 +1168,23 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
// If there is a remaining operand, it must be a trailing immediate. Emit it
- // according to the right size for the instruction.
- if (CurOp != NumOps) {
+ // according to the right size for the instruction. Some instructions
+ // (SSE4a extrq and insertq) have two trailing immediates.
+ while (CurOp != NumOps && NumOps - CurOp <= 2) {
// The last source register of a 4-operand instruction in AVX is encoded
// in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
- : CurOp);
- CurOp++;
- bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
- unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
- RegNum |= GetX86RegNum(MO) << 4;
+ : CurOp);
+ ++CurOp;
+ unsigned RegNum = GetX86RegNum(MO) << 4;
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ RegNum |= 1 << 7;
// If there is an additional 5th operand it must be an immediate, which
// is encoded in bits[3:0]
- if(CurOp != NumOps) {
+ if (CurOp != NumOps) {
const MCOperand &MIMM = MI.getOperand(CurOp++);
- if(MIMM.isImm()) {
+ if (MIMM.isImm()) {
unsigned Val = MIMM.getImm();
assert(Val < 16 && "Immediate operand value out of range");
RegNum |= Val;
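To make the bits[7:4]/bits[3:0] packing concrete (a worked example with assumed
operands, not part of the patch): take XMM12 as the register encoded in the
immediate byte and a trailing 4-bit immediate of 3.

  unsigned RegNum = 4 << 4;   // XMM12: GetX86RegNum == 4            -> 0x40
  RegNum |= 1 << 7;           // extended register bank (XMM8-XMM15) -> 0xC0
  RegNum |= 3;                // optional second immediate           -> 0xC3, the emitted byte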
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 9896cbe53632..46500699ebee 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -76,6 +76,7 @@ namespace X86_MC {
}
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index a802333002d8..8b87c1f9c8ad 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -64,13 +64,13 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
- int NewImm = Imm;
+ unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + l);
@@ -80,48 +80,55 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
- ShuffleMask.push_back(0);
- ShuffleMask.push_back(1);
- ShuffleMask.push_back(2);
- ShuffleMask.push_back(3);
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back(4+(Imm & 3));
- Imm >>= 2;
+void DecodePSHUFHWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + 4 + (NewImm & 3));
+ NewImm >>= 2;
+ }
}
}
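A quick worked example of the decoder above (a usage sketch only, relying on
the declarations from this file): with PSHUFHW the low four words of each
128-bit lane pass through unchanged and the immediate selects the high four.

  SmallVector<int, 16> Mask;
  DecodePSHUFHWMask(MVT::v8i16, 0x1B, Mask);
  // Mask == {0,1,2,3, 7,6,5,4}: elements 0-3 kept, 4-7 reversed by imm 0x1B.
  DecodePSHUFHWMask(MVT::v16i16, 0x1B, Mask);
  // Appends the same pattern per 128-bit lane, which is the new
  // VPSHUFHWYri/VPSHUFHWYmi case:
  // {0,1,2,3,7,6,5,4, 8,9,10,11,15,14,13,12}.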
-void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back((Imm & 3));
- Imm >>= 2;
+void DecodePSHUFLWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
}
- ShuffleMask.push_back(4);
- ShuffleMask.push_back(5);
- ShuffleMask.push_back(6);
- ShuffleMask.push_back(7);
}
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
- int NewImm = Imm;
+ unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NumLaneElts/2; ++i) {
- ShuffleMask.push_back(NewImm % NumLaneElts + l);
- NewImm /= NumLaneElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NumLaneElts/2; ++i) {
- ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + l);
- NewImm /= NumLaneElts;
+ // each half of a lane comes from a different source
+ for (unsigned s = 0; s != NumElts*2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
+ NewImm /= NumLaneElts;
+ }
}
if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
@@ -130,7 +137,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -150,7 +157,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -167,19 +174,26 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
if (Imm & 0x88)
return; // Not a shuffle
unsigned HalfSize = VT.getVectorNumElements()/2;
- unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
- unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
- for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
- ShuffleMask.push_back(i);
- for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
- ShuffleMask.push_back(i);
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ }
+}
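Worked example for the rewritten loop (illustrative only): for a 4-element
256-bit type, an immediate of 0x31 selects the high half of each source.

  SmallVector<int, 8> Mask;
  DecodeVPERM2X128Mask(MVT::v4i64, 0x31, Mask);
  // l=0: (0x31 & 3) == 1        -> elements 2,3 (high half of the first source)
  // l=1: ((0x31 >> 4) & 3) == 3 -> elements 6,7 (high half of the second source)
  // Mask == {2,3,6,7}; immediates with a zeroing bit set (Imm & 0x88) are
  // rejected before the loop.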
+
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm >> (2*i)) & 3);
+ }
}
} // llvm namespace
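And for the new DecodeVPERMMask (again just a hedged usage sketch): the
immediate carries four 2-bit element selectors, so 0x1B reverses a 4 x 64-bit
vector.

  SmallVector<int, 4> Mask;
  DecodeVPERMMask(0x1B, Mask);   // 0x1B = 0b00011011
  // Mask == {3,2,1,0}; for VPERMQ/VPERMPD the X86InstComments change earlier
  // in this patch would print this as something like "ymm0 = ymm1[3,2,1,0]".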
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 5b8c6ef62e29..70d8171a8154 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -35,31 +35,35 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
-void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index ecc7b59c6fa0..dce5b4d2b008 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -26,7 +26,7 @@ class FunctionPass;
class JITCodeEmitter;
class X86TargetMachine;
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
@@ -36,6 +36,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
/// register for PIC on x86-32.
FunctionPass* createGlobalBaseRegPass();
+/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
+/// to local-dynamic TLS variables so that the TLS base address for the module
+/// is only fetched once per execution path through the function.
+FunctionPass *createCleanupLocalDynamicTLSPass();
+
/// createX86FloatingPointStackifierPass - This function returns a pass which
/// converts floating point register references and pseudo instructions into
/// floating point stack references and physical instructions.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index b6591d441969..6c1a816c9ff2 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -86,21 +86,24 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
-def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
- "Enable carry-less multiplication instructions">;
-def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
+def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
+ "Enable packed carry-less multiplication instructions",
+ [FeatureSSE2]>;
+def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
- [FeatureAVX]>;
+ [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
- "Enable XOP instructions">;
+ "Enable XOP instructions",
+ [FeatureAVX, FeatureSSE4A]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
- "Enable AES instructions">;
+ "Enable AES instructions",
+ [FeatureSSE2]>;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
@@ -128,10 +131,10 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : ProcessorModel<Name, GenericModel, Features>;
class AtomProc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, AtomItineraries, Features>;
+ : ProcessorModel<Name, AtomModel, Features>;
def : Proc<"generic", []>;
def : Proc<"i386", []>;
@@ -169,25 +172,23 @@ def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeatureCLMUL]>;
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeatureCLMUL]>;
+def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
+ FeatureAES, FeaturePCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeatureCLMUL,
+def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
+ FeatureAES, FeaturePCLMUL,
FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
// Haswell
-// FIXME: Disabling AVX/AVX2/FMA3 for now since it's not ready.
-def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeatureCLMUL, FeatureRDRAND,
+def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
+ FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase,
FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2]>;
+ FeatureBMI2, FeatureFMA]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -211,21 +212,20 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem]>;
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeatureLZCNT, FeaturePOPCNT]>;
-// FIXME: Disabling AVX/FMA4 for now since it's not ready.
// Bulldozer
-def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureAES, FeatureCLMUL,
- FeatureXOP, FeatureLZCNT, FeaturePOPCNT]>;
+def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePCLMUL,
+ FeatureLZCNT, FeaturePOPCNT]>;
// Enhanced Bulldozer
-def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureAES, FeatureCLMUL,
- FeatureXOP, FeatureF16C, FeatureLZCNT,
+def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePCLMUL,
+ FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 7db7ccbedcfe..db71e2751555 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,10 +20,10 @@
#include "X86TargetMachine.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -186,10 +186,14 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_TLSLD: O << "@TLSLD"; break;
+ case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
@@ -403,7 +407,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index a6ed9ba0060d..35386cd5803d 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
virtual const char *getPassName() const {
return "X86 AT&T-Style Assembly Printer";
}
-
+
const X86Subtarget &getSubtarget() const { return *Subtarget; }
virtual void EmitStartOfAsmFile(Module &M);
virtual void EmitEndOfAsmFile(Module &M);
-
+
virtual void EmitInstruction(const MachineInstr *MI);
-
+
void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
// These methods are used by the tablegen'erated instruction printer.
@@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
bool runOnMachineFunction(MachineFunction &F);
-
+
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index e01ff41ed198..6a6125bb6b2c 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -17,4 +17,3 @@ using namespace llvm;
X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
}
-
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 0cec95a57abc..471eb31131ae 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -1,4 +1,4 @@
-//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
+//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,7 +33,7 @@ public:
void addExternalFunction(MCSymbol* Symbol) {
Externals.insert(Symbol);
}
-
+
typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
externals_iterator externals_begin() const { return Externals.begin(); }
externals_iterator externals_end() const { return Externals.end(); }
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d148989e97f9..a6d2709b372d 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -29,10 +29,13 @@ def RetCC_X86Common : CallingConv<[
// up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
// for functions that return two i8 values are currently expected to pack the
// values into an i16 (which uses AX, and thus AL:AH).
- CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
- CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
- CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
- CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+ //
+ // For code that doesn't care about the ABI, we allow returning more than two
+ // integer values in registers.
+ CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX, RCX]>>,
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
@@ -413,7 +416,7 @@ def CC_X86 : CallingConv<[
// Callee-saved Registers.
//===----------------------------------------------------------------------===//
-def CSR_Ghc : CalleeSavedRegs<(add)>;
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ee3de9a3f6fe..d7050495f89c 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -53,12 +53,12 @@ namespace {
public:
static char ID;
explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
Emitter(X86TargetMachine &tm, CodeEmitter &mce,
const X86InstrInfo &ii, const TargetData &td, bool is64)
- : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -68,8 +68,20 @@ namespace {
return "X86 Machine Code Emitter";
}
+ void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const;
+
+ void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const;
+
+ void emitSegmentOverridePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI) const;
+
void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
-
+
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
@@ -115,17 +127,17 @@ template<class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
-
+
II = TM.getInstrInfo();
TD = TM.getTargetData();
Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
-
+
do {
- DEBUG(dbgs() << "JITTing function '"
+ DEBUG(dbgs() << "JITTing function '"
<< MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
@@ -149,18 +161,18 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
static unsigned determineREX(const MachineInstr &MI) {
unsigned REX = 0;
const MCInstrDesc &Desc = MI.getDesc();
-
+
// Pseudo instructions do not need REX prefix byte.
if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
return 0;
if (Desc.TSFlags & X86II::REX_W)
REX |= 1 << 3;
-
+
unsigned NumOps = Desc.getNumOperands();
if (NumOps) {
bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
unsigned i = isTwoAddr ? 1 : 0;
for (unsigned e = NumOps; i != e; ++i) {
@@ -171,7 +183,7 @@ static unsigned determineREX(const MachineInstr &MI) {
REX |= 0x40;
}
}
-
+
switch (Desc.TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
@@ -362,7 +374,7 @@ void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
}
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
unsigned Index,
unsigned Base) {
// SIB byte is in the same format as the ModRMByte...
@@ -378,8 +390,8 @@ void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
}
}
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
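The cast above asks whether the displacement survives a round trip through a
signed 8-bit value. A couple of hedged sanity checks (not from the patch):

  assert(isDisp8(127) && isDisp8(-128));    // representable in one byte
  assert(!isDisp8(128) && !isDisp8(-129));  // these need a 32-bit displacement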
@@ -388,10 +400,10 @@ static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
const TargetMachine &TM) {
// For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
// mechanism as 32-bit mode.
- if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
!TM.getSubtarget<X86Subtarget>().isTargetDarwin())
return false;
-
+
// Return true if this is a reference to a stub containing the address of the
// global, not the global itself.
return isGlobalStubReference(GVOp.getTargetFlags());
@@ -417,7 +429,7 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
if (RelocOp->isGlobal()) {
// In 64-bit static small code model, we could potentially emit absolute.
// But it's probably not beneficial. If the MCE supports using RIP directly
- // do it, otherwise fallback to absolute (this is determined by IsPCRel).
+ // do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
@@ -441,7 +453,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
const MachineOperand *DispForReloc = 0;
-
+
// Figure out what sort of displacement we have to handle here.
if (Op3.isGlobal()) {
DispForReloc = &Op3;
@@ -469,7 +481,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
const MachineOperand &IndexReg = MI.getOperand(Op+2);
unsigned BaseReg = Base.getReg();
-
+
// Handle %rip relative addressing.
if (BaseReg == X86::RIP ||
(Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
@@ -486,7 +498,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
// Is a SIB byte needed?
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
unsigned BaseRegNo = -1U;
@@ -494,7 +506,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
BaseRegNo = X86_MC::getX86RegNum(BaseReg);
if (// The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
+ IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
// present.
@@ -508,7 +520,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
return;
}
-
+
// If the base is not EBP/ESP and there is no displacement, use simple
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -517,20 +529,20 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
return;
}
-
+
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (!DispForReloc && isDisp8(DispVal)) {
MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
emitConstant(DispVal, 1);
return;
}
-
+
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
return;
}
-
+
// Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
assert(IndexReg.getReg() != X86::ESP &&
IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
@@ -563,7 +575,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
+ // Handle the SIB byte for the case where there is no base, see Intel
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
@@ -596,94 +608,116 @@ static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
return Desc;
}
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
- const MCInstrDesc *Desc) {
- DEBUG(dbgs() << MI);
-
- // If this is a pseudo instruction, lower it.
- switch (Desc->getOpcode()) {
- case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
- case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
- case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
- case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
- case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
- case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
- case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
- case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
- case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
- case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
- case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
- case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
- case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
- case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
- case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
- case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
- case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
- }
-
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
- MCE.processDebugLoc(MI.getDebugLoc(), true);
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
- unsigned Opcode = Desc->Opcode;
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
+ const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const {
// Emit the lock opcode prefix as needed.
if (Desc->TSFlags & X86II::LOCK)
MCE.emitByte(0xF0);
// Emit segment override opcode prefix as needed.
- switch (Desc->TSFlags & X86II::SegOvrMask) {
- case X86II::FS:
- MCE.emitByte(0x64);
- break;
- case X86II::GS:
- MCE.emitByte(0x65);
- break;
- default: llvm_unreachable("Invalid segment!");
- case 0: break; // No segment override!
- }
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
// Emit the repeat opcode prefix as needed.
if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
MCE.emitByte(0xF3);
- // Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize)
- MCE.emitByte(0x66);
-
// Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize)
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (Is64BitMode) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ }
+
+ if (need_address_override)
MCE.emitByte(0x67);
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
+
bool Need0FPrefix = false;
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TB: // Two-byte opcode prefix
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- case X86II::A6: // 0F A6
- case X86II::A7: // 0F A7
- Need0FPrefix = true;
- break;
- case X86II::REP: break; // already handled.
- case X86II::T8XS: // F3 0F 38
- case X86II::XS: // F3 0F
- MCE.emitByte(0xF3);
- Need0FPrefix = true;
- break;
- case X86II::T8XD: // F2 0F 38
- case X86II::TAXD: // F2 0F 3A
- case X86II::XD: // F2 0F
- MCE.emitByte(0xF2);
- Need0FPrefix = true;
- break;
- case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
- case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
- MCE.emitByte(0xD8+
- (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
- >> X86II::Op0Shift));
- break; // Two-byte opcode prefix
- default: llvm_unreachable("Invalid prefix!");
- case 0: break; // No prefix!
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::T8XS: // F3 0F 38
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ case X86II::TAXD: // F2 0F 3A
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
}
// Handle REX prefix.
@@ -697,50 +731,446 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(0x0F);
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8XD: // F2 0F 38
- case X86II::T8XS: // F3 0F 38
- case X86II::T8: // 0F 38
- MCE.emitByte(0x38);
- break;
- case X86II::TAXD: // F2 0F 38
- case X86II::TA: // 0F 3A
- MCE.emitByte(0x3A);
- break;
- case X86II::A6: // 0F A6
- MCE.emitByte(0xA6);
- break;
- case X86II::A7: // 0F A7
- MCE.emitByte(0xA7);
- break;
+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TAXD: // F2 0F 38
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+}
+
+// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+// 0-7 and the difference between the 2 groups is given by the REX prefix.
+// In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+// in 1's complement form, example:
+//
+// ModRM field => XMM9 => 1
+// VEX.VVVV => XMM9 => ~9
+//
+// See table 4-35 of Intel AVX Programming Reference for details.
+static unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = X86_MC::getX86RegNum(MI.getOperand(OpNum).getReg());
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+}
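A worked instance of the comment above (illustrative numbers only): XMM9 is
register number 1 plus the extended bank, and VEX.vvvv stores the full 4-bit
number in one's complement.

  unsigned SrcRegNum = 1 | 8;                   // XMM9: getX86RegNum == 1, extended bank
  unsigned char VEX_vvvv = (~SrcRegNum) & 0xf;  // == 6; the decoder re-inverts to recover 9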
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: llvm_unreachable("Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: llvm_unreachable("Unknown segment register!");
+ case 0: break;
+ case X86::CS: MCE.emitByte(0x2E); break;
+ case X86::SS: MCE.emitByte(0x36); break;
+ case X86::DS: MCE.emitByte(0x3E); break;
+ case X86::ES: MCE.emitByte(0x26); break;
+ case X86::FS: MCE.emitByte(0x64); break;
+ case X86::GS: MCE.emitByte(0x65); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+
+ // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: llvm_unreachable("Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+
+ // Set the vector length to 256-bit if YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ if (MI.getOperand(i).isImplicit())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ // Duplicate register.
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestMem: {
+ // MRMDestMem instruction forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MachineOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem:
+ // MRMSrcMem instruction forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ // MRM0m-MRM7m instruction forms:
+ // MemAddr
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
+ case X86II::MRMSrcReg:
+ // MRMSrcReg instruction forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instruction forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instruction forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
+ MCE.emitByte(0xC5);
+ MCE.emitByte(LastByte | (VEX_R << 7));
+ return;
+ }
+
+ // 3 byte VEX prefix
+ MCE.emitByte(XOP ? 0x8F : 0xC4);
+ MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
+ MCE.emitByte(LastByte | (VEX_W << 7));
+}
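The byte packing at the end of this function follows the layout diagram above; a minimal standalone sketch of just that packing step, assuming the individual fields have already been computed as plain integers, might look like this:

    #include <cstdint>
    #include <vector>

    // R, X, B, W, VVVV, L, PP and M5 stand in for VEX_R/VEX_X/VEX_B/VEX_W/
    // VEX_4V/VEX_L/VEX_PP/VEX_5M above, already in their final (inverted) form.
    static void packVEX(std::vector<uint8_t> &Out, unsigned R, unsigned X,
                        unsigned B, unsigned W, unsigned VVVV, unsigned L,
                        unsigned PP, unsigned M5, bool IsXOP) {
      uint8_t LastByte = PP | (L << 2) | (VVVV << 3);
      if (B && X && !W && !IsXOP && M5 == 1) {   // 2-byte form: C5 [R vvvv L pp]
        Out.push_back(0xC5);
        Out.push_back(LastByte | (R << 7));
        return;
      }
      Out.push_back(IsXOP ? 0x8F : 0xC4);        // 3-byte form: C4/8F [RXB m-mmmm] [W vvvv L pp]
      Out.push_back((R << 7) | (X << 6) | (B << 5) | M5);
      Out.push_back(LastByte | (W << 7));
    }

For example, a 128-bit 66 0F instruction with no extended registers and an unused vvvv (R = X = B = 1, W = 0, VVVV = 0xF, L = 0, PP = 1, M5 = 1) packs to the familiar two-byte prefix C5 F9.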
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
+ case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
+ case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
+ case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
+ case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
+ case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
+ case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
+ case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
+ case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
+ case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
+ case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
+ case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
+ case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
+ case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
+ case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
+ case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
+ case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
}
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ unsigned Opcode = Desc->Opcode;
+
// If this is a two-address instruction, skip one of the register operands.
unsigned NumOps = Desc->getNumOperands();
unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
++CurOp;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
+ else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+
+ uint64_t TSFlags = Desc->TSFlags;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
+ // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ const unsigned MemOp4_I8IMMOperand = 2;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
+ else
+ emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
- switch (Desc->TSFlags & X86II::FormMask) {
+ switch (TSFlags & X86II::FormMask) {
default:
llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
case X86II::Pseudo:
// Remember the current PC offset, this is the PIC relocation
// base address.
switch (Opcode) {
- default:
+ default:
llvm_unreachable("pseudo instructions should be removed before code"
" emission");
- break;
// Do nothing for Int_MemBarrier - it's just a comment. Add a debug
// to make it slightly easier to see.
case X86::Int_MemBarrier:
DEBUG(dbgs() << "#MEMBARRIER\n");
break;
-
+
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -752,7 +1182,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
-
+
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
break;
@@ -774,7 +1204,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
if (CurOp == NumOps)
break;
-
+
const MachineOperand &MO = MI.getOperand(CurOp++);
DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
@@ -787,13 +1217,13 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
emitPCRelativeBlockAddress(MO.getMBB());
break;
}
-
+
if (MO.isGlobal()) {
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
MO.getOffset(), 0);
break;
}
-
+
if (MO.isSymbol()) {
emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
break;
@@ -804,7 +1234,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
break;
}
-
+
assert(MO.isImm() && "Unknown RawFrm operand!");
if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
@@ -815,21 +1245,21 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
break;
}
-
+
case X86II::AddRegFrm: {
MCE.emitByte(BaseOpcode +
X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg()));
-
+
if (CurOp == NumOps)
break;
-
+
const MachineOperand &MO1 = MI.getOperand(CurOp++);
unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
if (MO1.isImm()) {
emitConstant(MO1.getImm(), Size);
break;
}
-
+
unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
: (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
if (Opcode == X86::MOV64ri64i32)
@@ -855,46 +1285,57 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
- if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
break;
}
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
emitMemModRMByte(MI, CurOp,
- X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
- .getReg()));
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
+ X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
- case X86II::MRMSrcReg:
+ case X86II::MRMSrcReg: {
MCE.emitByte(BaseOpcode);
- emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ ++SrcRegNum;
+
+ emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
- CurOp += 2;
- if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
+ // 2 operands skipped with HasMemOp4, compensate accordingly
+ CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
-
+ }
case X86II::MRMSrcMem: {
int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
+
+ MCE.emitByte(BaseOpcode);
intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
X86II::getSizeOfImm(Desc->TSFlags) : 0;
-
- MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, CurOp+1,
+ emitMemModRMByte(MI, FirstMemOp,
X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
CurOp += AddrOperands + 1;
- if (CurOp != NumOps)
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
}
@@ -902,20 +1343,22 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r: {
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
(Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
if (CurOp == NumOps)
break;
-
+
const MachineOperand &MO1 = MI.getOperand(CurOp++);
unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
if (MO1.isImm()) {
emitConstant(MO1.getImm(), Size);
break;
}
-
+
unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
: (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
if (Opcode == X86::MOV64ri32)
@@ -937,8 +1380,10 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m: {
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ ++CurOp;
intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
- (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
MCE.emitByte(BaseOpcode);
@@ -948,14 +1393,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
if (CurOp == NumOps)
break;
-
+
const MachineOperand &MO = MI.getOperand(CurOp++);
unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
if (MO.isImm()) {
emitConstant(MO.getImm(), Size);
break;
}
-
+
unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
: (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
if (Opcode == X86::MOV64mi32)
@@ -980,7 +1425,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
++CurOp;
break;
-
+
case X86II::MRM_C1:
MCE.emitByte(BaseOpcode);
MCE.emitByte(0xC1);
@@ -1003,6 +1448,33 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
+ while (CurOp != NumOps && NumOps - CurOp <= 2) {
+ // The last source register of a 4-operand AVX instruction is encoded
+ // in bits[7:4] of an immediate byte.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
+ : CurOp);
+ ++CurOp;
+ unsigned RegNum = X86_MC::getX86RegNum(MO.getReg()) << 4;
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
+ RegNum |= 1 << 7;
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0]
+ if (CurOp != NumOps) {
+ const MachineOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
+ emitConstant(RegNum, 1);
+ } else {
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ }
+ }
+
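The loop above packs the extra AVX source register into the high nibble of the trailing immediate; a standalone sketch of that packing, with the register number and optional 4-bit immediate as plain integers, could look like:

    #include <cassert>

    // RegNum is the 3-bit base register number, IsExtended covers XMM8-XMM15,
    // Imm4 is the optional low-nibble immediate (0 when absent).
    static unsigned char packVEX_I8IMM(unsigned RegNum, bool IsExtended,
                                       unsigned Imm4) {
      unsigned char Byte = (RegNum & 0x7) << 4;
      if (IsExtended)
        Byte |= 1 << 7;
      return Byte | (Imm4 & 0xf);
    }

    int main() {
      assert(packVEX_I8IMM(1, true, 2) == 0x92);  // e.g. XMM9 with immediate 2
      return 0;
    }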
if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 69752c5c5ddc..e5952aae16de 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -57,7 +57,9 @@ class X86FastISel : public FastISel {
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
- return false;
+ return false;
if (VT == MVT::f32 && !X86ScalarSSEf32)
- return false;
+ return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
return false;
@@ -183,37 +185,37 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
- RC = X86::GR8RegisterClass;
+ RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = X86::GR16RegisterClass;
+ RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
break;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = X86::FR32RegisterClass;
+ RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
- RC = X86::RFP32RegisterClass;
+ RC = &X86::RFP32RegClass;
}
break;
case MVT::f64:
if (X86ScalarSSEf64) {
Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = X86::FR64RegisterClass;
+ RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
- RC = X86::RFP64RegisterClass;
+ RC = &X86::RFP64RegClass;
}
break;
case MVT::f80:
@@ -240,7 +242,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
default: return false;
case MVT::i1: {
// Mask out all but lowest bit.
- unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
Val = AndResult;
@@ -547,13 +549,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
}
LoadReg = createResultReg(RC);
@@ -743,7 +745,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
@@ -1258,7 +1260,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
if (V->getType()->isFloatTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ unsigned ResultReg = createResultReg(&X86::FR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::CVTSS2SDrr), ResultReg)
.addReg(OpReg);
@@ -1277,7 +1279,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
if (V->getType()->isDoubleTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ unsigned ResultReg = createResultReg(&X86::FR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::CVTSD2SSrr), ResultReg)
.addReg(OpReg);
@@ -1314,8 +1316,9 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (!Subtarget->is64Bit()) {
// If we're on x86-32; we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
- const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
- ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
+ (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
+ (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
unsigned CopyReg = createResultReg(CopyRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
CopyReg).addReg(InputReg);
@@ -1423,7 +1426,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return DoSelectCall(&I, "memset");
}
case Intrinsic::stackprotector: {
- // Emit code inline code to store the stack guard onto the stack.
+ // Emit code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
const Value *Op1 = I.getArgOperand(0); // The guard's value.
@@ -1484,7 +1487,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return false;
// The call to CreateRegs builds two sequential registers, to store the
- // both the the returned values.
+ // both the returned values.
unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
.addReg(Reg1).addReg(Reg2);
@@ -1515,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
return DoSelectCall(I, 0);
}
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+ const ImmutableCallSite &CS) {
+ if (Subtarget.is64Bit())
+ return 0;
+ if (Subtarget.isTargetWindows())
+ return 0;
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ return 0;
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS.paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
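The new helper encodes the i386 callee-cleanup rule for the hidden sret pointer; a minimal sketch of the same decision with plain flags (the real code queries the X86Subtarget and the call site's attributes) is:

    // Hypothetical stand-in flags for the Subtarget/ImmutableCallSite queries.
    static unsigned bytesPoppedByCallee(bool Is64Bit, bool IsWindows,
                                        bool IsFastOrGHCCC, bool FirstArgIsSRet,
                                        bool SRetIsInReg) {
      if (Is64Bit || IsWindows || IsFastOrGHCCC)
        return 0;               // these ABIs never make the callee pop the sret slot
      if (!FirstArgIsSRet || SRetIsInReg)
        return 0;               // no hidden pointer on the stack to pop
      return 4;                 // 32-bit System V: callee pops the sret pointer
    }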
+
// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
const CallInst *CI = cast<CallInst>(I);
@@ -1548,12 +1567,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- SmallVector<uint64_t, 4> Offsets;
GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
- Outs, TLI, &Offsets);
+ Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- *FuncInfo.MF, FTy->isVarArg(),
- Outs, FTy->getContext());
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
if (!CanLowerReturn)
return false;
@@ -1667,7 +1685,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
- I->getParent()->getContext());
+ I->getParent()->getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64())
@@ -1693,7 +1711,6 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
@@ -1737,6 +1754,14 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
ArgVT = VA.getLocVT();
break;
}
+ case CCValAssign::VExt:
+ // VExt has not been implemented, so this should be impossible to reach
+ // for now. However, fall back to SelectionDAG isel once it is implemented.
+ return false;
+ case CCValAssign::Indirect:
+ // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
+ // support this.
+ return false;
}
if (VA.isRegLoc()) {
@@ -1838,27 +1863,24 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
MIB.addGlobalAddress(GV, 0, OpFlags);
}
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
- MIB.addReg(X86::EBX);
+ MIB.addReg(X86::EBX, RegState::Implicit);
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
- MIB.addReg(X86::AL);
+ MIB.addReg(X86::AL, RegState::Implicit);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
-
- // Add a register mask with the call-preserved registers.
- // Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
- CS.paramHasAttr(1, Attribute::StructRet))
- NumBytesCallee = 4;
+ const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
@@ -1889,7 +1911,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
SmallVector<unsigned, 4> UsedRegs;
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
- I->getParent()->getContext());
+ I->getParent()->getContext());
unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1903,7 +1925,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
RVLocs[i].getLocReg() == X86::ST1)) {
if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
CopyVT = MVT::f80;
- CopyReg = createResultReg(X86::RFP80RegisterClass);
+ CopyReg = createResultReg(&X86::RFP80RegClass);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
CopyReg);
@@ -2001,37 +2023,37 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
- RC = X86::GR8RegisterClass;
+ RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = X86::GR16RegisterClass;
+ RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
break;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = X86::FR32RegisterClass;
+ RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
- RC = X86::RFP32RegisterClass;
+ RC = &X86::RFP32RegClass;
}
break;
case MVT::f64:
if (X86ScalarSSEf64) {
Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = X86::FR64RegisterClass;
+ RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
- RC = X86::RFP64RegisterClass;
+ RC = &X86::RFP64RegClass;
}
break;
case MVT::f80:
@@ -2120,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
- case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = X86::FsFLD0SS;
- RC = X86::FR32RegisterClass;
- } else {
- Opc = X86::LD_Fp032;
- RC = X86::RFP32RegisterClass;
- }
- break;
- case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = X86::FsFLD0SD;
- RC = X86::FR64RegisterClass;
- } else {
- Opc = X86::LD_Fp064;
- RC = X86::RFP64RegisterClass;
- }
- break;
- case MVT::f80:
- // No f80 support yet.
- return false;
+ default: return false;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
}
unsigned ResultReg = createResultReg(RC);
@@ -2160,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
- X86InstrInfo &XII = (X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
@@ -2179,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
- return new X86FastISel(funcInfo);
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
}
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index ed1707da13d8..955c75aa563f 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -130,7 +130,7 @@ namespace {
// The hardware keeps track of how many FP registers are live, so we have
// to model that exactly. Usually, each live register corresponds to an
// FP<n> register, but when dealing with calls, returns, and inline
- // assembly, it is sometimes neccesary to have live scratch registers.
+ // assembly, it is sometimes necessary to have live scratch registers.
unsigned Stack[8]; // FP<n> Registers in each stack slot...
unsigned StackTop; // The current top of the FP stack.
@@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(0); // Remove the explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// Result gets pushed on the stack.
pushReg(DestReg);
}
@@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
-
+
// Convert from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
@@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
MI->RemoveOperand(1);
MI->getOperand(0).setReg(getSTReg(Op1));
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// If we kill the second operand, make sure to pop it from the stack.
if (Op0 != Op1 && KillsOp1) {
// Get this value off of the register stack.
@@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Assert that the top of stack contains the right FP register.
assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
"Top of stack not the right register for RET!");
-
+
// Ok, everything is good, mark the value as not being on the stack
// anymore so that our assertion about the stack being empty at end of
// block doesn't fire.
StackTop = 0;
return;
}
-
+
// Otherwise, we are returning two values:
// 2) If returning the same value for both, we only have one thing in the FP
// stack. Consider: RET FP1, FP1
if (StackTop == 1) {
assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
"Stack misconfiguration for RET!");
-
+
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
-
+
/// Okay we know we have two different FPx operands now:
assert(StackTop == 2 && "Must have two values live!");
-
+
/// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
/// in ST(1). In this case, emit an fxch.
if (getStackEntry(0) == SecondFPRegOp) {
assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
moveToTop(FirstFPRegOp, MI);
}
-
+
/// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
/// ST(1). Just remove both from our understanding of the stack and return.
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 000e3757cf76..22386885b6be 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -45,14 +45,14 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = TM.getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- RI->needsStackRealignment(MF) ||
+ RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
+ MMI.callsUnwindInit() || MMI.callsEHReturn());
}
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
@@ -125,8 +125,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (const uint16_t *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
- Uses.insert(*AsI);
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
}
const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
@@ -369,7 +369,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// getCompactUnwindRegNum - Get the compact unwind number for a given
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
+static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) {
for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
@@ -398,13 +398,13 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
// 4 3
// 5 3
//
- static const unsigned CU32BitRegs[] = {
+ static const uint16_t CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
- static const unsigned CU64BitRegs[] = {
+ static const uint16_t CU64BitRegs[] = {
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
};
- const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+ const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
@@ -466,13 +466,13 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
static uint32_t
encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
bool Is64Bit) {
- static const unsigned CU32BitRegs[] = {
+ static const uint16_t CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
- static const unsigned CU64BitRegs[] = {
+ static const uint16_t CU64BitRegs[] = {
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
};
- const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+ const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
// Encode the registers in the order they were saved, 3-bits per register. The
// registers are numbered from 1 to CU_NUM_SAVED_REGS.
@@ -650,6 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
+ unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
// If we're forcing a stack realignment we can't rely on just the frame
@@ -721,10 +722,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers are pushed on stack before the stack
+ // is realigned.
+ FrameSize -= X86FI->getCalleeSavedFrameSize();
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ }
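With the callee-saved area now subtracted before rounding, the adjustment for a realigned frame can be checked with concrete, purely illustrative numbers:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Illustrative values: 0x58 bytes of stack, 8-byte slots, 0x18 bytes of
      // callee-saved registers, and a requested 32-byte alignment.
      uint64_t StackSize = 0x58, SlotSize = 8, CSSize = 0x18, MaxAlign = 32;
      uint64_t FrameSize = StackSize - SlotSize;   // 0x50
      FrameSize -= CSSize;                         // CSRs were pushed already
      uint64_t NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
      assert(NumBytes == 0x40);                    // 0x38 rounded up to 0x40
      return 0;
    }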
// Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
@@ -781,19 +786,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(FramePtr);
-
- // Realign stack
- if (RegInfo->needsStackRealignment(MF)) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
- .addReg(StackPtr)
- .addImm(-MaxAlign)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
@@ -823,6 +815,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ // Realign stack after we pushed callee-saved registers (so that we'll be
+ // able to calculate their offsets from the frame pointer).
+
+ // NOTE: We push the registers before realigning the stack, so
+ // vector callee-saved (xmm) registers may be saved w/o proper
+ // alignment in this way. However, currently these regs are saved in
+ // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
+ // this shouldn't be a problem.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+
DL = MBB.findDebugLoc(MBBI);
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -913,6 +926,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
UseLEA, TII, *RegInfo);
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF)) {
+ // Update the base pointer with the current stack pointer.
+ unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
@@ -997,10 +1022,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
- NumBytes = FrameSize - CSSize;
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers were pushed on stack before the stack
+ // was realigned.
+ FrameSize -= CSSize;
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - CSSize;
+ }
// Pop EBP.
BuildMI(MBB, MBBI, DL,
@@ -1010,7 +1039,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Skip the callee-saved pop instructions.
- MachineBasicBlock::iterator LastCSPop = MBBI;
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
@@ -1021,6 +1049,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
+ MachineBasicBlock::iterator FirstCSPop = MBBI;
DL = MBBI->getDebugLoc();
@@ -1032,28 +1061,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF)) {
- // We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back.
- if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
- *RegInfo);
- MBBI = prior(LastCSPop);
- }
-
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
- } else if (MFI->hasVarSizedObjects()) {
- if (CSSize) {
- unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI =
- addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
- MBB.insert(MBBI, MI);
+ if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ if (RegInfo->needsStackRealignment(MF))
+ MBBI = FirstCSPop;
+ if (CSSize != 0) {
+ unsigned Opc = getLEArOpcode(Is64Bit);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
} else {
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(FramePtr);
}
} else if (NumBytes) {
@@ -1124,8 +1141,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineInstr *NewMI = prior(MBBI);
- for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
+ NewMI->copyImplicitOps(MBBI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
@@ -1142,16 +1158,25 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
- const X86RegisterInfo *RI =
+ const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
const MachineFrameInfo *MFI = MF.getFrameInfo();
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
- if (RI->needsStackRealignment(MF)) {
+ if (RegInfo->hasBasePointer(MF)) {
+ assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
- Offset += RI->getSlotSize();
+ return Offset + RegInfo->getSlotSize();
+ } else {
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ return Offset + StackSize;
+ }
+ } else if (RegInfo->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ return Offset + RegInfo->getSlotSize();
} else {
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
@@ -1162,7 +1187,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
return Offset + StackSize;
// Skip the saved EBP.
- Offset += RI->getSlotSize();
+ Offset += RegInfo->getSlotSize();
// Skip the RETADDR move area
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1174,6 +1199,22 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
return Offset;
}
+int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ // We can't calculate offset from frame pointer if the stack is realigned,
+ // so enforce usage of stack/base pointer. The base pointer is used when we
+ // have dynamic allocas in addition to dynamic realignment.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ else if (RegInfo->needsStackRealignment(MF))
+ FrameReg = RegInfo->getStackRegister();
+ else
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
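getFrameIndexReference picks which register frame-index references are based on; the priority it implements can be summarized in a small sketch (a hypothetical enum stands in for the real register numbers):

    enum FrameBase { UseBasePtr, UseStackPtr, UseFramePtr };

    // Same priority as above: a base pointer wins when dynamic allocas coexist
    // with realignment, a realigned frame falls back to the stack pointer, and
    // everything else goes through the frame pointer.
    static FrameBase pickFrameBase(bool HasBasePointer, bool NeedsRealignment) {
      if (HasBasePointer)
        return UseBasePtr;
      if (NeedsRealignment)
        return UseStackPtr;
      return UseFramePtr;
    }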
+
bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -1307,6 +1348,10 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
"Slot for EBP register must be last in order to be found!");
(void)FrameIdx;
}
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}
static bool
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index d55a49763a4d..dc515dc39c79 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -60,6 +60,8 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
};
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8e2b1d6b5dd2..27195b4522a6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -60,7 +60,7 @@ namespace {
int Base_FrameIndex;
unsigned Scale;
- SDValue IndexReg;
+ SDValue IndexReg;
int32_t Disp;
SDValue Segment;
const GlobalValue *GV;
@@ -80,11 +80,11 @@ namespace {
bool hasSymbolicDisplacement() const {
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
-
+
bool hasBaseOrIndexReg() const {
return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
-
+
/// isRIPRelative - Return true if this addressing mode is already RIP
/// relative.
bool isRIPRelative() const {
@@ -94,7 +94,7 @@ namespace {
return RegNode->getReg() == X86::RIP;
return false;
}
-
+
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
@@ -104,7 +104,7 @@ namespace {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
if (Base_Reg.getNode() != 0)
- Base_Reg.getNode()->dump();
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
@@ -113,7 +113,7 @@ namespace {
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- dbgs() << "nul";
+ dbgs() << "nul";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
@@ -187,6 +187,7 @@ namespace {
private:
SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
@@ -212,21 +213,21 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
-
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
-
+
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
- inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
@@ -425,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
-
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
@@ -461,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
++NumLoadMoved;
continue;
}
-
+
// Lower fpround and fpextend nodes that target the FP stack to be store and
// load to the stack. This is a gross hack. We would like to simply mark
// these as being illegal, but when we do that, legalize produces these when
@@ -472,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
-
+
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
@@ -495,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getConstantOperandVal(1))
continue;
}
-
+
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
@@ -504,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
DebugLoc dl = N->getDebugLoc();
-
+
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
@@ -523,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// To avoid invalidating 'I', back it up to the convert node.
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
+
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
CurDAG->DeleteNode(N);
- }
+ }
}
@@ -583,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
-
+
// load gs:0 -> GS segment register.
// load fs:0 -> FS segment register.
//
@@ -592,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
- Subtarget->isTargetELF())
+ Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -601,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
}
-
+
return true;
}
@@ -991,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::SHL:
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
break;
-
+
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
@@ -1166,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
-
+
// Try again after commuting the operands.
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
!MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
@@ -1202,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM = Backup;
}
break;
-
+
case ISD::AND: {
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
@@ -1274,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
-
+
if (Parent &&
// This list of opcodes are all the nodes that have an "addr:$ptr" operand
// that are not a MemSDNode, and thus don't have proper addrspace info.
@@ -1289,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (AddrSpace == 257)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
-
+
if (MatchAddress(N, AM))
return false;
@@ -1335,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
// Check to see if the top elements are all zeros (or bitcast of zeros).
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
@@ -1410,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1421,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1434,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
} else {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1448,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsProfitableToFold(N, P, P) ||
!IsLegalToFold(N, P, P, OptLevel))
return false;
-
+
return SelectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
@@ -1699,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
-
+
// Optimize common patterns for __sync_or_and_fetch and similar arith
// operations where the result is not used. This allows us to use the "lock"
// version of the arithmetic instruction.
@@ -1726,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
default:
return 0;
}
-
+
bool isCN = false;
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
isCN = true;
Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
}
-
+
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1771,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
}
break;
}
-
+
assert(Opc != 0 && "Invalid arith lock transform!");
DebugLoc dl = Node->getDebugLoc();
@@ -1851,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
/// is suitable for doing the {load; increment or decrement; store} to modify
/// transformation.
-static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
@@ -1875,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
// Return LoadNode by reference.
LoadNode = cast<LoadSDNode>(Load);
// is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
- EVT LdVT = LoadNode->getMemoryVT();
- if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
LdVT != MVT::i8)
return false;
// Is store the only read of the loaded value?
if (!Load.hasOneUse())
return false;
-
+
// Is the address of the store the same as the load?
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
LoadNode->getOffset() != StoreNode->getOffset())
@@ -1905,6 +1906,20 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
ChainCheck = true;
continue;
}
+
+ // Make sure using Op as part of the chain would not cause a cycle here.
+ // In theory, we could check whether the chain node is a predecessor of
+ // the load. But that can be very expensive. Instead visit the uses and
+ // make sure they all have smaller node id than the load.
+ int LoadId = LoadNode->getNodeId();
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = UI->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != 0)
+ continue;
+ if (UI->getNodeId() > LoadId)
+ return false;
+ }
+
ChainOps.push_back(Op);
}
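
Aside on the check just added above: it avoids a full (and potentially expensive) predecessor walk by relying on node IDs approximating a topological order at this point in selection, so any user of the chain operand whose ID exceeds the load's could still reach the load and close a cycle. A minimal standalone sketch of that idea, using hypothetical Node/User types rather than the real SDNode API:

  #include <vector>
  struct Node { int Id; std::vector<Node*> Users; };
  // Conservatively reject if any user of the chained node is ordered after the load.
  static bool mayFormCycle(const Node &Chained, const Node &Load) {
    for (const Node *U : Chained.Users)
      if (U->Id > Load.Id)
        return true;
    return false;
  }
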
@@ -1938,12 +1953,44 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+ // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+ SDValue Chain = Node->getOperand(0);
+ SDValue VSrc = Node->getOperand(2);
+ SDValue Base = Node->getOperand(3);
+ SDValue VIdx = Node->getOperand(4);
+ SDValue VMask = Node->getOperand(5);
+ ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+ if (!Scale)
+ return 0;
+
+ SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
+ MVT::Other);
+
+ // Memory Operands: Base, Scale, Index, Disp, Segment
+ SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ Disp, Segment, VMask, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ VTs, Ops, array_lengthof(Ops));
+ // Node has 2 outputs: VDst and MVT::Other.
+ // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
+ // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
+ // of ResNode.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
+ return ResNode;
+}
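
For orientation (not part of the patch): the llvm.x86.avx2.gather.* intrinsics that SelectGather handles are typically produced from the AVX2 gather builtins in <immintrin.h>; exactly which builtin maps to which intrinsic depends on the front end. A hedged example that should reach the VGATHERDPDY path when compiled with AVX2 enabled:

  #include <immintrin.h>
  // Gathers base[idx[0]], ..., base[idx[3]]; scale 8 = sizeof(double).
  __m256d gather4(const double *base, __m128i idx) {
    return _mm256_i32gather_pd(base, idx, 8);
  }
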
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-
+
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
@@ -1953,23 +2000,82 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ // We already called ReplaceUses inside SelectGather.
+ return NULL;
+ break;
+ }
+ }
+ break;
+ }
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+
case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
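
A brief note on what feeds these ATOM*6432 pseudo-instructions: on 32-bit x86 a 64-bit atomic read-modify-write has no single-instruction form, so the nodes above are selected into pseudos that are expanded later in the backend (around a CMPXCHG8B loop). A hedged source-level example, assuming an i686-style target:

  #include <atomic>
  long long add_one(std::atomic<long long> &v) {
    return v.fetch_add(1);   // 64-bit atomic add; expected to go through ATOMADD6432 on i386
  }
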
case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
@@ -2013,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
break;
- unsigned ShlOp, Op = 0;
+ unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2036,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL32ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
@@ -2046,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL64ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
@@ -2062,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
-
+
unsigned LoReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -2071,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
}
-
+
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
-
+
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
-
+
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
-
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -2128,7 +2236,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
- N0, SDValue()).getValue(1);
+ N0, SDValue()).getValue(1);
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
@@ -2168,7 +2276,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
+ LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
@@ -2181,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -2332,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- case X86ISD::CMP: {
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -2449,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// a simple increment or decrement through memory of that value, if the
// uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used. (This also applies to
+ // the EFLAGS on the original DEC are used. (This also applies to
// {INC,DEC}X{64,32,16,8}.)
// We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node, probably with a new keyword
@@ -2481,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- EVT LdVT = LoadNode->getMemoryVT();
+ EVT LdVT = LoadNode->getMemoryVT();
unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
@@ -2494,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return Result;
}
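
A small example (for illustration only) of the source pattern this fused load-modify-store selection targets: a load, an increment or decrement of the loaded value, and a store back to the same address with no other uses of the loaded value, which, subject to the checks in isLoadIncOrDecStore, can become a single memory-operand INC/DEC such as incl (%rdi):

  void bump(int *counter) { *counter += 1; }
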
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPESTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ SDValue N3 = Node->getOperand(3);
+ SDValue N4 = Node->getOperand(4);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ X86::EAX, N1, SDValue()).getValue(1);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
+ N3, InFlag).getValue(1);
+
+ SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
+ X86::PCMPESTRIrr;
+ InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPISTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
+ X86::PCMPISTRIrr;
+ SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
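
For context: X86ISD::PCMPISTRI is what the SSE4.2 string-compare builtins lower to; the index result lives in ECX and the predicate results in EFLAGS, which is why the code above copies from both physical registers. A hedged usage example with the <immintrin.h> intrinsic:

  #include <immintrin.h>
  // Index of the first differing byte of two 16-byte blocks (16 if they match).
  int first_diff(__m128i a, __m128i b) {
    return _mm_cmpistri(a, b,
                        _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY);
  }
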
}
SDNode *ResNode = SelectCode(Node);
@@ -2521,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return true;
break;
}
-
+
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
@@ -2530,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 04299f300801..ea66a6115d7e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <bitset>
+#include <cctype>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -62,41 +63,33 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
/// simple subregister reference. Idx is an index in the 128 bits we
/// want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
-static SDValue Extract128BitVector(SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl) {
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, DebugLoc dl) {
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+ assert(VT.is256BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
- int Factor = VT.getSizeInBits()/128;
+ unsigned Factor = VT.getSizeInBits()/128;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
// Extract from UNDEF is UNDEF.
if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getNode(ISD::UNDEF, dl, ResultVT);
-
- if (isa<ConstantSDNode>(Idx)) {
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return DAG.getUNDEF(ResultVT);
- // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
- // we can match to VEXTRACTF128.
- unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
- // This is the index of the first element of the 128-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
- * ElemsPerChunk);
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
- SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
- VecIdx);
-
- return Result;
- }
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
- return SDValue();
+ return Result;
}
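
Worked example of the index normalization performed above: extracting from a v8f32 (ElVT = f32, 32 bits) with IdxVal = 5 gives ElemsPerChunk = 128/32 = 4 and NormalizedIdxVal = ((5*32)/128)*4 = 4, so the EXTRACT_SUBVECTOR starts at element 4, the first element of the upper 128-bit half, which is the alignment VEXTRACTF128 needs.
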
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
@@ -104,34 +97,41 @@ static SDValue Extract128BitVector(SDValue Vec,
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result,
- SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
DebugLoc dl) {
- if (isa<ConstantSDNode>(Idx)) {
- EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
- EVT ElVT = VT.getVectorElementType();
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- EVT ResultVT = Result.getValueType();
+ EVT VT = Vec.getValueType();
+ assert(VT.is128BitVector() && "Unexpected vector size!");
- // Insert the relevant 128 bits.
- unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
- // This is the index of the first element of the 128-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
- * ElemsPerChunk);
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
- VecIdx);
- return Result;
- }
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
+ * ElemsPerChunk);
- return SDValue();
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+}
+
+/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTOR nodes of
+/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORS.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
}
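
A short note on the helper just added: with VT = MVT::v8f32 and NumElems = 8, Concat128BitVectors places V1 at elements [0,4) and V2 at elements [4,8); the call sequence it performs is effectively

  SDValue Lo  = Insert128BitVector(DAG.getUNDEF(MVT::v8f32), V1, 0, DAG, dl);
  SDValue Cat = Insert128BitVector(Lo, V2, 4, DAG, dl);

which later lowering can match to a VINSERTF128 of the upper half.
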
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
@@ -140,10 +140,12 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
if (Subtarget->isTargetEnvMacho()) {
if (is64Bit)
- return new X8664_MachoTargetObjectFile();
+ return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
+ if (Subtarget->isTargetLinux())
+ return new X86LinuxTargetObjectFile();
if (Subtarget->isTargetELF())
return new TargetLoweringObjectFileELF();
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
@@ -162,7 +164,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
TD = getTargetData();
// Set up the TargetLowering object.
- static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
+ static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -171,11 +173,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
- // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling.
- if (Subtarget->is64Bit())
+ // For Atom, always use ILP scheduling.
+ if (Subtarget->isAtom())
+ setSchedulingPreference(Sched::ILP);
+ else if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
- else if (Subtarget->isAtom())
- setSchedulingPreference(Sched::Hybrid);
else
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -215,11 +217,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// Set up the register classes.
- addRegisterClass(MVT::i8, X86::GR8RegisterClass);
- addRegisterClass(MVT::i16, X86::GR16RegisterClass);
- addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ addRegisterClass(MVT::i8, &X86::GR8RegClass);
+ addRegisterClass(MVT::i16, &X86::GR16RegClass);
+ addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget->is64Bit())
- addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+ addRegisterClass(MVT::i64, &X86::GR64RegClass);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -345,7 +347,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (unsigned i = 0, e = 4; i != e; ++i) {
+ for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -492,7 +494,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShouldFoldAtomicFences(true);
// Expand certain atomics
- for (unsigned i = 0, e = 4; i != e; ++i) {
+ for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
@@ -567,8 +569,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, X86::FR32RegisterClass);
- addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+ addRegisterClass(MVT::f32, &X86::FR32RegClass);
+ addRegisterClass(MVT::f64, &X86::FR64RegClass);
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS , MVT::f64, Custom);
@@ -599,8 +601,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
} else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, X86::FR32RegisterClass);
- addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, &X86::FR32RegClass);
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -632,8 +634,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
- addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
- addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ addRegisterClass(MVT::f32, &X86::RFP32RegClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::UNDEF, MVT::f32, Expand);
@@ -660,7 +662,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Long double always uses X87.
if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
@@ -705,8 +707,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= MVT::LAST_VECTOR_VALUETYPE; ++VT) {
setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
@@ -729,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
@@ -764,8 +767,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction((MVT::SimpleValueType)VT,
(MVT::SimpleValueType)InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
@@ -776,7 +779,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
- addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+ addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -813,7 +816,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
- addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
@@ -826,18 +829,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
- addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
// registers cannot be used even for integer operations.
- addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
- addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
- addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
- addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
+ addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
+ addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
+ addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
@@ -867,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
-
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
+ for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,
- VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -903,24 +896,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1007,16 +999,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
-
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
- addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
- addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
- addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
+ addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
+ addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
+ addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
+ addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
+ addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
@@ -1040,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
-
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1070,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ if (Subtarget->hasFMA()) {
+ setOperationAction(ISD::FMA, MVT::v8f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v2f64, Custom);
+ setOperationAction(ISD::FMA, MVT::f32, Custom);
+ setOperationAction(ISD::FMA, MVT::f64, Custom);
+ }
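
A hedged source-level example of what the ISD::FMA Custom handling enabled above is for (assuming the subtarget reports FMA, e.g. built with -mfma): the fused multiply-add intrinsics typically come in as ISD::FMA nodes.

  #include <immintrin.h>
  __m256 madd(__m256 a, __m256 b, __m256 c) {
    return _mm256_fmadd_ps(a, b, c);   // a*b + c with a single rounding
  }
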
+
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
@@ -1121,60 +1112,60 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// Custom lower several nodes for 256-bit types.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
- setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
+ VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
@@ -1218,17 +1209,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
- if (Subtarget->hasBMI())
- setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::XOR);
computeRegisterProperties();
@@ -1243,6 +1238,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
+  // Predictable cmovs don't hurt on Atom because it's in-order.
+ predictableSelectIsExpensive = !Subtarget->isAtom();
+
setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1276,7 +1274,6 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
break;
}
}
- return;
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -1411,18 +1408,19 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = (Subtarget->is64Bit()
- ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ RRC = Subtarget->is64Bit() ?
+ (const TargetRegisterClass*)&X86::GR64RegClass :
+ (const TargetRegisterClass*)&X86::GR32RegClass;
break;
case MVT::x86mmx:
- RRC = X86::VR64RegisterClass;
+ RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
case MVT::v4f64:
- RRC = X86::VR128RegisterClass;
+ RRC = &X86::VR128RegClass;
break;
}
return std::make_pair(RRC, Cost);
@@ -1457,7 +1455,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
+ MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -1501,6 +1499,16 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
+ // Promote values to the appropriate types
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
+
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
@@ -1638,7 +1646,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -1655,7 +1663,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue Val;
// If this is a call to a function that returns an fp value on the floating
- // point stack, we must guarantee the the value is popped from the stack, so
+ // point stack, we must guarantee the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
// if the return value is not used. We use the FpPOP_RETVAL instruction
// instead.
@@ -1699,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
-static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+enum StructReturnType {
+ NotStructReturn,
+ RegStructReturn,
+ StackStructReturn
+};
+static StructReturnType
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
- return false;
+ return NotStructReturn;
- return Outs[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
-static bool
-ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+static StructReturnType
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
- return false;
+ return NotStructReturn;
- return Ins[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
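
An illustration of the classification introduced here: a function that returns a large aggregate by value receives a hidden pointer to the result slot (sret). On typical 32-bit x86 ABIs the caller passes that pointer on the stack (StackStructReturn); if the front end marks the sret argument inreg, it travels in a register instead (RegStructReturn). A minimal example:

  struct Big { long long a, b, c, d; };
  Big makeBig() { return Big{1, 2, 3, 4}; }   // result written through the hidden sret pointer
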
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
@@ -1850,19 +1874,19 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
else if (RegVT == MVT::f32)
- RC = X86::FR32RegisterClass;
+ RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
- RC = X86::FR64RegisterClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
- RC = X86::VR256RegisterClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
- RC = X86::VR128RegisterClass;
+ RC = &X86::FR64RegClass;
+ else if (RegVT.is256BitVector())
+ RC = &X86::VR256RegClass;
+ else if (RegVT.is128BitVector())
+ RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
- RC = X86::VR64RegisterClass;
+ RC = &X86::VR64RegClass;
else
llvm_unreachable("Unknown argument type!");
@@ -2004,7 +2028,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
+ &X86::GR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2020,7 +2044,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
- unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
@@ -2031,7 +2055,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
- X86::VR128RegisterClass);
+ &X86::VR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
@@ -2054,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- ArgsAreStructReturn(Ins))
+ argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2127,19 +2151,24 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
}
SDValue
-X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool &isTailCall = CLI.IsTailCall;
+ bool isVarArg = CLI.IsVarArg;
+
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
bool IsWindows = Subtarget->isTargetWindows();
- bool IsStructRet = CallIsStructReturn(Outs);
+ StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
if (MF.getTarget().Options.DisableTailCalls)
@@ -2148,8 +2177,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, OutVals, Ins, DAG);
+ isVarArg, SR != NotStructReturn,
+ MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -2231,7 +2261,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
@@ -2282,27 +2312,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDValue InFlag;
- // Tail call byval lowering might overwrite argument registers so in case of
- // tail call optimization the copies to registers are lowered later.
- if (!isTailCall)
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
- Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc(), getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy())));
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
@@ -2340,12 +2355,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
- DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
+ DAG.getConstant(NumXMMRegs, MVT::i8)));
}
-
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
@@ -2359,8 +2372,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- // Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2400,19 +2411,20 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
- // Copy arguments to their registers.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag =SDValue();
-
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
FPDiff, dl);
}
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2514,14 +2526,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Add an implicit use GOT pointer in EBX.
- if (!isTailCall && Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
- if (Is64Bit && isVarArg && !IsWin64)
- Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
@@ -2551,7 +2555,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- IsStructRet)
+ SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2743,7 +2747,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -2764,7 +2768,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
@@ -2778,12 +2782,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext());
+ getTargetMachine(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext());
+ getTargetMachine(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -2810,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64()) {
@@ -2872,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
FastISel *
-X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return X86::createFastISel(funcInfo);
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return X86::createFastISel(funcInfo, libInfo);
}
@@ -2911,6 +2916,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::UNPCKH:
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
+ case X86ISD::VPERMI:
return true;
}
}
@@ -3051,10 +3057,12 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_NS;
- } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ }
+ if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
- } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ }
+ if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_LE;
@@ -3170,12 +3178,12 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return false;
}
-/// isSequentialOrUndefInRange - Return true if every element in Mask, begining
+/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
- int Pos, int Size, int Low) {
- for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
return false;
return true;
@@ -3194,8 +3202,8 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) {
- if (VT != MVT::v8i16)
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
@@ -3204,16 +3212,27 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) {
// Upper quadword shuffled.
for (unsigned i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ if (!isUndefOrInRange(Mask[i], 4, 8))
return false;
+ if (VT == MVT::v16i16) {
+ // Lower quadword copied in order or undef.
+ if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
+ return false;
+
+ // Upper quadword shuffled.
+ for (unsigned i = 12; i != 16; ++i)
+ if (!isUndefOrInRange(Mask[i], 12, 16))
+ return false;
+ }
+
return true;
}
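
Worked example for the extended check above: the v8i16 mask <0,1,2,3, 7,6,5,4> leaves the lower quadword in place and only permutes elements within the upper quadword, so it is a valid PSHUFHW mask. With AVX2, the same constraint is applied per 128-bit lane of a v16i16, so e.g. <0,1,2,3, 7,6,5,4, 8,9,10,11, 15,14,13,12> is accepted, while any mask element crossing a lane boundary is rejected.
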
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) {
- if (VT != MVT::v8i16)
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
@@ -3222,9 +3241,20 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) {
// Lower quadword shuffled.
for (unsigned i = 0; i != 4; ++i)
- if (Mask[i] >= 4)
+ if (!isUndefOrInRange(Mask[i], 0, 4))
return false;
+ if (VT == MVT::v16i16) {
+ // Upper quadword copied in order.
+ if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 8; i != 12; ++i)
+ if (!isUndefOrInRange(Mask[i], 8, 12))
+ return false;
+ }
+
return true;
}
@@ -3374,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3393,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3410,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3418,11 +3448,11 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4)
return false;
- for (unsigned i = 0; i != NumElems/2; ++i)
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i + NumElems))
return false;
- for (unsigned i = NumElems/2; i != NumElems; ++i)
+ for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
@@ -3432,23 +3462,71 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
unsigned NumElems = VT.getVectorNumElements();
- if ((NumElems != 2 && NumElems != 4)
- || VT.getSizeInBits() > 128)
+ if (NumElems != 2 && NumElems != 4)
return false;
- for (unsigned i = 0; i != NumElems/2; ++i)
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
- for (unsigned i = 0; i != NumElems/2; ++i)
- if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems))
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i + e], i + NumElems))
return false;
return true;
}
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ DebugLoc dl = SVOp->getDebugLoc();
+
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return SDValue();
+
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // These are the special masks that may be optimized.
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ bool MatchEvenMask = true;
+ bool MatchOddMask = true;
+ for (int i=0; i<8; ++i) {
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+ MatchEvenMask = false;
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+ MatchOddMask = false;
+ }
+ static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
+ static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
+
+ const int *CompactionMask;
+ if (MatchEvenMask)
+ CompactionMask = CompactionMaskEven;
+ else if (MatchOddMask)
+ CompactionMask = CompactionMaskOdd;
+ else
+ return SDValue();
+
+ SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+ SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
+ UndefNode, CompactionMask);
+ SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
+ UndefNode, CompactionMask);
+ static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+}
+
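// Illustrative sketch (editorial, not part of the patch): a standalone check of the
// mask algebra behind Compact8x32ShuffleNode above. Compacting each input in-lane
// and then applying the fixed unpack mask reproduces the even interleave mask
// {0,8,2,10,4,12,6,14}; all names below are local to the sketch.
#include <cassert>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &A,
                                const std::vector<int> &B,
                                const int *Mask, unsigned N) {
  std::vector<int> R(N);
  for (unsigned i = 0; i != N; ++i)
    R[i] = Mask[i] < 0 ? -1
                       : (Mask[i] < (int)N ? A[Mask[i]] : B[Mask[i] - N]);
  return R;
}

int main() {
  std::vector<int> V1 = {0, 1, 2, 3, 4, 5, 6, 7};         // stand-in lane values
  std::vector<int> V2 = {10, 11, 12, 13, 14, 15, 16, 17};
  std::vector<int> Undef(8, -1);
  static const int CompactEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
  static const int Unpack[]      = {0, 8, 1, 9, 4, 12, 5, 13};
  static const int Even[]        = {0, 8, 2, 10, 4, 12, 6, 14};
  std::vector<int> Op0 = shuffle(V1, Undef, CompactEven, 8);
  std::vector<int> Op1 = shuffle(V2, Undef, CompactEven, 8);
  // Compaction followed by the unpack equals the original even-mask shuffle.
  assert(shuffle(Op0, Op1, Unpack, 8) == shuffle(V1, V2, Even, 8));
  return 0;
}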
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
@@ -3606,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3628,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || VT.getSizeInBits() != 256)
+ if (!HasAVX || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3720,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// element of vector 2 and the other elements to come from vector 1 in order.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- unsigned NumOps = VT.getVectorNumElements();
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
+
+ unsigned NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -3788,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!HasAVX || !VT.is256BitVector())
+ return false;
- if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts != 4)
return false;
for (unsigned i = 0; i != NumElts/2; ++i)
@@ -3806,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned e = VT.getVectorNumElements() / 2;
@@ -3880,9 +3961,8 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
if (Elt < 0) continue;
- Elt %= NumLaneElts;
- unsigned ShAmt = i << Shift;
- if (ShAmt >= 8) ShAmt -= 8;
+ Elt &= NumLaneElts - 1;
+ unsigned ShAmt = (i << Shift) % 8;
Mask |= Elt << ShAmt;
}
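// Illustrative sketch (editorial, not from the patch): the loop above packs each
// in-lane index into the SHUFP/PSHUFD imm8, two bits per element for 4-element
// lanes. The Shift value used here (1) comes from code outside this hunk, so treat
// it as an assumption; reversing <3,2,1,0> should give the familiar 0x1B immediate.
#include <cassert>

int main() {
  const int MaskElt[4] = {3, 2, 1, 0};      // v4f32/v4i32 reverse shuffle
  const unsigned NumLaneElts = 4, Shift = 1; // assumed: 2 bits per element
  unsigned Imm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (MaskElt[i] < 0) continue;
    unsigned Elt = MaskElt[i] & (NumLaneElts - 1);
    Imm |= Elt << ((i << Shift) % 8);
  }
  assert(Imm == 0x1B);
  return 0;
}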
@@ -3892,30 +3972,48 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
+ "Unsupported vector type for PSHUFHW");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
unsigned Mask = 0;
- // 8 nodes, but we only care about the last 4.
- for (unsigned i = 7; i >= 4; --i) {
- int Val = N->getMaskElt(i);
- if (Val >= 0)
- Mask |= (Val - 4);
- if (i != 4)
- Mask <<= 2;
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ // 8 nodes per lane, but we only care about the last 4.
+ for (unsigned i = 0; i < 4; ++i) {
+ int Elt = N->getMaskElt(l+i+4);
+ if (Elt < 0) continue;
+ Elt &= 0x3; // only 2-bits.
+ Mask |= Elt << (i * 2);
+ }
}
+
return Mask;
}
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
+ "Unsupported vector type for PSHUFHW");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
unsigned Mask = 0;
- // 8 nodes, but we only care about the first 4.
- for (int i = 3; i >= 0; --i) {
- int Val = N->getMaskElt(i);
- if (Val >= 0)
- Mask |= Val;
- if (i != 0)
- Mask <<= 2;
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ // 8 nodes per lane, but we only care about the first 4.
+ for (unsigned i = 0; i < 4; ++i) {
+ int Elt = N->getMaskElt(l+i);
+ if (Elt < 0) continue;
+ Elt &= 0x3; // only 2-bits
+ Mask |= Elt << (i * 2);
+ }
}
+
return Mask;
}
@@ -4016,13 +4114,14 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i != NumElems; ++i) {
- int idx = SVOp->getMaskElt(i);
- if (idx < 0)
- MaskVec.push_back(idx);
- else if (idx < (int)NumElems)
- MaskVec.push_back(idx + NumElems);
- else
- MaskVec.push_back(idx - NumElems);
+ int Idx = SVOp->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElems)
+ Idx += NumElems;
+ else
+ Idx -= NumElems;
+ }
+ MaskVec.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
SVOp->getOperand(0), &MaskVec[0]);
@@ -4033,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
return false;
@@ -4090,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
@@ -4107,7 +4206,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
- for (unsigned i = NumElems/2; i != NumElems; ++i)
+ for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
if (!isUndefOrEqual(Mask[i], i+NumElems))
return false;
return true;
@@ -4159,11 +4258,12 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
+ unsigned Size = VT.getSizeInBits();
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (VT.getSizeInBits() == 128) { // SSE
+ if (Size == 128) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -4171,7 +4271,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
- } else if (VT.getSizeInBits() == 256) { // AVX
+ } else if (Size == 256) { // AVX
if (Subtarget->hasAVX2()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
@@ -4183,7 +4283,9 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
- }
+ } else
+ llvm_unreachable("Unexpected vector type");
+
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -4194,25 +4296,22 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- assert((VT.is128BitVector() || VT.is256BitVector())
- && "Expected a 128-bit or 256-bit vector type");
+ unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 256) {
+ if (Size == 256) {
if (HasAVX2) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
- Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
- Vec = Insert128BitVector(InsV, Vec,
- DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
- } else {
+ } else if (Size == 128) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- }
+ } else
+ llvm_unreachable("Unexpected vector type");
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -4255,9 +4354,8 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
- unsigned Half = NumElems/2;
SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != Half; ++i) {
+ for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
Mask.push_back(i + Half);
Mask.push_back(i + NumElems + Half);
}
@@ -4289,15 +4387,14 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
EVT VT = V.getValueType();
DebugLoc dl = V.getDebugLoc();
- assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
- && "Vector size not supported");
+ unsigned Size = VT.getSizeInBits();
- if (VT.getSizeInBits() == 128) {
+ if (Size == 128) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
- } else {
+ } else if (Size == 256) {
// To use VPERMILPS to splat scalars, the second half of indices must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
@@ -4307,7 +4404,8 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
&SplatMask[0]);
- }
+ } else
+ llvm_unreachable("Vector size not supported");
return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
@@ -4328,9 +4426,8 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
if (Size == 256) {
- unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0;
- V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
- if (Idx > 0)
+ V1 = Extract128BitVector(V1, EltNo, DAG, dl);
+ if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
}
@@ -4346,10 +4443,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// into the low and high part. This is necessary because we want
// to use VPERM* to shuffle the vectors
if (Size == 256) {
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1,
- DAG.getConstant(0, MVT::i32), DAG, dl);
- V1 = Insert128BitVector(InsV, V1,
- DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
return getLegalSplat(DAG, V1, EltNo);
@@ -4377,7 +4471,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
/// target specific opcode. Returns true if the Mask could be calculated.
/// Sets IsUnary to true if only uses one source.
-static bool getTargetShuffleMask(SDNode *N, EVT VT,
+static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
@@ -4408,12 +4502,17 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT,
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::VPERMI:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
@@ -4473,20 +4572,21 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- unsigned NumElems = VT.getVectorNumElements();
+ MVT ShufVT = V.getValueType().getSimpleVT();
+ unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
bool IsUnary;
- if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
+ if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt < 0)
- return DAG.getUNDEF(VT.getVectorElementType());
+ return DAG.getUNDEF(ShufVT.getVectorElementType());
SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
- : N->getOperand(1);
+ : N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -4631,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below supports any bitwidth size, there are no
// shift instructions which handle more than 128-bit vectors.
- if (SVOp->getValueType(0).getSizeInBits() > 128)
+ if (!SVOp->getValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -4726,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
+ assert(VT.is128BitVector() && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -4794,7 +4894,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
@@ -4802,7 +4902,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
false, false, false, 0);
SmallVector<int, 8> Mask;
- for (int i = 0; i < NumElems; ++i)
+ for (unsigned i = 0; i != NumElems; ++i)
Mask.push_back(EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
@@ -4866,8 +4966,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->isInvariant(), LDBase->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1 &&
- DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
+ }
+ if (NumElems == 4 && LastLoadedElt == 1 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
@@ -4896,6 +4997,9 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for broadcast.");
+
SDValue Ld;
bool ConstSplatVal;
@@ -4930,8 +5034,17 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
return SDValue();
SDValue Sc = Op.getOperand(0);
- if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR)
- return SDValue();
+ if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
+ Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+
+ // Use the register form of the broadcast instruction available on AVX2.
+ if (VT.is256BitVector())
+ Sc = Extract128BitVector(Sc, 0, DAG, dl);
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ }
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
@@ -4946,8 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
}
}
- bool Is256 = VT.getSizeInBits() == 256;
- bool Is128 = VT.getSizeInBits() == 128;
+ bool Is256 = VT.is256BitVector();
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
@@ -4957,9 +5069,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
- if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
- (Is128 && (ScalarSize == 32))) {
-
+ if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) {
const Constant *C = 0;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -4971,40 +5081,32 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
- // The scalar source must be a normal load.
- if (!ISD::isNormalLoad(Ld.getNode()))
- return SDValue();
-
- // Reject loads that have uses of the chain result
- if (Ld->hasAnyUseOfValue(1))
- return SDValue();
-
+ bool IsLoad = ISD::isNormalLoad(Ld.getNode());
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
- // VBroadcast to YMM
- if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
+ // Handle AVX2 in-register broadcasts.
+ if (!IsLoad && Subtarget->hasAVX2() &&
+ (ScalarSize == 32 || (Is256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
- // VBroadcast to XMM
- if (Is128 && (ScalarSize == 32))
+ // The scalar source must be a normal load.
+ if (!IsLoad)
+ return SDValue();
+
+ if (ScalarSize == 32 || (Is256 && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
- // double since there is vbroadcastsd xmm
+ // double since there is no vbroadcastsd xmm
if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
- // VBroadcast to YMM
- if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
-
- // VBroadcast to XMM
- if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
+ if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
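// Illustrative sketch (editorial, not from the patch): the register-form broadcasts
// this code now emits line up with the AVX2 intrinsics below (assumed here from
// <immintrin.h>; compile with -mavx2). There is a ymm vbroadcastsd but no xmm form,
// which is why the 64-bit scalar into a 128-bit vector case above is limited to
// integer types.
#include <immintrin.h>

__m256i splat_i32_ymm(__m128i v) { return _mm256_broadcastd_epi32(v); } // vpbroadcastd ymm, xmm
__m256d splat_f64_ymm(__m128d v) { return _mm256_broadcastsd_pd(v); }   // vbroadcastsd ymm, xmm
__m128i splat_i64_xmm(__m128i v) { return _mm_broadcastq_epi64(v); }    // vpbroadcastq xmm, xmm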
@@ -5102,8 +5204,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Mask.push_back(Idx);
for (unsigned i = 1; i != VecElts; ++i)
Mask.push_back(i);
- Item = DAG.getVectorShuffle(VecVT, dl, Item,
- DAG.getUNDEF(Item.getValueType()),
+ Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
&Mask[0]);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5120,12 +5221,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5134,12 +5235,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
- Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
- DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
} else {
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5171,7 +5271,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
- for (unsigned i = 0; i < NumElems; i++)
+ for (unsigned i = 0; i != NumElems; ++i)
MaskVec.push_back(i == Idx ? 0 : 1);
return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
}
@@ -5199,7 +5299,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
@@ -5212,10 +5312,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
NumElems/2);
// Recreate the wider vector with the lower and upper part.
- SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower,
- DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
@@ -5283,7 +5380,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
@@ -5344,62 +5441,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
-// them in a MMX register. This is better than doing a stack convert.
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
-
- assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
- ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
- int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
- SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- InVec = Op.getOperand(1);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
- InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
- } else {
- InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
- SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- Mask[0] = 0; Mask[1] = 2;
- VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
- }
- return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
-}
-
// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+ assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1,
- DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
SDValue
X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- EVT ResVT = Op.getValueType();
-
assert(Op.getNumOperands() == 2);
- assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
- "Unsupported CONCAT_VECTORS for value type");
-
- // We support concatenate two MMX registers and place them in a MMX register.
- // This is better than doing a stack convert.
- if (ResVT.is128BitVector())
- return LowerMMXCONCAT_VECTORS(Op, DAG);
// 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
@@ -5407,75 +5466,64 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
}
// Try to lower a shuffle node into a simple blend instruction.
-static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = Op.getValueType();
- EVT InVT = V1.getValueType();
- int MaskSize = VT.getVectorNumElements();
- int InSize = InVT.getVectorNumElements();
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ unsigned NumElems = VT.getVectorNumElements();
if (!Subtarget->hasSSE41())
return SDValue();
- if (MaskSize != InSize)
- return SDValue();
-
- int ISDNo = 0;
+ unsigned ISDNo = 0;
MVT OpTy;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v8i16:
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v8i16;
- break;
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v8i16;
+ break;
case MVT::v4i32:
case MVT::v4f32:
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v4f32;
- break;
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v4f32;
+ break;
case MVT::v2i64:
case MVT::v2f64:
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v2f64;
- break;
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v2f64;
+ break;
case MVT::v8i32:
case MVT::v8f32:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v8f32;
- break;
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v8f32;
+ break;
case MVT::v4i64:
case MVT::v4f64:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v4f64;
- break;
- case MVT::v16i16:
- if (!Subtarget->hasAVX2())
- return SDValue();
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v16i16;
- break;
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v4f64;
+ break;
}
assert(ISDNo && "Invalid Op Number");
unsigned MaskVals = 0;
- for (int i = 0; i < MaskSize; ++i) {
+ for (unsigned i = 0; i != NumElems; ++i) {
int EltIdx = SVOp->getMaskElt(i);
- if (EltIdx == i || EltIdx == -1)
+ if (EltIdx == (int)i || EltIdx < 0)
MaskVals |= (1<<i);
- else if (EltIdx == (i + MaskSize))
+ else if (EltIdx == (int)(i + NumElems))
continue; // Bit is set to zero;
- else return SDValue();
+ else
+ return SDValue();
}
V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
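// Illustrative sketch (editorial, not from the patch): the loop above only accepts
// masks where element i is either i (keep V1), i+NumElems (take V2), or undef, and
// records the V1 picks as a bitmask. For a v8i16 mask <0,9,2,11,4,13,6,15> that
// bitmask is 0x55; how the bits map onto the BLENDP* immediate is handled by code
// outside this hunk.
#include <cassert>

int main() {
  const int MaskElt[8] = {0, 9, 2, 11, 4, 13, 6, 15};
  const unsigned NumElems = 8;
  unsigned MaskVals = 0;
  for (unsigned i = 0; i != NumElems; ++i) {
    if (MaskElt[i] == (int)i || MaskElt[i] < 0)
      MaskVals |= (1 << i);                    // element stays from V1 (or undef)
    else
      assert(MaskElt[i] == (int)(i + NumElems) && "not a blendable mask");
  }
  assert(MaskVals == 0x55);
  return 0;
}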
@@ -5629,13 +5677,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
bool TwoInputs = V1Used && V2Used;
for (unsigned i = 0; i != 8; ++i) {
int EltIdx = MaskVals[i] * 2;
- if (TwoInputs && (EltIdx >= 16)) {
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- continue;
- }
- pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
+ int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
@@ -5649,13 +5694,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.clear();
for (unsigned i = 0; i != 8; ++i) {
int EltIdx = MaskVals[i] * 2;
- if (EltIdx < 16) {
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- continue;
- }
- pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ int Idx0 = (EltIdx < 16) ? 0x80 : EltIdx - 16;
+ int Idx1 = (EltIdx < 16) ? 0x80 : EltIdx - 15;
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
@@ -5731,10 +5773,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
int EltIdx = MaskVals[i];
if (EltIdx < 0)
continue;
- SDValue ExtOp = (EltIdx < 8)
- ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
- DAG.getIntPtrConstant(EltIdx))
- : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ SDValue ExtOp = (EltIdx < 8) ?
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx)) :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
DAG.getIntPtrConstant(EltIdx - 8));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
DAG.getIntPtrConstant(i));
@@ -5755,21 +5797,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
- // FIXME: kill V2Only once shuffles are canonizalized by getNode.
- bool V1Only = true;
- bool V2Only = true;
- for (unsigned i = 0; i < 16; ++i) {
- int EltIdx = MaskVals[i];
- if (EltIdx < 0)
- continue;
- if (EltIdx < 16)
- V2Only = false;
- else
- V1Only = false;
- }
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
if (TLI.getSubtarget()->hasSSSE3()) {
@@ -5781,23 +5813,16 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// Otherwise, we have elements from both input vectors, and must zero out
// elements that come from V2 in the first mask, and V1 in the second mask
// so that we can OR them together.
- bool TwoInputs = !(V1Only || V2Only);
for (unsigned i = 0; i != 16; ++i) {
int EltIdx = MaskVals[i];
- if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- continue;
- }
+ if (EltIdx < 0 || EltIdx >= 16)
+ EltIdx = 0x80;
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
}
- // If all the elements are from V2, assign it to V1 and return after
- // building the first pshufb.
- if (V2Only)
- V1 = V2;
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
- if (!TwoInputs)
+ if (V2IsUndef)
return V1;
// Calculate the shuffle mask for the second input, shuffle it, and
@@ -5805,11 +5830,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
pshufbMask.clear();
for (unsigned i = 0; i != 16; ++i) {
int EltIdx = MaskVals[i];
- if (EltIdx < 16) {
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- continue;
- }
- pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
}
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
@@ -5822,7 +5844,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// the 16 different words that comprise the two doublequadword input vectors.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
- SDValue NewV = V2Only ? V2 : V1;
+ SDValue NewV = V1;
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
int Elt1 = MaskVals[i*2+1];
@@ -5832,9 +5854,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
continue;
// This word of the result is already in the correct place, skip it.
- if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
- continue;
- if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ if ((Elt0 == i*2) && (Elt1 == i*2+1))
continue;
SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
@@ -5896,41 +5916,37 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG, DebugLoc dl) {
- EVT VT = SVOp->getValueType(0);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
- unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT NewVT;
- switch (VT.getSimpleVT().SimpleTy) {
+ MVT NewVT;
+ unsigned Scale;
+ switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected!");
- case MVT::v4f32: NewVT = MVT::v2f64; break;
- case MVT::v4i32: NewVT = MVT::v2i64; break;
- case MVT::v8i16: NewVT = MVT::v4i32; break;
- case MVT::v16i8: NewVT = MVT::v4i32; break;
+ case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
+ case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
+ case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
}
- int Scale = NumElems / NewWidth;
SmallVector<int, 8> MaskVec;
- for (unsigned i = 0; i < NumElems; i += Scale) {
+ for (unsigned i = 0; i != NumElems; i += Scale) {
int StartIdx = -1;
- for (int j = 0; j < Scale; ++j) {
+ for (unsigned j = 0; j != Scale; ++j) {
int EltIdx = SVOp->getMaskElt(i+j);
if (EltIdx < 0)
continue;
- if (StartIdx == -1)
- StartIdx = EltIdx - (EltIdx % Scale);
- if (EltIdx != StartIdx + j)
+ if (StartIdx < 0)
+ StartIdx = (EltIdx / Scale);
+ if (EltIdx != (int)(StartIdx*Scale + j))
return SDValue();
}
- if (StartIdx == -1)
- MaskVec.push_back(-1);
- else
- MaskVec.push_back(StartIdx / Scale);
+ MaskVec.push_back(StartIdx);
}
- V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0));
+ SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1));
return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
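// Illustrative sketch (editorial, not from the patch): the rewrite above succeeds
// when every group of Scale mask entries reads Scale consecutive elements of one
// aligned group, or is undef. For example, this v16i8 mask narrows (Scale = 4) to
// the v4i32 mask <1, 0, -1, 3>.
#include <cassert>
#include <vector>

int main() {
  const int Mask[16] = {4, 5, 6, 7, 0, 1, 2, 3,
                        -1, -1, -1, -1, 12, 13, 14, 15};
  const unsigned Scale = 4;
  std::vector<int> Narrow;
  for (unsigned i = 0; i != 16; i += Scale) {
    int StartIdx = -1;
    for (unsigned j = 0; j != Scale; ++j) {
      int EltIdx = Mask[i + j];
      if (EltIdx < 0) continue;
      if (StartIdx < 0) StartIdx = EltIdx / Scale;
      assert(EltIdx == (int)(StartIdx * Scale + j) && "not narrowable");
    }
    Narrow.push_back(StartIdx);
  }
  assert((Narrow == std::vector<int>{1, 0, -1, 3}));
  return 0;
}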
@@ -5973,6 +5989,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
/// which could not be matched by any known target specific shuffle
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+
+ SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
@@ -5981,14 +6002,15 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
DebugLoc dl = SVOp->getDebugLoc();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
- SDValue Shufs[2];
+ SDValue Output[2];
SmallVector<int, 16> Mask;
for (unsigned l = 0; l < 2; ++l) {
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands (recorded in InputUsed).
// If building a suitable shuffle vector proves too hard, then bail
- // out with useBuildVector set.
+ // out with UseBuildVector set.
+ bool UseBuildVector = false;
int InputUsed[2] = { -1, -1 }; // Not yet discovered.
unsigned LaneStart = l * NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
@@ -6020,38 +6042,61 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up.
- return SDValue();
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ UseBuildVector = true;
+ break;
}
// Add the mask index for the new shuffle vector.
Mask.push_back(Idx + OpNo * NumLaneElems);
}
- if (InputUsed[0] < 0) {
+ if (UseBuildVector) {
+ SmallVector<SDValue, 16> SVOps;
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
+ int Idx = SVOp->getMaskElt(i+LaneStart);
+ if (Idx < 0) {
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumElems;
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumElems;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ SVOp->getOperand(Input),
+ DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the output using a BUILD_VECTOR.
+ Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0],
+ SVOps.size());
+ } else if (InputUsed[0] < 0) {
// No input vectors were used! The result is undefined.
- Shufs[l] = DAG.getUNDEF(NVT);
+ Output[l] = DAG.getUNDEF(NVT);
} else {
SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
- DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
- DAG, dl);
+ (InputUsed[0] % 2) * NumLaneElems,
+ DAG, dl);
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
- DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
- DAG, dl);
+ (InputUsed[1] % 2) * NumLaneElems, DAG, dl);
// At least one input vector was used. Create a new shuffle vector.
- Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
+ Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
}
Mask.clear();
}
// Concatenate the result back
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
- DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
- DAG, dl);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]);
}
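// Illustrative sketch (editorial, not from the patch): the BUILD_VECTOR fallback
// added above triggers when a single 128-bit output lane draws from more than two
// of the four 128-bit input halves. For a v8i32 mask whose low lane is <0,4,8,12>,
// all four halves are touched, so that lane can no longer be expressed as one
// two-input 128-bit shuffle of extracted halves.
#include <cassert>
#include <set>

int main() {
  const int Mask[8] = {0, 4, 8, 12, 1, 1, 1, 1}; // v8i32 shuffle of two inputs
  const unsigned NumLaneElems = 4;
  std::set<int> HalvesUsedByLowLane;
  for (unsigned i = 0; i != NumLaneElems; ++i)
    if (Mask[i] >= 0)
      HalvesUsedByLowLane.insert(Mask[i] / (int)NumLaneElems);
  // 0 -> V1 low, 4 -> V1 high, 8 -> V2 low, 12 -> V2 high: four sources.
  assert(HalvesUsedByLowLane.size() > 2 && "takes the BUILD_VECTOR path");
  return 0;
}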
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
@@ -6063,7 +6108,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
- assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+ assert(VT.is128BitVector() && "Unsupported vector size");
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
@@ -6107,7 +6152,9 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
- } else if (NumLo == 3 || NumHi == 3) {
+ }
+
+ if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
// intermediate vector with the one element from Y and the element from X
@@ -6143,17 +6190,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
Mask1[2] = HiIndex & 1 ? 6 : 4;
Mask1[3] = HiIndex & 1 ? 4 : 6;
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
- } else {
- Mask1[0] = HiIndex & 1 ? 2 : 0;
- Mask1[1] = HiIndex & 1 ? 0 : 2;
- Mask1[2] = PermMask[2];
- Mask1[3] = PermMask[3];
- if (Mask1[2] >= 0)
- Mask1[2] += 4;
- if (Mask1[3] >= 0)
- Mask1[3] += 4;
- return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
}
+
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
@@ -6302,7 +6349,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
- // If we don't care about the second element, procede to use movss.
+ // If we don't care about the second element, proceed to use movss.
if (SVOp->getMaskElt(1) != -1)
return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
}
@@ -6360,7 +6407,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
- if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
+ VT == MVT::v16i16 || VT == MVT::v32i8) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
@@ -6564,11 +6612,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// new vector_shuffle with the corrected mask.
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
- if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) {
+ if (isUNPCKLMask(NewMask, VT, HasAVX2, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) {
+ if (isUNPCKHMask(NewMask, VT, HasAVX2, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
- }
}
if (Commuted) {
@@ -6605,12 +6652,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
- if (isPSHUFHWMask(M, VT))
+ if (isPSHUFHWMask(M, VT, HasAVX2))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
- if (isPSHUFLWMask(M, VT))
+ if (isPSHUFLWMask(M, VT, HasAVX2))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
@@ -6647,7 +6694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG);
+ SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
if (BlendOp.getNode())
return BlendOp;
@@ -6689,7 +6736,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
- if (NumElems == 4 && VT.getSizeInBits() == 128)
+ if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
// Handle general 256-bit shuffles
@@ -6705,7 +6752,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ if (!Op.getOperand(0).getValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -6714,7 +6761,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
- } else if (VT.getSizeInBits() == 16) {
+ }
+
+ if (VT.getSizeInBits() == 16) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
if (Idx == 0)
@@ -6729,7 +6778,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
- } else if (VT == MVT::f32) {
+ }
+
+ if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
@@ -6749,7 +6800,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
- } else if (VT == MVT::i32 || VT == MVT::i64) {
+ }
+
+ if (VT == MVT::i32 || VT == MVT::i64) {
// ExtractPS/pextrq works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
@@ -6769,22 +6822,22 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
- if (VecVT.getSizeInBits() == 256) {
+ if (VecVT.is256BitVector()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
- bool Upper = IdxVal >= NumElems/2;
- Vec = Extract128BitVector(Vec,
- DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl);
+ Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
+ if (IdxVal >= NumElems/2)
+ IdxVal -= NumElems/2;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
- Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx);
+ DAG.getConstant(IdxVal, MVT::i32));
}
- assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+ assert(VecVT.is128BitVector() && "Unexpected vector length");
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
@@ -6811,7 +6864,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
- } else if (VT.getSizeInBits() == 32) {
+ }
+
+ if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
@@ -6823,7 +6878,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
- } else if (VT.getSizeInBits() == 64) {
+ }
+
+ if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
@@ -6856,7 +6913,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return SDValue();
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
@@ -6876,7 +6933,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
- } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ }
+
+ if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
@@ -6889,8 +6948,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) &&
- isa<ConstantSDNode>(N2)) {
+ }
+
+ if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
// PINSR* works with constant index.
return Op;
}
@@ -6909,23 +6969,22 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first extract the 128-bit vector,
// insert the element into the extracted half and then place it back.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
// Get the desired 128-bit vector half.
unsigned NumElems = VT.getVectorNumElements();
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
- bool Upper = IdxVal >= NumElems/2;
- SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32);
- SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl);
+ SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired half.
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V,
- N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2);
+ bool Upper = IdxVal >= NumElems/2;
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
+ DAG.getConstant(Upper ? IdxVal-NumElems/2 : IdxVal, MVT::i32));
// Insert the changed part back to the 256-bit vector
- return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
+ return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
if (Subtarget->hasSSE41())
@@ -6954,7 +7013,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
- if (OpVT.getSizeInBits() > 128) {
+ if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
EVT VT128 = EVT::getVectorVT(*Context,
OpVT.getVectorElementType(),
@@ -6963,19 +7022,16 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
// Insert the 128-bit vector.
- return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
- DAG.getConstant(0, MVT::i32),
- DAG, dl);
+ return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
- if (Op.getValueType() == MVT::v1i64 &&
+ if (OpVT == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
- "Expected an SSE type!");
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
@@ -6989,9 +7045,11 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 128
- && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
- return Extract128BitVector(Vec, Idx, DAG, dl);
+ if (Op.getNode()->getValueType(0).is128BitVector() &&
+ Vec.getNode()->getValueType(0).is256BitVector() &&
+ isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return Extract128BitVector(Vec, IdxVal, DAG, dl);
}
}
return SDValue();
@@ -7008,9 +7066,11 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 256
- && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
- return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ if (Op.getNode()->getValueType(0).is256BitVector() &&
+ SubVec.getNode()->getValueType(0).is128BitVector() &&
+ isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
}
return SDValue();
@@ -7219,7 +7279,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags) {
+ unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
@@ -7227,12 +7287,16 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
+
+ X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
+ : X86ISD::TLSADDR;
+
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
- Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
} else {
SDValue Ops[] = { Chain, TGA };
- Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -7264,11 +7328,49 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
X86::RAX, X86II::MO_TLSGD);
}
-// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
-// "local exec" model.
+static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the start address of the TLS block for this module.
+ X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<X86MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ SDValue Base;
+ if (is64Bit) {
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
+ X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ } else {
+ SDValue InFlag;
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+ Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
+ X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ }
+
+ // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
+ // of Base.
+
+ // Build x@dtpoff.
+ unsigned char OperandFlags = X86II::MO_DTPOFF;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ // Add x@dtpoff with the base.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
- bool is64Bit) {
+ bool is64Bit, bool isPIC) {
DebugLoc dl = GA->getDebugLoc();
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
@@ -7286,25 +7388,36 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
unsigned WrapperKind = X86ISD::Wrapper;
if (model == TLSModel::LocalExec) {
OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
- } else if (is64Bit) {
- assert(model == TLSModel::InitialExec);
- OperandFlags = X86II::MO_GOTTPOFF;
- WrapperKind = X86ISD::WrapperRIP;
+ } else if (model == TLSModel::InitialExec) {
+ if (is64Bit) {
+ OperandFlags = X86II::MO_GOTTPOFF;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else {
+ OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
+ }
} else {
- assert(model == TLSModel::InitialExec);
- OperandFlags = X86II::MO_INDNTPOFF;
+ llvm_unreachable("Unexpected model");
}
- // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
- // exec)
+ // emit "addl x@ntpoff,%eax" (local exec)
+ // or "addl x@indntpoff,%eax" (initial exec)
+  // or "addl x@gotntpoff(%ebx),%eax" (initial exec, 32-bit pic)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
- if (model == TLSModel::InitialExec)
+ if (model == TLSModel::InitialExec) {
+ if (isPIC && !is64Bit) {
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT),
+ Offset);
+ }
+
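+    // For 32-bit PIC initial-exec the GOT slot is addressed relative to the
+    // GOT base register (the add above); the load below then fetches the
+    // variable's thread-pointer offset from the GOT.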
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false,
+ 0);
+ }
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -7318,29 +7431,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- // TODO: implement the "local dynamic" model
- // TODO: implement the "initial exec"model for pic executables
-
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->resolveAliasedGlobal(false);
-
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
- case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+ case TLSModel::LocalDynamic:
+ return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
+ Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
- Subtarget->is64Bit());
+ Subtarget->is64Bit(),
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
- } else if (Subtarget->isTargetDarwin()) {
+ llvm_unreachable("Unknown TLS model.");
+ }
+
+ if (Subtarget->isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
@@ -7383,7 +7493,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
Chain.getValue(1));
- } else if (Subtarget->isTargetWindows()) {
+ }
+
+ if (Subtarget->isTargetWindows()) {
// Just use the implicit TLS architecture
    // Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7429,7 +7541,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
false, false, false, 0);
SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
- getPointerTy());
+ getPointerTy());
IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
@@ -7600,9 +7712,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
- haddpd %xmm0, %xmm0
+ haddpd %xmm0, %xmm0
#else
- pshufd $0x4e, %xmm0, %xmm1
+ pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
@@ -7693,12 +7805,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
// Handle final rounding.
EVT DestVT = Op.getValueType();
- if (DestVT.bitsLT(MVT::f64)) {
+ if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
DAG.getIntPtrConstant(0));
- } else if (DestVT.bitsGT(MVT::f64)) {
+ if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
- }
// Handle final rounding.
return Sub;
@@ -7719,10 +7830,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
EVT DstVT = Op.getValueType();
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
- else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
- else if (Subtarget->is64Bit() &&
- SrcVT == MVT::i64 && DstVT == MVT::f32)
+ if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
@@ -7899,9 +8009,9 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);
- else
- // The node is the result.
- return FIST;
+
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
@@ -7916,9 +8026,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);
- else
- // The node is the result.
- return FIST;
+
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -7931,7 +8041,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EltVT = VT.getVectorElementType();
Constant *C;
if (EltVT == MVT::f64) {
- C = ConstantVector::getSplat(2,
+ C = ConstantVector::getSplat(2,
ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
} else {
C = ConstantVector::getSplat(4,
@@ -7965,15 +8075,15 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
- MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
- DAG.getNode(ISD::BITCAST, dl, XORVT,
- Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
- } else {
- return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
}
+
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
@@ -8172,7 +8282,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// Otherwise use a regular EFLAGS-setting instruction.
switch (Op.getNode()->getOpcode()) {
default: llvm_unreachable("unexpected operator!");
- case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::SUB:
+ Opcode = X86ISD::SUB;
+ break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -8198,6 +8310,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
+ if (Opcode == X86ISD::CMP) {
+ SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1));
+ // We can't replace usage of SUB with CMP.
+ // The SUB node will be removed later because there is no use of it.
+ return SDValue(New.getNode(), 0);
+ }
+
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
@@ -8217,9 +8337,41 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return EmitTest(Op0, X86CC, DAG);
DebugLoc dl = Op0.getDebugLoc();
+ if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
+ Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
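+    // Only the flag result (value #1) of the SUB node is used here; its
+    // arithmetic result stays dead unless a real subtraction of the same
+    // operands gets CSE'd with it.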
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
+ Op0, Op1);
+ return SDValue(Sub.getNode(), 1);
+ }
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
+/// Convert a comparison if required by the subtarget.
+SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
+ SelectionDAG &DAG) const {
+ // If the subtarget does not support the FUCOMI instruction, floating-point
+ // comparisons have to be converted.
+ if (Subtarget->hasCMov() ||
+ Cmp.getOpcode() != X86ISD::CMP ||
+ !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
+ !Cmp.getOperand(1).getValueType().isFloatingPoint())
+ return Cmp;
+
+ // The instruction selector will select an FUCOM instruction instead of
+ // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
+ // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
+ // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
+ DebugLoc dl = Cmp.getDebugLoc();
+ SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
+ SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
+ SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
+ DAG.getConstant(8, MVT::i8));
+ SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+ return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
+}
+
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
@@ -8341,6 +8493,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
@@ -8350,24 +8503,22 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
DebugLoc dl = Op.getDebugLoc();
SDValue CC = Op.getOperand(2);
- SDValue Idx0 = DAG.getConstant(0, MVT::i32);
- SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
- SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
- SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
- SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
MVT EltVT = VT.getVectorElementType().getSimpleVT();
@@ -8389,10 +8540,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
- unsigned SSECC = 8;
+#ifndef NDEBUG
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+ unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
@@ -8405,7 +8558,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
@@ -8419,33 +8572,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
- SDValue UNORD, EQ;
- UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- } else if (SetCCOpcode == ISD::SETONE) {
- SDValue ORD, NEQ;
- ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(4, MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- llvm_unreachable("Illegal FP comparison");
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
@@ -8453,17 +8606,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0;
+ unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false;
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
@@ -8480,10 +8633,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
- return SDValue();
- if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
- return SDValue();
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
+ return SDValue();
+ }
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
@@ -8510,7 +8665,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
- if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
+ Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
@@ -8542,6 +8698,16 @@ static bool isAllOnes(SDValue V) {
return C && C->isAllOnesValue();
}
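+// Returns true if V is a truncation whose discarded high bits are known to be
+// zero, so a test of V can safely look through the truncate and test the wider
+// input instead.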
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+ if (V.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue VOp0 = V.getOperand(0);
+ unsigned InBits = VOp0.getValueSizeInBits();
+ unsigned Bits = V.getValueSizeInBits();
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+}
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
@@ -8572,8 +8738,25 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
+ // Apply further optimizations for special cases
+ // (select (x != 0), -1, 0) -> neg & sbb
+ // (select (x == 0), 0, -1) -> neg & sbb
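+    // Negating x sets CF exactly when x is non-zero, so SETCC_CARRY with
+    // COND_B materializes all-ones (-1) in that case and zero otherwise.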
+ if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
+ if (YC->isNullValue() &&
+ (isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
+ SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
+ DAG.getConstant(0, CmpOp0.getValueType()),
+ CmpOp0);
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ return Res;
+ }
+
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
@@ -8654,9 +8837,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -8679,7 +8862,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
- if (Cond.getOpcode() == X86ISD::CMP) {
+ if (Cond.getOpcode() == X86ISD::SUB) {
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
@@ -8918,6 +9102,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -8947,6 +9132,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -8960,9 +9146,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -8980,6 +9166,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
@@ -9018,7 +9205,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const Function *F = MF.getFunction();
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; I++)
+ I != E; ++I)
if (I->hasNestAttr())
report_fatal_error("Cannot use segmented stacks with functions that "
"have nested arguments.");
@@ -9201,12 +9388,15 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
if (isa<ConstantSDNode>(ShAmt)) {
+ // Constant may be a TargetConstant. Use a regular constant.
+ uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp,
+ DAG.getConstant(ShiftAmt, MVT::i32));
}
}
@@ -9223,10 +9413,15 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
- ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+
+ // The return type has to be a 128-bit type with the same element
+ // type as the input type.
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
@@ -9261,8 +9456,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc = 0;
- ISD::CondCode CC = ISD::SETCC_INVALID;
+ unsigned Opc;
+ ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
@@ -9336,245 +9531,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // XOP comparison intrinsics
- case Intrinsic::x86_xop_vpcomltb:
- case Intrinsic::x86_xop_vpcomltw:
- case Intrinsic::x86_xop_vpcomltd:
- case Intrinsic::x86_xop_vpcomltq:
- case Intrinsic::x86_xop_vpcomltub:
- case Intrinsic::x86_xop_vpcomltuw:
- case Intrinsic::x86_xop_vpcomltud:
- case Intrinsic::x86_xop_vpcomltuq:
- case Intrinsic::x86_xop_vpcomleb:
- case Intrinsic::x86_xop_vpcomlew:
- case Intrinsic::x86_xop_vpcomled:
- case Intrinsic::x86_xop_vpcomleq:
- case Intrinsic::x86_xop_vpcomleub:
- case Intrinsic::x86_xop_vpcomleuw:
- case Intrinsic::x86_xop_vpcomleud:
- case Intrinsic::x86_xop_vpcomleuq:
- case Intrinsic::x86_xop_vpcomgtb:
- case Intrinsic::x86_xop_vpcomgtw:
- case Intrinsic::x86_xop_vpcomgtd:
- case Intrinsic::x86_xop_vpcomgtq:
- case Intrinsic::x86_xop_vpcomgtub:
- case Intrinsic::x86_xop_vpcomgtuw:
- case Intrinsic::x86_xop_vpcomgtud:
- case Intrinsic::x86_xop_vpcomgtuq:
- case Intrinsic::x86_xop_vpcomgeb:
- case Intrinsic::x86_xop_vpcomgew:
- case Intrinsic::x86_xop_vpcomged:
- case Intrinsic::x86_xop_vpcomgeq:
- case Intrinsic::x86_xop_vpcomgeub:
- case Intrinsic::x86_xop_vpcomgeuw:
- case Intrinsic::x86_xop_vpcomgeud:
- case Intrinsic::x86_xop_vpcomgeuq:
- case Intrinsic::x86_xop_vpcomeqb:
- case Intrinsic::x86_xop_vpcomeqw:
- case Intrinsic::x86_xop_vpcomeqd:
- case Intrinsic::x86_xop_vpcomeqq:
- case Intrinsic::x86_xop_vpcomequb:
- case Intrinsic::x86_xop_vpcomequw:
- case Intrinsic::x86_xop_vpcomequd:
- case Intrinsic::x86_xop_vpcomequq:
- case Intrinsic::x86_xop_vpcomneb:
- case Intrinsic::x86_xop_vpcomnew:
- case Intrinsic::x86_xop_vpcomned:
- case Intrinsic::x86_xop_vpcomneq:
- case Intrinsic::x86_xop_vpcomneub:
- case Intrinsic::x86_xop_vpcomneuw:
- case Intrinsic::x86_xop_vpcomneud:
- case Intrinsic::x86_xop_vpcomneuq:
- case Intrinsic::x86_xop_vpcomfalseb:
- case Intrinsic::x86_xop_vpcomfalsew:
- case Intrinsic::x86_xop_vpcomfalsed:
- case Intrinsic::x86_xop_vpcomfalseq:
- case Intrinsic::x86_xop_vpcomfalseub:
- case Intrinsic::x86_xop_vpcomfalseuw:
- case Intrinsic::x86_xop_vpcomfalseud:
- case Intrinsic::x86_xop_vpcomfalseuq:
- case Intrinsic::x86_xop_vpcomtrueb:
- case Intrinsic::x86_xop_vpcomtruew:
- case Intrinsic::x86_xop_vpcomtrued:
- case Intrinsic::x86_xop_vpcomtrueq:
- case Intrinsic::x86_xop_vpcomtrueub:
- case Intrinsic::x86_xop_vpcomtrueuw:
- case Intrinsic::x86_xop_vpcomtrueud:
- case Intrinsic::x86_xop_vpcomtrueuq: {
- unsigned CC = 0;
- unsigned Opc = 0;
-
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_xop_vpcomltb:
- case Intrinsic::x86_xop_vpcomltw:
- case Intrinsic::x86_xop_vpcomltd:
- case Intrinsic::x86_xop_vpcomltq:
- CC = 0;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomltub:
- case Intrinsic::x86_xop_vpcomltuw:
- case Intrinsic::x86_xop_vpcomltud:
- case Intrinsic::x86_xop_vpcomltuq:
- CC = 0;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomleb:
- case Intrinsic::x86_xop_vpcomlew:
- case Intrinsic::x86_xop_vpcomled:
- case Intrinsic::x86_xop_vpcomleq:
- CC = 1;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomleub:
- case Intrinsic::x86_xop_vpcomleuw:
- case Intrinsic::x86_xop_vpcomleud:
- case Intrinsic::x86_xop_vpcomleuq:
- CC = 1;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomgtb:
- case Intrinsic::x86_xop_vpcomgtw:
- case Intrinsic::x86_xop_vpcomgtd:
- case Intrinsic::x86_xop_vpcomgtq:
- CC = 2;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomgtub:
- case Intrinsic::x86_xop_vpcomgtuw:
- case Intrinsic::x86_xop_vpcomgtud:
- case Intrinsic::x86_xop_vpcomgtuq:
- CC = 2;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomgeb:
- case Intrinsic::x86_xop_vpcomgew:
- case Intrinsic::x86_xop_vpcomged:
- case Intrinsic::x86_xop_vpcomgeq:
- CC = 3;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomgeub:
- case Intrinsic::x86_xop_vpcomgeuw:
- case Intrinsic::x86_xop_vpcomgeud:
- case Intrinsic::x86_xop_vpcomgeuq:
- CC = 3;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomeqb:
- case Intrinsic::x86_xop_vpcomeqw:
- case Intrinsic::x86_xop_vpcomeqd:
- case Intrinsic::x86_xop_vpcomeqq:
- CC = 4;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomequb:
- case Intrinsic::x86_xop_vpcomequw:
- case Intrinsic::x86_xop_vpcomequd:
- case Intrinsic::x86_xop_vpcomequq:
- CC = 4;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomneb:
- case Intrinsic::x86_xop_vpcomnew:
- case Intrinsic::x86_xop_vpcomned:
- case Intrinsic::x86_xop_vpcomneq:
- CC = 5;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomneub:
- case Intrinsic::x86_xop_vpcomneuw:
- case Intrinsic::x86_xop_vpcomneud:
- case Intrinsic::x86_xop_vpcomneuq:
- CC = 5;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomfalseb:
- case Intrinsic::x86_xop_vpcomfalsew:
- case Intrinsic::x86_xop_vpcomfalsed:
- case Intrinsic::x86_xop_vpcomfalseq:
- CC = 6;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomfalseub:
- case Intrinsic::x86_xop_vpcomfalseuw:
- case Intrinsic::x86_xop_vpcomfalseud:
- case Intrinsic::x86_xop_vpcomfalseuq:
- CC = 6;
- Opc = X86ISD::VPCOMU;
- break;
- case Intrinsic::x86_xop_vpcomtrueb:
- case Intrinsic::x86_xop_vpcomtruew:
- case Intrinsic::x86_xop_vpcomtrued:
- case Intrinsic::x86_xop_vpcomtrueq:
- CC = 7;
- Opc = X86ISD::VPCOM;
- break;
- case Intrinsic::x86_xop_vpcomtrueub:
- case Intrinsic::x86_xop_vpcomtrueuw:
- case Intrinsic::x86_xop_vpcomtrueud:
- case Intrinsic::x86_xop_vpcomtrueuq:
- CC = 7;
- Opc = X86ISD::VPCOMU;
- break;
- }
-
- SDValue LHS = Op.getOperand(1);
- SDValue RHS = Op.getOperand(2);
- return DAG.getNode(Opc, dl, Op.getValueType(), LHS, RHS,
- DAG.getConstant(CC, MVT::i8));
- }
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
- return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
- return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
- return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
- return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
- return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -9583,15 +9635,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
@@ -9621,7 +9676,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC = 0;
+ unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
@@ -9672,44 +9727,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
- return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
+ break;
+ }
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
+ }
+
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
case Intrinsic::x86_mmx_pslli_w:
@@ -9724,8 +9828,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
if (isa<ConstantSDNode>(ShAmt))
return SDValue();
- unsigned NewIntNo = 0;
+ unsigned NewIntNo;
switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
@@ -9750,7 +9855,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
  // The vector shift intrinsics with scalars use 32b shift amounts but
@@ -9766,6 +9870,116 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
}
+ case Intrinsic::x86_sse42_pcmpistria128:
+ case Intrinsic::x86_sse42_pcmpestria128:
+ case Intrinsic::x86_sse42_pcmpistric128:
+ case Intrinsic::x86_sse42_pcmpestric128:
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ case Intrinsic::x86_sse42_pcmpistris128:
+ case Intrinsic::x86_sse42_pcmpestris128:
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ case Intrinsic::x86_sse42_pcmpestriz128: {
+ unsigned Opcode;
+ unsigned X86CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse42_pcmpistria128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpestria128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpistric128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpestric128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpistris128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpestris128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse42_pcmpestriz128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_E;
+ break;
+ }
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8),
+ SDValue(PCMP.getNode(), 1));
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ case Intrinsic::x86_sse42_pcmpistri128:
+ case Intrinsic::x86_sse42_pcmpestri128: {
+ unsigned Opcode;
+ if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
+ Opcode = X86ISD::PCMPISTRI;
+ else
+ Opcode = X86ISD::PCMPESTRI;
+
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ }
+ }
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+
+ // RDRAND intrinsics.
+ case Intrinsic::x86_rdrand_16:
+ case Intrinsic::x86_rdrand_32:
+ case Intrinsic::x86_rdrand_64: {
+ // Emit the node with the right value type.
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
+ SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0));
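+    // Result #0 is the random value, #1 the glue carrying CF, #2 the chain.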
+
+ // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise
+ // return the value from Rand, which is always 0, casted to i32.
+ SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
+ DAG.getConstant(1, Op->getValueType(1)),
+ DAG.getConstant(X86::COND_B, MVT::i32),
+ SDValue(Result.getNode(), 1) };
+ SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
+ DAG.getVTList(Op->getValueType(1), MVT::Glue),
+ Ops, 4);
+
+ // Return { result, isValid, chain }.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
+ SDValue(Result.getNode(), 2));
+ }
}
}
@@ -9816,7 +10030,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
@@ -9833,7 +10046,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
- MF.getRegInfo().addLiveOut(StoreAddrReg);
return DAG.getNode(X86ISD::EH_RETURN, dl,
MVT::Other,
@@ -10149,23 +10361,21 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
DebugLoc dl = Op.getDebugLoc();
- SDValue Idx0 = DAG.getConstant(0, MVT::i32);
- SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
- SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
- SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
- SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -10176,14 +10386,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
@@ -10193,7 +10403,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
@@ -10310,6 +10520,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
+ llvm_unreachable("Unknown shift opcode.");
}
if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
@@ -10353,6 +10564,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
+ llvm_unreachable("Unknown shift opcode.");
}
}
}
@@ -10421,15 +10633,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
- SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
- SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
+ SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl);
// Recreate the shift amount vectors
SDValue Amt1, Amt2;
@@ -10448,9 +10659,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
&Amt2Csts[0], NumElems/2);
} else {
// Variable shift amount
- Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
- Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
+ Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl);
}
// Issue new vector shifts for the smaller types
@@ -10560,20 +10770,18 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
return SDValue();
if (!Subtarget->hasAVX2()) {
// needs to be split
- int NumElems = VT.getVectorNumElements();
- SDValue Idx0 = DAG.getConstant(0, MVT::i32);
- SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+ unsigned NumElems = VT.getVectorNumElements();
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
- SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
EVT ExtraEltVT = ExtraVT.getVectorElementType();
- int ExtraNumElems = ExtraVT.getVectorNumElements();
+ unsigned ExtraNumElems = ExtraVT.getVectorNumElements();
ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
ExtraNumElems/2);
SDValue Extra = DAG.getValueType(ExtraVT);
@@ -10859,6 +11067,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -11013,7 +11222,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
- Regs64bit ? X86::RCX : X86::ECX,
+ Regs64bit ? X86::RCX : X86::ECX,
swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
@@ -11118,10 +11327,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
@@ -11190,6 +11401,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
+ case X86ISD::SAHF: return "X86ISD::SAHF";
+ case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::FMADD: return "X86ISD::FMADD";
+ case X86ISD::FMSUB: return "X86ISD::FMSUB";
+ case X86ISD::FNMADD: return "X86ISD::FNMADD";
+ case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
+ case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
+ case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
}
}
@@ -11258,6 +11477,15 @@ bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return true;
}
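+// x86 compare and add instructions encode immediates as a sign-extended
+// 32-bit field, so only values representable as int32_t are legal here.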
+bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm == (int32_t)Imm;
+}
+
+bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // Can also use sub to handle negated immediates.
+ return Imm == (int32_t)Imm;
+}
+
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
@@ -11300,8 +11528,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isMOVLMask(M, VT) ||
isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT) ||
- isPSHUFLWMask(M, VT) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
isPALIGNRMask(M, VT, Subtarget) ||
isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
@@ -11316,7 +11544,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
@@ -11460,7 +11688,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
// result in out1, out2
// fallthrough -->nextMBB
- const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
const unsigned LoadOpc = X86::MOV32rm;
const unsigned NotOpc = X86::NOT32r;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -11662,7 +11890,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
- unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
@@ -11672,7 +11900,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
argOpers[valArgIndx]->isImm()) &&
"invalid operand");
- unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
else
@@ -11687,7 +11915,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MIB.addReg(t2);
// Generate movc
- unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
MIB.addReg(t2);
MIB.addReg(t1);
@@ -11742,8 +11970,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
MIB.addOperand(Op);
}
BuildMI(*BB, MI, dl,
- TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
- MI->getOperand(0).getReg())
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
@@ -11776,24 +12003,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
}
MachineBasicBlock *
-X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
- DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- // First arg in ECX, the second in EAX.
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
- .addReg(MI->getOperand(0).getReg());
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
- .addReg(MI->getOperand(1).getReg());
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
-
- MI->eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -12306,8 +12515,9 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI)
+ .addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
+ .addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
@@ -12517,7 +12727,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ F->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
@@ -12596,8 +12806,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB);
- case X86::MWAIT:
- return EmitMwait(MI, BB);
// Atomic Lowering.
case X86::ATOMAND32:
@@ -12605,25 +12813,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::AND32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
- X86::GR32RegisterClass);
+ &X86::GR32RegClass);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
- X86::GR32RegisterClass);
+ &X86::GR32RegClass);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
- X86::GR32RegisterClass);
+ &X86::GR32RegClass);
case X86::ATOMNAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
- X86::GR32RegisterClass, true);
+ &X86::GR32RegClass, true);
case X86::ATOMMIN32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
@@ -12638,25 +12846,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::AND16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- X86::GR16RegisterClass);
+ &X86::GR16RegClass);
case X86::ATOMOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
X86::OR16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- X86::GR16RegisterClass);
+ &X86::GR16RegClass);
case X86::ATOMXOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
X86::XOR16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- X86::GR16RegisterClass);
+ &X86::GR16RegClass);
case X86::ATOMNAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- X86::GR16RegisterClass, true);
+ &X86::GR16RegClass, true);
case X86::ATOMMIN16:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
case X86::ATOMMAX16:
@@ -12671,25 +12879,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::AND8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- X86::GR8RegisterClass);
+ &X86::GR8RegClass);
case X86::ATOMOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
X86::OR8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- X86::GR8RegisterClass);
+ &X86::GR8RegClass);
case X86::ATOMXOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
X86::XOR8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- X86::GR8RegisterClass);
+ &X86::GR8RegClass);
case X86::ATOMNAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- X86::GR8RegisterClass, true);
+ &X86::GR8RegClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
// This group is for 64-bit host.
case X86::ATOMAND64:
@@ -12697,25 +12905,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
X86::AND64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- X86::GR64RegisterClass);
+ &X86::GR64RegClass);
case X86::ATOMOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
X86::OR64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- X86::GR64RegisterClass);
+ &X86::GR64RegClass);
case X86::ATOMXOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
X86::XOR64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- X86::GR64RegisterClass);
+ &X86::GR64RegClass);
case X86::ATOMNAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- X86::GR64RegisterClass, true);
+ &X86::GR64RegClass, true);
case X86::ATOMMIN64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
case X86::ATOMMAX64:
@@ -12870,10 +13078,10 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
/// inserting the result into the low part of a new 256-bit vector
static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j)
+ for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
SVOp->getMaskElt(j) >= 0)
return false;
@@ -12886,10 +13094,10 @@ static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
/// inserting the result into the high part of a new 256-bit vector
static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j)
+ for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
SVOp->getMaskElt(j) >= 0)
return false;
@@ -12906,7 +13114,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
V2.getOpcode() == ISD::CONCAT_VECTORS) {
@@ -12931,30 +13139,31 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
// To match the shuffle mask, the first half of the mask should
// be exactly the first vector, and all the rest a splat with the
// first element of the second one.
- for (int i = 0; i < NumElems/2; ++i)
+ for (unsigned i = 0; i != NumElems/2; ++i)
if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
return SDValue();
// If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
- SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
- SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
- SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
- Ld->getMemoryVT(),
- Ld->getPointerInfo(),
- Ld->getAlignment(),
- false/*isVolatile*/, true/*ReadMem*/,
- false/*WriteMem*/);
- return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
- }
+ if (Ld->hasNUsesOfValue(1, 0)) {
+ SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
+ SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+ Ld->getMemoryVT(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+ return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ }
+ }
// Emit a zeroed vector and insert the desired subvector on its
// first half.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
- DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
return DCI.CombineTo(N, InsV);
}
@@ -12964,18 +13173,15 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
if (isShuffleHigh128VectorInsertLow(SVOp)) {
- SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
- V, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
return DCI.CombineTo(N, InsV);
}
// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (isShuffleLow128VectorInsertHigh(SVOp)) {
- SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
- V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ SDValue V = Extract128BitVector(V1, 0, DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
return DCI.CombineTo(N, InsV);
}
@@ -12995,12 +13201,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ if (Subtarget->hasAVX() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
@@ -13014,16 +13220,17 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
}
-/// PerformTruncateCombine - Converts truncate operation to
+/// PerformTruncateCombine - Converts truncate operation to
/// a sequence of vector shuffle operations.
/// It is possible when we truncate 256-bit vector to 128-bit vector
-SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
DAGCombinerInfo &DCI) const {
if (!DCI.isBeforeLegalizeOps())
return SDValue();
- if (!Subtarget->hasAVX()) return SDValue();
+ if (!Subtarget->hasAVX())
+ return SDValue();
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
@@ -13032,55 +13239,102 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
+ if (Subtarget->hasAVX2()) {
+ // AVX2: v4i64 -> v4i32
+
+ // VPERMD
+ static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op);
+ Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32),
+ ShufMask);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // AVX: v4i64 -> v4i32
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(2));
+ DAG.getIntPtrConstant(2));
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
// PSHUFD
- int ShufMask1[] = {0, 2, 0, 0};
+ static const int ShufMask1[] = {0, 2, 0, 0};
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
// MOVLHPS
- int ShufMask2[] = {0, 1, 4, 5};
+ static const int ShufMask2[] = {0, 1, 4, 5};
return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
}
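
For reference, the v4i64 -> v4i32 case above only keeps the low 32 bits of every 64-bit lane, i.e. the even dword lanes {0, 2, 4, 6} of the bitcast value; that is exactly the VPERMD mask used on AVX2, and what the PSHUFD/MOVLHPS pair reassembles on plain AVX. A standalone C++ sketch of that lane selection, assuming a little-endian target (illustrative only, not part of the patch):

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Truncating v4i64 -> v4i32 keeps each element's low 32 bits, which equals
// picking the even 32-bit lanes {0, 2, 4, 6} of the same data viewed as v8i32.
std::array<uint32_t, 4> truncate_v4i64(const std::array<uint64_t, 4> &v) {
  std::array<uint32_t, 8> lanes;                    // the v8i32 view (little endian)
  std::memcpy(lanes.data(), v.data(), sizeof(v));
  return {lanes[0], lanes[2], lanes[4], lanes[6]};  // shuffle mask {0, 2, 4, 6}
}

int main() {
  std::array<uint64_t, 4> v = {0x1111111122222222ULL, 0x3333333344444444ULL,
                               0x5555555566666666ULL, 0x7777777788888888ULL};
  for (uint32_t x : truncate_v4i64(v))
    std::printf("%08x ", x);                        // 22222222 44444444 66666666 88888888
  std::printf("\n");
  return 0;
}
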
+
if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
+ if (Subtarget->hasAVX2()) {
+ // AVX2: v8i32 -> v8i16
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
+
+ // PSHUFB
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i < 2; ++i) {
+ pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ for (unsigned j = 0; j < 8; ++j)
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8,
+ &pshufbMask[0], 32);
+ Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV);
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op);
+
+ static const int ShufMask[] = {0, 2, -1, -1};
+ Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64),
+ &ShufMask[0]);
+
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(0));
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ }
+
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4));
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
// PSHUFB
- int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1};
+ static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo,
- DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi,
- DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
// MOVLHPS
- int ShufMask2[] = {0, 1, 4, 5};
+ static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
@@ -13127,7 +13381,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+ if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
+ UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
@@ -13276,8 +13531,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
-
-
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -13559,9 +13812,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// to simplify previous instructions.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
- !DCI.isBeforeLegalize() &&
- TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+
+ // Don't optimize vector selects that map to mask-registers.
+ if (BitWidth == 1)
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -13576,6 +13833,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
+// code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV.
+//
+static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+  // Quit unless this is a CMP, or a SUB whose value result is unused.
+ if (Cmp.getOpcode() != X86ISD::CMP &&
+ (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+ return SDValue();
+
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ // Check CMP operands. One of them should be 0 or 1 and the other should be
+  // a SETCC node or a value extended from it.
+ SDValue Op1 = Cmp.getOperand(0);
+ SDValue Op2 = Cmp.getOperand(1);
+
+ SDValue SetCC;
+ const ConstantSDNode* C = 0;
+ bool needOppositeCond = (CC == X86::COND_E);
+
+ if ((C = dyn_cast<ConstantSDNode>(Op1)))
+ SetCC = Op2;
+ else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+ SetCC = Op1;
+  else // Quit if neither operand is a constant.
+ return SDValue();
+
+ if (C->getZExtValue() == 1)
+ needOppositeCond = !needOppositeCond;
+ else if (C->getZExtValue() != 0)
+ // Quit if the constant is neither 0 or 1.
+ return SDValue();
+
+ // Skip 'zext' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ SetCC = SetCC.getOperand(0);
+
+ // Quit if not SETCC.
+ // FIXME: So far we only handle the boolean value generated from SETCC. If
+ // there is other ways to generate boolean values, we need handle them here
+ // as well.
+ if (SetCC.getOpcode() != X86ISD::SETCC)
+ return SDValue();
+
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+
+ return SetCC.getOperand(1);
+}
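
The combine above drops a redundant compare of a SETCC-produced boolean against 0 or 1 and lets the consumer (BRCOND or CMOV) test the original EFLAGS directly, flipping the condition when needed. A tiny standalone check of the boolean identities it relies on (illustrative sketch, not part of the patch):

#include <cassert>

// For a boolean value b in {0, 1}:
//   (b != 0) == b    and    (b == 0) == !b
//   (b == 1) == b    and    (b != 1) == !b
// so a CMP of a SETCC result against 0/1 adds no information and can be folded.
int main() {
  for (int b = 0; b <= 1; ++b) {
    assert((b != 0) == (b == 1));            // same predicate
    assert((b == 0) == (b != 1));            // same predicate, opposite of b
    assert((b != 0) == static_cast<bool>(b));
  }
  return 0;
}
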
+
+static bool IsValidFCMOVCondition(X86::CondCode CC) {
+ switch (CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_AE:
+ case X86::COND_A:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
@@ -13589,6 +13928,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
+
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
@@ -13600,6 +13940,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(Cond, CC);
+ if (Flags.getNode() &&
+ // Extra check as FCMOV only supports a subset of X86 cond.
+ (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -14022,7 +14374,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
// Sometimes the operand may come from a insert_subvector building a 256-bit
// allones vector
- if (VT.getSizeInBits() == 256 &&
+ if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -14260,6 +14612,41 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Generate NEG and CMOV for integer abs.
+static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Since X86 does not have CMOV for 8-bit integer, we don't convert
+ // 8-bit integer abs to NEG and CMOV.
+ if (VT.isInteger() && VT.getSizeInBits() == 8)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
+ // and change it to SUB and CMOV.
+ if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
+ N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA &&
+ N1.getOperand(0) == N0.getOperand(0))
+ if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
+ if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
+ // Generate SUB & CMOV.
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
+ DAG.getConstant(0, VT), N0.getOperand(0));
+
+ SDValue Ops[] = { N0.getOperand(0), Neg,
+ DAG.getConstant(X86::COND_GE, MVT::i8),
+ SDValue(Neg.getNode(), 1) };
+ return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
+ Ops, array_lengthof(Ops));
+ }
+ return SDValue();
+}
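
The XOR(ADD(X, Y), Y) pattern with Y = SRA(X, bits-1) matched above is the classic branchless integer abs; with CMOV available it is cheaper as NEG plus CMOVGE, which is what the combine emits. A standalone sketch verifying that the two forms agree (hypothetical helper names, not from the patch):

#include <cassert>
#include <cstdint>

// Branchless abs as matched by the combine: y = x >> 31 (arithmetic shift),
// abs = (x + y) ^ y.  Rewritten form: NEG sets the flags, CMOVGE picks x or -x.
int32_t abs_shift_xor(int32_t x) {
  int32_t y = x >> 31;            // all ones if x < 0, else zero
  return (x + y) ^ y;
}

int32_t abs_neg_cmov(int32_t x) {
  int32_t neg = 0 - x;            // X86ISD::SUB 0, x -- also produces EFLAGS
  return x >= 0 ? x : neg;        // lowered as a CMOV, no branch
}

int main() {
  for (int32_t x : {0, 1, -1, 42, -42, 2147483647, -2147483647})
    assert(abs_shift_xor(x) == abs_neg_cmov(x));
  return 0;
}
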
+
// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -14267,6 +14654,16 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ if (Subtarget->hasCMov()) {
+ SDValue RV = performIntegerAbsCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Try forming BMI if it is available.
+ if (!Subtarget->hasBMI())
+ return SDValue();
+
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
@@ -14292,7 +14689,8 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
@@ -14314,63 +14712,94 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
- // All sizes must be a power of two
- if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
- // Attempt to load the original value using a single load op.
- // Find a scalar type which is equal to the loaded word size.
+ // All sizes must be a power of two.
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems))
+ return SDValue();
+
+ // Attempt to load the original value using scalar loads.
+ // Find the largest scalar type that divides the total loaded size.
MVT SclrLoadTy = MVT::i8;
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
+ if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
- break;
}
}
- // Proceed if a load word is found.
- if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
+  // On 32-bit systems, 64-bit integer loads are not legal. Try using an f64 load instead.
+ if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
+ (64 <= MemSz))
+ SclrLoadTy = MVT::f64;
+ // Calculate the number of scalar loads that we need to perform
+ // in order to load our vector from memory.
+ unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+
+ // Represent our vector as a sequence of elements which are the
+ // largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
RegSz/SclrLoadTy.getSizeInBits());
+ // Represent the data using the same element type that is stored in
+ // memory. In practice, we ''widen'' MemVT.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
RegSz/MemVT.getScalarType().getSizeInBits());
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
- // Perform a single load.
- SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
- Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->isInvariant(),
- Ld->getAlignment());
+ assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
+ "Invalid vector type");
+
+ // We can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
- // Insert the word loaded into a vector.
- SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- LoadUnitVecVT, ScalarLoad);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ptr, Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ Chains.push_back(ScalarLoad.getValue(1));
+ // Create the first element type using SCALAR_TO_VECTOR in order to avoid
+ // another round of DAGCombining.
+ if (i == 0)
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
+ else
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
+ ScalarLoad, DAG.getIntPtrConstant(i));
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT,
- ScalarInVector);
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i;
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(SlicedVec.getValueType()),
- ShuffleVec.data());
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
// Bitcast to the requested type.
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
// Replace the original load with the new sequence
// and return the new chain.
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
- return SDValue(ScalarLoad.getNode(), 1);
+ return DCI.CombineTo(N, Shuff, TF, true);
}
return SDValue();
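
The rewritten load combine above replaces the single "load one word and SCALAR_TO_VECTOR it" approach with a loop of legal scalar loads, INSERT_VECTOR_ELT inserts, a redistribute shuffle, and a bitcast. A scalar model of the end result for a v4i8 -> v4i32 sign-extending load, assuming a little-endian target (illustrative sketch, names are not from the patch):

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

// One legal 32-bit scalar load covers the whole v4i8 in memory; the shuffle
// ShuffleVec[i*SizeRatio] = i then spreads each byte into its own 32-bit lane,
// and the extension happens in-register afterwards.
std::array<int32_t, 4> sext_load_v4i8(const int8_t *mem) {
  uint32_t word;
  std::memcpy(&word, mem, sizeof(word));             // single scalar load of MemVT
  std::array<int32_t, 4> lanes;
  for (unsigned i = 0; i != 4; ++i)
    lanes[i] = static_cast<int8_t>((word >> (8 * i)) & 0xffu);  // redistribute + sext
  return lanes;
}

int main() {
  const int8_t mem[4] = {1, -2, 3, -4};
  std::array<int32_t, 4> v = sext_load_v4i8(mem);
  assert(v[0] == 1 && v[1] == -2 && v[2] == 3 && v[3] == -4);
  return 0;
}
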
@@ -14387,13 +14816,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we are saving a concatenation of two XMM registers, perform two stores.
- // This is better in Sandy Bridge cause one 256-bit mem op is done via two
- // 128-bit ones. If in the future the cost becomes only one memory access the
- // first version would be better.
- if (VT.getSizeInBits() == 256 &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
-
+ // On Sandy Bridge, 256-bit memory operations are executed by two
+ // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
+ // memory operation.
+ if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
+ StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
+ StoredVal.getNumOperands() == 2) {
SDValue Value0 = StoredVal.getOperand(0);
SDValue Value1 = StoredVal.getOperand(1);
@@ -14438,14 +14866,16 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
- // Can't shuffle using an illegal type
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
@@ -14454,13 +14884,18 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz)
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
+ // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
+ if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
+ (64 <= NumElems * ToSz))
+ StoreType = MVT::f64;
+
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
@@ -14469,7 +14904,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
- for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) {
+ for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
DAG.getIntPtrConstant(i));
@@ -14818,18 +15253,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps())
return SDValue();
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasAVX())
return SDValue();
- // Optimize vectors in AVX mode
- // Sign extend v8i16 to v8i32 and
- // v4i32 to v4i64
- //
- // Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
- // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
- // concat the vectors to original VT
-
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
@@ -14838,23 +15264,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
(VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
+
+ // Optimize vectors in AVX mode
+ // Sign extend v8i16 to v8i32 and
+ // v4i32 to v4i64
+ //
+ // Divide input vector into two parts
+ // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
+ // concat the vectors to original VT
+
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(OpVT);
+
SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask1[i] = i;
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- ShufMask1.data());
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
- for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask2[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- ShufMask2.data());
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
- OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
@@ -14862,7 +15302,42 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
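
The sign-extension combine above has two paths: with AVX2 the whole v8i16 -> v8i32 extension is a single VPMOVSXWD to a 256-bit destination, while plain AVX splits the source and extends each 128-bit half before concatenating. A rough intrinsics sketch of the two shapes (compile with -mavx2; illustrative only, not part of the patch):

#include <immintrin.h>

// AVX2 path: one instruction covers all eight lanes.
__m256i sext_v8i16_whole(__m128i v) {
  return _mm256_cvtepi16_epi32(v);                        // vpmovsxwd ymm, xmm
}

// AVX path: extend lanes 0..3 and 4..7 separately, then concatenate,
// mirroring the shuffle / VSEXT_MOVL / CONCAT_VECTORS sequence above.
__m256i sext_v8i16_split(__m128i v) {
  __m128i lo = _mm_cvtepi16_epi32(v);                     // lanes 0..3
  __m128i hi = _mm_cvtepi16_epi32(_mm_srli_si128(v, 8));  // lanes 4..7
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}
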
+static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ EVT ScalarVT = VT.getScalarType();
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ return SDValue();
+
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+
+ bool NegA = (A.getOpcode() == ISD::FNEG);
+ bool NegB = (B.getOpcode() == ISD::FNEG);
+ bool NegC = (C.getOpcode() == ISD::FNEG);
+
+ // Negative multiplication when NegA xor NegB
+ bool NegMul = (NegA != NegB);
+ if (NegA)
+ A = A.getOperand(0);
+ if (NegB)
+ B = B.getOperand(0);
+ if (NegC)
+ C = C.getOperand(0);
+
+ unsigned Opcode;
+ if (!NegMul)
+ Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ else
+ Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ return DAG.getNode(Opcode, dl, VT, A, B, C);
+}
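
The operand-sign analysis above maps an FMA with negated inputs onto one of the four x86 node kinds. A standalone check of the algebraic identities it relies on, using exactly representable values so the comparison is exact (illustrative sketch, not part of the patch):

#include <cassert>
#include <cmath>

//   a*b + c  -> FMADD      a*b - c  -> FMSUB
//  -a*b + c  -> FNMADD    -a*b - c  -> FNMSUB
// NegMul = NegA xor NegB decides the first column, NegC the second.
int main() {
  double a = 1.5, b = -2.25, c = 0.75;           // all products/sums are exact
  assert(std::fma(-a, b, c)  == -(a * b) + c);   // FNMADD shape
  assert(std::fma(a, b, -c)  ==  (a * b) - c);   // FMSUB shape
  assert(std::fma(-a, -b, c) ==  (a * b) + c);   // double negation -> FMADD
  return 0;
}
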
+
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
@@ -14887,6 +15362,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, VT));
}
+
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
@@ -14899,50 +15375,139 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
- if (Subtarget->hasAVX()) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
- if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
- SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
- SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG);
- SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG);
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
+ SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
+ SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec);
+ SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec);
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
- }
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+ return SDValue();
+}
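
The AVX path above zero-extends by interleaving the source vector with a zero vector: PUNPCKLWD produces the low four 32-bit lanes, PUNPCKHWD the high four, and the halves are concatenated. A scalar model of that interleave, assuming a little-endian layout (illustrative sketch, not part of the patch):

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

// Each 32-bit result lane is a 16-bit source element followed by a zero word,
// which read as a little-endian i32 is exactly the zero-extended value.
std::array<uint32_t, 8> zext_by_unpack(const std::array<uint16_t, 8> &v) {
  std::array<uint16_t, 16> interleaved;        // {v[0], 0, v[1], 0, ...}
  for (unsigned i = 0; i != 8; ++i) {
    interleaved[2 * i]     = v[i];
    interleaved[2 * i + 1] = 0;                // the zero vector's element
  }
  std::array<uint32_t, 8> out;                 // reinterpret as v8i32
  std::memcpy(out.data(), interleaved.data(), sizeof(out));
  return out;
}

int main() {
  std::array<uint16_t, 8> v = {1, 2, 0xffff, 4, 5, 6, 7, 8};
  assert(zext_by_unpack(v)[2] == 0xffffu);     // zero- rather than sign-extended
  return 0;
}
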
+// Optimize x == -y --> x+y == 0
+// x != -y --> x+y != 0
+static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
+ if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(),
+ LHS.getValueType(), RHS, LHS.getOperand(1));
+ return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0),
+ addV, DAG.getConstant(0, addV.getValueType()), CC);
+ }
+ if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
+ if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(),
+ RHS.getValueType(), LHS, RHS.getOperand(1));
+ return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0),
+ addV, DAG.getConstant(0, addV.getValueType()), CC);
+ }
return SDValue();
}
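
The setcc combine above turns an equality test against a negation into an equality test of a sum against zero: the NEG+CMP pair becomes a single ADD that already sets the flags, and wrap-around cannot change an equality result. A quick standalone check of the identity (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// x == -y  <=>  x + y == 0 (mod 2^32), including the INT32_MIN corner case.
int main() {
  const int32_t vals[] = {0, 7, -7, 2147483647, INT32_MIN};
  for (int32_t x : vals)
    for (int32_t y : vals) {
      uint32_t ux = static_cast<uint32_t>(x);
      uint32_t uy = static_cast<uint32_t>(y);
      assert((ux == 0u - uy) == (ux + uy == 0u));
    }
  return 0;
}
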
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- unsigned X86CC = N->getConstantOperandVal(0);
- SDValue EFLAG = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
+ SDValue EFLAGS = N->getOperand(1);
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
- if (X86CC == X86::COND_B)
+ if (CC == X86::COND_B)
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(CC, MVT::i8), EFLAGS),
DAG.getConstant(1, MVT::i8));
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
+
+ return SDValue();
+}
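
On the "setb reg" -> "sbb reg,reg" materialization mentioned above: after a compare, subtract-with-borrow of a register from itself yields 0 or all-ones straight from the carry flag, so the result can be used as a mask without a later zero extension. A rough sketch of the difference (illustrative only; the asm lines in the comments are Intel syntax and are not taken from the patch):

#include <cstdint>

// setb path (0/1 value, usually needs a later movzx):
//   cmp  a, b          ; CF = (a < b) unsigned
//   setb al            ; al  = CF
//   movzx eax, al
// sbb path (all-ones mask, no extension needed):
//   cmp  a, b          ; CF = (a < b) unsigned
//   sbb  eax, eax      ; eax = CF ? 0xFFFFFFFF : 0
uint32_t borrow_mask(uint32_t a, uint32_t b) {
  return a < b ? 0xFFFFFFFFu : 0u;   // portable equivalent of the sbb form
}
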
+
+// Optimize branch condition evaluation.
+//
+static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue EFLAGS = N->getOperand(3);
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
+
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ EVT InVT = Op0->getValueType(0);
+
+ // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+ // Notice that we use SINT_TO_FP because we know that the high bits
+ // are zero and SINT_TO_FP is better supported by the hardware.
+ return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ }
+
return SDValue();
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
+ EVT InVT = Op0->getValueType(0);
+
+ // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ }
+
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
@@ -14961,6 +15526,20 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
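
The UINT_TO_FP and SINT_TO_FP combines above first widen the small integer elements and then use the signed conversion; for the unsigned case this is safe because a zero-extended u8/u16 value can never be negative, and the signed vector conversion (e.g. CVTDQ2PS) is the one x86 supports directly. A standalone check of that reasoning (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// After zero-extending a u8 to i32 the sign bit is clear, so signed and
// unsigned int->float conversion give the same result for every input.
int main() {
  for (unsigned v = 0; v != 256; ++v) {
    uint8_t u = static_cast<uint8_t>(v);
    int32_t widened = static_cast<int32_t>(u);   // the ZERO_EXTEND step
    assert(static_cast<float>(widened) == static_cast<float>(u));
  }
  return 0;
}
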
+static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT()
+ if (VT == MVT::v8i8 || VT == MVT::v4i8) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT DstVT = VT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
+ }
+
+ return SDValue();
+}
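
The FP_TO_SINT combine above handles the lack of a direct float -> i8/i16 vector conversion by converting to 32-bit lanes first and truncating the result. A scalar model of one lane (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// Convert through the wider lane type, then truncate to the requested width,
// mirroring v4i8 = TRUNCATE(v4i32 = FP_TO_SINT(...)).
int8_t fp_to_i8_via_i32(float f) {
  int32_t wide = static_cast<int32_t>(f);   // FP_TO_SINT to i32 (CVTTPS2DQ per lane)
  return static_cast<int8_t>(wide);         // TRUNCATE to the narrow element
}

int main() {
  assert(fp_to_i8_via_i32(41.9f) == 41);
  assert(fp_to_i8_via_i32(-3.0f) == -3);
  return 0;
}
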
+
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -15095,9 +15674,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
- case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -15105,10 +15686,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
+ case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
@@ -15123,6 +15707,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+ case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
}
return SDValue();
@@ -15652,55 +16237,55 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// in the normal allocation?
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget->is64Bit()) {
- if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, X86::GR32RegisterClass);
- else if (VT == MVT::i16)
- return std::make_pair(0U, X86::GR16RegisterClass);
- else if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, X86::GR8RegisterClass);
- else if (VT == MVT::i64 || VT == MVT::f64)
- return std::make_pair(0U, X86::GR64RegisterClass);
- break;
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, &X86::GR32RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8RegClass);
+ if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, &X86::GR64RegClass);
+ break;
}
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
- else if (VT == MVT::i16)
- return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
- else if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
- else if (VT == MVT::i64)
- return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+ return std::make_pair(0U, &X86::GR32_ABCDRegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_ABCDRegClass);
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, X86::GR8RegisterClass);
+ return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, X86::GR16RegisterClass);
+ return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
- return std::make_pair(0U, X86::GR32RegisterClass);
- return std::make_pair(0U, X86::GR64RegisterClass);
+ return std::make_pair(0U, &X86::GR32RegClass);
+ return std::make_pair(0U, &X86::GR64RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || !Subtarget->is64Bit())
- return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
- return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
+ return std::make_pair(0U, &X86::GR32_NOREXRegClass);
+ return std::make_pair(0U, &X86::GR64_NOREXRegClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
- return std::make_pair(0U, X86::RFP32RegisterClass);
+ return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
- return std::make_pair(0U, X86::RFP64RegisterClass);
- return std::make_pair(0U, X86::RFP80RegisterClass);
+ return std::make_pair(0U, &X86::RFP64RegClass);
+ return std::make_pair(0U, &X86::RFP80RegClass);
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
- return std::make_pair(0U, X86::VR64RegisterClass);
+ return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
@@ -15712,10 +16297,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
- return std::make_pair(0U, X86::FR32RegisterClass);
+ return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
- return std::make_pair(0U, X86::FR64RegisterClass);
+ return std::make_pair(0U, &X86::FR64RegClass);
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
@@ -15723,7 +16308,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
- return std::make_pair(0U, X86::VR128RegisterClass);
+ return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v32i8:
case MVT::v16i16:
@@ -15731,8 +16316,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
- return std::make_pair(0U, X86::VR256RegisterClass);
-
+ return std::make_pair(0U, &X86::VR256RegClass);
}
break;
}
@@ -15755,28 +16339,28 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Constraint[6] == '}') {
Res.first = X86::ST0+Constraint[4]-'0';
- Res.second = X86::RFP80RegisterClass;
+ Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::ST0;
- Res.second = X86::RFP80RegisterClass;
+ Res.second = &X86::RFP80RegClass;
return Res;
}
// flags -> EFLAGS
if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
- Res.second = X86::CCRRegisterClass;
+ Res.second = &X86::CCRRegClass;
return Res;
}
// 'A' means EAX + EDX.
if (Constraint == "A") {
Res.first = X86::EAX;
- Res.second = X86::GR32_ADRegisterClass;
+ Res.second = &X86::GR32_ADRegClass;
return Res;
}
return Res;
@@ -15792,7 +16376,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
- if (Res.second == X86::GR16RegisterClass) {
+ if (Res.second == &X86::GR16RegClass) {
if (VT == MVT::i8) {
unsigned DestReg = 0;
switch (Res.first) {
@@ -15804,7 +16388,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
if (DestReg) {
Res.first = DestReg;
- Res.second = X86::GR8RegisterClass;
+ Res.second = &X86::GR8RegClass;
}
} else if (VT == MVT::i32) {
unsigned DestReg = 0;
@@ -15821,7 +16405,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
if (DestReg) {
Res.first = DestReg;
- Res.second = X86::GR32RegisterClass;
+ Res.second = &X86::GR32RegClass;
}
} else if (VT == MVT::i64) {
unsigned DestReg = 0;
@@ -15838,22 +16422,25 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
if (DestReg) {
Res.first = DestReg;
- Res.second = X86::GR64RegisterClass;
+ Res.second = &X86::GR64RegClass;
}
}
- } else if (Res.second == X86::FR32RegisterClass ||
- Res.second == X86::FR64RegisterClass ||
- Res.second == X86::VR128RegisterClass) {
+ } else if (Res.second == &X86::FR32RegClass ||
+ Res.second == &X86::FR64RegClass ||
+ Res.second == &X86::VR128RegClass) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
- if (VT == MVT::f32)
- Res.second = X86::FR32RegisterClass;
- else if (VT == MVT::f64)
- Res.second = X86::FR64RegisterClass;
- else if (X86::VR128RegisterClass->hasType(VT))
- Res.second = X86::VR128RegisterClass;
+
+ if (VT == MVT::f32 || VT == MVT::i32)
+ Res.second = &X86::FR32RegClass;
+ else if (VT == MVT::f64 || VT == MVT::i64)
+ Res.second = &X86::FR64RegClass;
+ else if (X86::VR128RegClass.hasType(VT))
+ Res.second = &X86::VR128RegClass;
+ else if (X86::VR256RegClass.hasType(VT))
+ Res.second = &X86::VR256RegClass;
}
return Res;
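
The constraint handling above is what decides which register class an inline-asm operand is placed in: 'r'/'l' pick a general-purpose register sized by the value type, 'Q' restricts to the a/b/c/d registers ('q' does the same in 32-bit mode), 'f' selects an x87 stack register, and 'x'/'Y' an SSE register. A small GCC/Clang-style example that exercises the 'r' path on x86-64 (illustrative only, not part of the patch):

#include <cstdint>

uint32_t add_in_gpr(uint32_t a, uint32_t b) {
  uint32_t out;
  // "=r" asks for a 32-bit GPR (GR32 above); "0" ties the first input to it.
  // Swapping "r" for "Q" would restrict the operand to EAX/EBX/ECX/EDX.
  asm("addl %2, %0" : "=r"(out) : "0"(a), "r"(b));
  return out;
}
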
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 09116e88af57..9123ebd8ae49 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -137,10 +137,6 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
/// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
@@ -207,6 +203,10 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
+ // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // of the TLS block for the current module.
+ TLSBASEADDR,
+
// TLSCALL - Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
@@ -242,9 +242,6 @@ namespace llvm {
// PCMP* - Vector integer comparisons.
PCMPEQ, PCMPGT,
- // VPCOM, VPCOMU - XOP Vector integer comparisons.
- VPCOM, VPCOMU,
-
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
@@ -293,6 +290,14 @@ namespace llvm {
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
+ // FMA nodes
+ FMADD,
+ FNMADD,
+ FMSUB,
+ FNMSUB,
+ FMADDSUB,
+ FMSUBADD,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -315,6 +320,19 @@ namespace llvm {
SFENCE,
LFENCE,
+ // FNSTSW16r - Store FP status word into i16 register.
+ FNSTSW16r,
+
+ // SAHF - Store contents of %ah into %eflags.
+ SAHF,
+
+ // RDRAND - Get a random integer and indicate whether it is valid in CF.
+ RDRAND,
+
+ // PCMP*STRI
+ PCMPISTRI,
+ PCMPESTRI,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -558,6 +576,18 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
@@ -575,6 +605,12 @@ namespace llvm {
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
@@ -634,7 +670,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
@@ -761,6 +798,7 @@ namespace llvm {
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
@@ -797,12 +835,7 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -822,9 +855,9 @@ namespace llvm {
virtual bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
@@ -909,10 +942,14 @@ namespace llvm {
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SelectionDAG &DAG) const;
+
+ /// Convert a comparison if required by the subtarget.
+ SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
};
namespace X86 {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 0eee08339384..f790611b8f8c 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
X86xor_flag, xor, 1, 0>;
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
X86add_flag, add, 1, 1>;
+let isCompare = 1 in {
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 0>;
+}
// Arithmetic.
let Uses = [EFLAGS] in {
@@ -1143,7 +1145,9 @@ let Uses = [EFLAGS] in {
0, 0>;
}
+let isCompare = 1 in {
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+}
//===----------------------------------------------------------------------===//
@@ -1154,7 +1158,7 @@ defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
(X86cmp (and_su node:$lhs, node:$rhs), 0)>;
-let Defs = [EFLAGS] in {
+let isCompare = 1, Defs = [EFLAGS] in {
let isCommutable = 1 in {
def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index fa1d67644db7..aaef4a466ded 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -55,11 +55,11 @@ struct X86AddressMode {
: BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
-
-
+
+
void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
-
+
if (BaseType == X86AddressMode::RegBase)
MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
false, false, false, 0, false));
@@ -67,16 +67,16 @@ struct X86AddressMode {
assert(BaseType == X86AddressMode::FrameIndexBase);
MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
}
-
+
MO.push_back(MachineOperand::CreateImm(Scale));
MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
false, false, false, 0, false));
-
+
if (GV)
MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
else
MO.push_back(MachineOperand::CreateImm(Disp));
-
+
MO.push_back(MachineOperand::CreateReg(0, false, false,
false, false, false, 0, false));
}
@@ -122,7 +122,7 @@ static inline const MachineInstrBuilder &
addFullAddress(const MachineInstrBuilder &MIB,
const X86AddressMode &AM) {
assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-
+
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
else {
@@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB,
MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
MIB.addImm(AM.Disp);
-
+
return MIB.addReg(0);
}
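As a rough usage sketch (assumed, not taken from this patch): the whitespace cleanup above touches X86AddressMode and addFullAddress, which are typically combined as below when attaching a full memory operand to a freshly built MachineInstr. The opcode, registers, and displacement are illustrative only.

#include "X86InstrBuilder.h"
using namespace llvm;

// Build "mov rax, qword ptr [rsp + 8]" through an X86AddressMode (illustrative).
static void emitExampleLoad(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I,
                            const TargetInstrInfo &TII, DebugLoc DL) {
  X86AddressMode AM;        // defaults to RegBase, Scale = 1, Disp = 0
  AM.Base.Reg = X86::RSP;   // base register
  AM.Disp = 8;              // [rsp + 8]
  addFullAddress(BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), X86::RAX), AM);
}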
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 6f9e8492613a..99c2b8f955e4 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -375,11 +375,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in
+ Uses = [ESP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
+def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_base_addr32",
+ [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
+ Requires<[In32BitMode]>;
+}
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
@@ -389,11 +394,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in
+ Uses = [RSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
+def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_base_addr64",
+ [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
+ Requires<[In64BitMode]>;
+}
// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the
@@ -1008,8 +1018,8 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
- (TCRETURNri GR32_TC:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In32BitMode]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
@@ -1623,6 +1633,12 @@ def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+// sub 0, reg
+def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
+def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
+def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
+def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
+
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index bf11fdec5add..b0c27c882710 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -18,16 +18,16 @@
// Return instructions.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RET : I <0xC3, RawFrm, (outs), (ins),
"ret",
[(X86retflag 0)], IIC_RET>;
- def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}",
[], IIC_RET>, OpSize;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>;
- def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt",
[], IIC_RET_IMM>, OpSize;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
@@ -148,12 +148,12 @@ let isCall = 1 in
// registers are added manually.
let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ (outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
- def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
Requires<[In32BitMode]>;
- def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
Requires<[In32BitMode]>;
@@ -174,7 +174,7 @@ let isCall = 1 in
// callw for 16 bit code for the assembler.
let isAsmParserOnly = 1 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ (outs), (ins i16imm_pcrel:$dst),
"callw\t$dst", []>, OpSize;
}
@@ -185,23 +185,23 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ (ins i32imm_pcrel:$dst, i32imm:$offset), []>;
def TCRETURNri : PseudoI<(outs),
- (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs),
- (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+ (ins i32mem_TC:$dst, i32imm:$offset), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// mcinst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, variable_ops),
+ (ins i32imm_pcrel:$dst),
"jmp\t$dst # TAILCALL",
[], IIC_JMP_REL>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
- def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
"jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
@@ -218,14 +218,14 @@ let isCall = 1, Uses = [RSP] in {
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ (outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", [], IIC_CALL_RI>,
Requires<[In64BitMode]>;
- def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)],
IIC_CALL_RI>,
Requires<[In64BitMode]>;
- def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
IIC_CALL_MEM>,
Requires<[In64BitMode]>;
@@ -240,7 +240,7 @@ let isCall = 1, isCodeGenOnly = 1 in
let Defs = [RAX, R10, R11, RSP, EFLAGS],
Uses = [RSP] in {
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ (outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", [], IIC_CALL_RI>,
Requires<[IsWin64]>;
}
@@ -250,21 +250,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
let Uses = [RSP],
usesCustomInserter = 1 in {
def TCRETURNdi64 : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
def TCRETURNri64 : PseudoI<(outs),
- (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
- (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+ (ins i64mem_TC:$dst, i32imm:$offset), []>;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, variable_ops),
+ (ins i64i32imm_pcrel:$dst),
"jmp\t$dst # TAILCALL", [], IIC_JMP_REL>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
let mayLoad = 1 in
- def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 0d5490ad9cc1..2eb454ded21b 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
+let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
@@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
TB;
+let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
@@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
@@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[], IIC_MOVZX>, TB;
+}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d57937b2e1b7..265b4bb997f9 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -15,83 +15,245 @@
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
+let Constraints = "$src1 = $dst" in {
multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
+let neverHasSideEffects = 1 in {
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+ let mayLoad = 1 in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+} // neverHasSideEffects = 1
}
+// Intrinsic for 213 pattern
+multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ Intrinsic Int128, Intrinsic Int256, SDNode Op213,
+ ValueType OpVT128, ValueType OpVT256> {
+ def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
+ VR128:$src3))]>;
+
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2,
+ VR128:$src1, VR128:$src3)))]>;
+
+ def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
+ (MemFrag128 addr:$src3)))]>;
+
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, VR128:$src1,
+ (MemFrag128 addr:$src3))))]>;
+
+
+ def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
+ VR256:$src3))]>;
+
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (OpVT256 (Op213 VR256:$src2, VR256:$src1,
+ VR256:$src3)))]>;
+
+ def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3)))]>;
+
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (OpVT256 (Op213 VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3))))]>;
+}
+} // Constraints = "$src1 = $dst"
+
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpcodeStr, string PackTy> {
- defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
- defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+ string OpcodeStr, string PackTy,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ Intrinsic Int128, Intrinsic Int256, SDNode Op,
+ ValueType OpTy128, ValueType OpTy256> {
+ defm r213 : fma3p_rm_int <opc213, !strconcat(OpcodeStr,
+ !strconcat("213", PackTy)), MemFrag128, MemFrag256,
+ Int128, Int256, Op, OpTy128, OpTy256>;
+ defm r132 : fma3p_rm <opc132,
+ !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r231 : fma3p_rm <opc231,
+ !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
- defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
- defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
+ memopv8f32, int_x86_fma_vfmadd_ps,
+ int_x86_fma_vfmadd_ps_256, X86Fmadd,
+ v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
+ memopv8f32, int_x86_fma_vfmsub_ps,
+ int_x86_fma_vfmsub_ps_256, X86Fmsub,
+ v4f32, v8f32>;
+ defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
+ memopv4f32, memopv8f32,
+ int_x86_fma_vfmaddsub_ps,
+ int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub,
+ v4f32, v8f32>;
+ defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
+ memopv4f32, memopv8f32,
+ int_x86_fma_vfmsubadd_ps,
+                                 int_x86_fma_vfmsubadd_ps_256, X86Fmsubadd,
+ v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
- defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
- defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
- defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
+ memopv4f64, int_x86_fma_vfmadd_pd,
+ int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64,
+ v4f64>, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
+ memopv4f64, int_x86_fma_vfmsub_pd,
+ int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64,
+ v4f64>, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
+ memopv2f64, memopv4f64,
+ int_x86_fma_vfmaddsub_pd,
+ int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub,
+ v2f64, v4f64>, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
+ memopv2f64, memopv4f64,
+ int_x86_fma_vfmsubadd_pd,
+ int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd,
+ v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
- defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
+ memopv8f32, int_x86_fma_vfnmadd_ps,
+ int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32,
+ v8f32>;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
+ memopv8f32, int_x86_fma_vfnmsub_ps,
+ int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32,
+ v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
- defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
+ memopv4f64, int_x86_fma_vfnmadd_pd,
+ int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64,
+ v4f64>, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
+ memopv2f64,
+ memopv4f64, int_x86_fma_vfnmsub_pd,
+ int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64,
+ v4f64>, VEX_W;
}
-multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
- def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
- def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+let Constraints = "$src1 = $dst" in {
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
+ RegisterClass RC> {
+let neverHasSideEffects = 1 in {
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+ let mayLoad = 1 in
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+} // neverHasSideEffects = 1
}
+multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic IntId,
+ RegisterClass RC, SDNode OpNode, ValueType OpVT> {
+ def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
+ VR128:$src3))]>;
+ def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = 1 in
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+}
+} // Constraints = "$src1 = $dst"
+
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpcodeStr> {
- defm SSr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132ss"), f32mem>;
- defm SSr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213ss"), f32mem>;
- defm SSr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231ss"), f32mem>;
- defm SDr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132sd"), f64mem>, VEX_W;
- defm SDr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213sd"), f64mem>, VEX_W;
- defm SDr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231sd"), f64mem>, VEX_W;
+ string OpStr, Intrinsic IntF32, Intrinsic IntF64,
+ SDNode OpNode> {
+ defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>;
+ defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>;
+ defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>,
+ VEX_W;
+ defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>,
+ VEX_W;
+ defm SSr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213ss"), ssmem,
+ sse_load_f32, IntF32, FR32, OpNode, f32>;
+ defm SDr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213sd"), sdmem,
+ sse_load_f64, IntF64, FR64, OpNode, f64>, VEX_W;
}
-defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">, VEX_LIG;
-defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">, VEX_LIG;
+defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
+ int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
+ int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
+
+defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
+ int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
+defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
+ int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
-defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">, VEX_LIG;
-defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">, VEX_LIG;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
@@ -178,43 +340,47 @@ let isCodeGenOnly = 1 in {
} // isCodeGenOnly = 1
}
+let Predicates = [HasFMA4] in {
+
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma4_vfmadd_ss>;
+ int_x86_fma_vfmadd_ss>;
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma4_vfmadd_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma4_vfmadd_ps,
- int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd,
- int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>;
+ int_x86_fma_vfmadd_sd>;
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps,
+ int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd,
+ int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma4_vfmsub_ss>;
+ int_x86_fma_vfmsub_ss>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma4_vfmsub_sd>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps,
- int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd,
- int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>;
+ int_x86_fma_vfmsub_sd>;
+defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps,
+ int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd,
+ int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma4_vfnmadd_ss>;
+ int_x86_fma_vfnmadd_ss>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma4_vfnmadd_sd>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps,
- int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd,
- int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>;
+ int_x86_fma_vfnmadd_sd>;
+defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps,
+ int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd,
+ int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma4_vfnmsub_ss>;
+ int_x86_fma_vfnmsub_ss>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma4_vfnmsub_sd>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps,
- int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd,
- int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps,
- int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd,
- int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps,
- int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd,
- int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
+ int_x86_fma_vfnmsub_sd>;
+defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps,
+ int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd,
+ int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps,
+ int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd,
+ int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps,
+ int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd,
+ int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
+} // HasFMA4
+
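The 132/213/231 suffixes threaded through the rewritten multiclasses above encode which sources feed the multiply and which feeds the add, with the destination tied to $src1 by the "$src1 = $dst" constraint. A small self-contained sketch of the assumed arithmetic, using ordinary doubles in place of the XMM/YMM operands (this follows the conventional Intel FMA3 numbering, which the patch itself does not spell out):

#include <cstdio>

static double fmadd132(double dst, double src2, double src3) { return dst  * src3 + src2; }
static double fmadd213(double dst, double src2, double src3) { return src2 * dst  + src3; }
static double fmadd231(double dst, double src2, double src3) { return src2 * src3 + dst;  }

int main() {
  // With dst = 2, src2 = 3, src3 = 4 the three forms yield 11, 10 and 14.
  std::printf("%g %g %g\n", fmadd132(2, 3, 4), fmadd213(2, 3, 4), fmadd231(2, 3, 4));
  return 0;
}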
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index a13887e932b2..568726e08ece 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -27,6 +27,7 @@ def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
+def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -41,6 +42,7 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
+def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
@@ -203,6 +205,7 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
}
}
+let Defs = [FPSW] in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
@@ -213,6 +216,7 @@ defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
defm MUL : FPBinary<fmul, MRM1m, "mul">;
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+}
class FPST0rInst<bits<8> o, string asm>
: FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
@@ -257,6 +261,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
}
+let Defs = [FPSW] in {
defm CHS : FPUnary<fneg, 0xE0, "fchs">;
defm ABS : FPUnary<fabs, 0xE1, "fabs">;
defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
@@ -269,6 +274,7 @@ def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
}
def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
@@ -316,6 +322,7 @@ multiclass FPCMov<PatLeaf cc> {
Requires<[HasCMov]>;
}
+let Defs = [FPSW] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
@@ -416,24 +423,40 @@ def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
}
let mayLoad = 1 in {
-def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
-def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
-def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
-def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
-def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
-def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
+ IIC_FLD>;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
+ IIC_FLD>;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src",
+ IIC_FLD80>;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src",
+ IIC_FILD>;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
+ IIC_FILD>;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
+ IIC_FILD>;
}
let mayStore = 1 in {
-def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
-def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
-def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
-def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
-def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
-def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
-def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
-def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
-def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
-def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
+ IIC_FST>;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
+ IIC_FST>;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst",
+ IIC_FST>;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst",
+ IIC_FST>;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst",
+ IIC_FST80>;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst",
+ IIC_FIST>;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst",
+ IIC_FIST>;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst",
+ IIC_FIST>;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst",
+ IIC_FIST>;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst",
+ IIC_FIST>;
}
// FISTTP requires SSE3 even though it's a FPStack op.
@@ -459,17 +482,23 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
} // Predicates = [HasSSE3]
let mayStore = 1 in {
-def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
-def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
+ IIC_FST>;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
+ IIC_FST>;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
- "fisttp{ll}\t$dst">;
+ "fisttp{ll}\t$dst", IIC_FST>;
}
// FP Stack manipulation instructions.
-def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
-def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
-def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
-def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
+ IIC_FLD>, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
+ IIC_FST>, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
+ IIC_FST>, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
+ IIC_FXCH>, D9;
// Floating point constant loads.
let isReMaterializable = 1 in {
@@ -487,20 +516,21 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
-def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
-def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
// Floating point compares.
-let Defs = [EFLAGS] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
+ [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
+ [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
-
+ [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // Defs = [FPSW]
+
// CC = ST(0) cmp ST(i)
+let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -509,85 +539,94 @@ def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
-let Defs = [EFLAGS], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
- "fucom\t$reg">, DD;
+ "fucom\t$reg", IIC_FUCOM>, DD;
def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg),
- "fucomp\t$reg">, DD;
+ "fucomp\t$reg", IIC_FUCOM>, DD;
def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
(outs), (ins),
- "fucompp">, DA;
+ "fucompp", IIC_FUCOM>, DA;
+}
+let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
- "fucomi\t$reg">, DB;
+ "fucomi\t$reg", IIC_FUCOMI>, DB;
def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg),
- "fucompi\t$reg">, DF;
+ "fucompi\t$reg", IIC_FUCOMI>, DF;
}
+let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcomi\t$reg">, DB;
+ "fcomi\t$reg", IIC_FCOMI>, DB;
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcompi\t$reg">, DF;
+ "fcompi\t$reg", IIC_FCOMI>, DF;
+}
// Floating point flag ops.
-let Defs = [AX] in
-def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
- (outs), (ins), "fnstsw %ax", []>, DF;
+let Defs = [AX], Uses = [FPSW] in
+def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax",
+ [(set AX, (X86fp_stsw FPSW))], IIC_FNSTSW>, DF;
def FNSTCW16m  : I<0xD9, MRM7m,                 // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
- [(X86fp_cwd_get16 addr:$dst)]>;
+ [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
let mayLoad = 1 in
def FLDCW16m  : I<0xD9, MRM5m,                  // X87 control word = [mem16]
- (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>;
// FPU control instructions
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
+let Defs = [FPSW] in
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
- "ffree\t$reg">, DD;
+ "ffree\t$reg", IIC_FFREE>, DD;
// Clear exceptions
-def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
+let Defs = [FPSW] in
+def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
// Operandless floating-point instructions for the disassembler.
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
-
-def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
-def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
-def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9;
-def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9;
-def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9;
-def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9;
-def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9;
-def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9;
-def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9;
-def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9;
-def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9;
-def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9;
-def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9;
-def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9;
-def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9;
-def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9;
-def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9;
-def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9;
-def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9;
-def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9;
-def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
+
+def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
+def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", [], IIC_FXAM>, D9;
+def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", [], IIC_FLDL>, D9;
+def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", [], IIC_FLDL>, D9;
+def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", [], IIC_FLDL>, D9;
+def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", [], IIC_FLDL>, D9;
+def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", [], IIC_FLDL>, D9;
+def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", [], IIC_F2XM1>, D9;
+def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", [], IIC_FYL2X>, D9;
+def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", [], IIC_FPTAN>, D9;
+def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", [], IIC_FPATAN>, D9;
+def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", [], IIC_FXTRACT>, D9;
+def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", [], IIC_FPREM1>, D9;
+def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", [], IIC_FPSTP>, D9;
+def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", [], IIC_FPSTP>, D9;
+def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", [], IIC_FPREM>, D9;
+def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", [], IIC_FYL2XP1>, D9;
+def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", [], IIC_FSINCOS>, D9;
+def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", [], IIC_FRNDINT>, D9;
+def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", [], IIC_FSCALE>, D9;
+def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", [], IIC_FCOMPP>, DE;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave\t$dst", []>, TB;
+ "fxsave\t$dst", [], IIC_FXSAVE>, TB;
def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ "fxsaveq\t$dst", [], IIC_FXSAVE>, TB, REX_W,
+ Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", []>, TB;
+ "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
+ "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
+ Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index b3870906ab0a..81b4f812af66 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -255,8 +255,9 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
-class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
- : I<o, F, outs, ins, asm, []> {}
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, [], itin> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
@@ -365,6 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// S2SI - SSE2 instructions with XS prefix.
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
@@ -377,8 +379,11 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
-class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
+class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
+class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
@@ -392,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
+class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
+ Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
@@ -503,29 +512,29 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSE2, HasAES]>;
+ Requires<[HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- Requires<[HasSSE2, HasAES]>;
+ Requires<[HasAES]>;
-// CLMUL Instruction Templates
-class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+// PCLMUL Instruction Templates
+class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- OpSize, Requires<[HasSSE2, HasCLMUL]>;
+ OpSize, Requires<[HasPCLMUL]>;
-class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+ OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
- OpSize, VEX_4V, Requires<[HasFMA3]>;
+ OpSize, VEX_4V, Requires<[HasFMA]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 35801e43229b..d13167bb05db 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -71,9 +71,14 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+
+def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisOpSmallerThanOp<1, 0> ]>>;
+
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
-
+
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
@@ -102,13 +107,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
-def X86vpcom : SDNode<"X86ISD::VPCOM",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
-def X86vpcomu : SDNode<"X86ISD::VPCOMU",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
-
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
@@ -127,7 +125,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -162,9 +163,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
+def X86Fmaddsub  : SDNode<"X86ISD::FMADDSUB",  SDTFma>;
+def X86Fmsubadd  : SDNode<"X86ISD::FMSUBADD",  SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+ SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+ SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+ SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -304,7 +322,7 @@ def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
}]>;
def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
+ (st node:$val, node:$ptr), [{
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
return ST->isNonTemporal() && !ST->isTruncatingStore() &&
ST->getAddressingMode() == ISD::UNINDEXED &&
@@ -313,7 +331,7 @@ def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
}]>;
def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
+ (st node:$val, node:$ptr), [{
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
return ST->isNonTemporal() &&
ST->getAlignment() < 16;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b12c1db0fe1d..cca04e5433f5 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -54,38 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load",
enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 7)
+ // (stored in bits 0 - 3)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
- TB_INDEX_MASK = 0xff,
-
- // Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15)
- TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+ TB_INDEX_3 = 3,
+ TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
// This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 16,
+ TB_NO_REVERSE = 1 << 4,
// Do not insert the forward map (RegOp -> MemOp) into the table.
// This is needed for Native Client, which prohibits branch
// instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 17,
+ TB_NO_FORWARD = 1 << 5,
- TB_FOLDED_LOAD = 1 << 18,
- TB_FOLDED_STORE = 1 << 19
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
struct X86OpTblEntry {
uint16_t RegOp;
uint16_t MemOp;
- uint32_t Flags;
+ uint16_t Flags;
};
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
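The enum rewrite above squeezes the unfold index and the boolean flags into the low byte so the alignment byte keeps bits 8-15 and the Flags field can shrink to 16 bits. A stand-alone sketch of how such a packed value would be decoded; the enumerator names mirror the patch, but the helper functions are illustrative and not part of X86InstrInfo:

#include <cassert>
#include <cstdint>

enum {
  TB_INDEX_MASK   = 0xf,        // bits 0-3: which operand gets unfolded
  TB_NO_REVERSE   = 1 << 4,
  TB_NO_FORWARD   = 1 << 5,
  TB_FOLDED_LOAD  = 1 << 6,
  TB_FOLDED_STORE = 1 << 7,
  TB_ALIGN_SHIFT  = 8           // bits 8-15: required memory alignment in bytes
};

static unsigned foldIndex(uint16_t Flags) { return Flags & TB_INDEX_MASK; }
static unsigned alignment(uint16_t Flags) { return (Flags >> TB_ALIGN_SHIFT) & 0xff; }

int main() {
  uint16_t Flags = 2 /*TB_INDEX_2*/ | TB_FOLDED_LOAD | (16 << TB_ALIGN_SHIFT);
  assert(foldIndex(Flags) == 2);
  assert(alignment(Flags) == 16);
  assert(Flags & TB_FOLDED_LOAD);
  return 0;
}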
@@ -408,20 +410,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
{ X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -492,14 +484,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
{ X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
{ X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
@@ -535,6 +533,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
{ X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+ { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
+
// AVX 256-bit foldable instructions
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
@@ -543,6 +543,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+
// AVX2 foldable instructions
{ X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
{ X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
@@ -558,6 +559,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
{ X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
{ X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -671,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
@@ -808,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
@@ -1122,6 +1121,158 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// Index 2, folded load
Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
+
+ static const X86OpTblEntry OpTbl3[] = {
+ // FMA foldable instructions
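+ // Each entry maps a register-form FMA to the variant that folds a load of
+ // the operand at index 3 (registered below with TB_INDEX_3 | TB_FOLDED_LOAD).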
+ { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
+ { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
+ { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
+ { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
+ { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
+
+ { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr213r_Int, X86::VFMADDPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPDr213r_Int, X86::VFMADDPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPSr213rY_Int, X86::VFMADDPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDPDr213rY_Int, X86::VFMADDPDr213mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
+ { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
+ { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
+ { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
+
+ { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr213r_Int, X86::VFNMADDPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPDr213r_Int, X86::VFNMADDPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPSr213rY_Int, X86::VFNMADDPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFNMADDPDr213rY_Int, X86::VFNMADDPDr213mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
+ { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
+ { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
+ { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
+
+ { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr213r_Int, X86::VFMSUBPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPDr213r_Int, X86::VFMSUBPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPSr213rY_Int, X86::VFMSUBPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBPDr213rY_Int, X86::VFMSUBPDr213mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
+ { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
+ { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
+ { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
+
+ { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr213r_Int, X86::VFNMSUBPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr213r_Int, X86::VFNMSUBPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr213rY_Int, X86::VFNMSUBPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr213rY_Int, X86::VFNMSUBPDr213mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr213r_Int, X86::VFMADDSUBPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr213r_Int, X86::VFMADDSUBPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr213rY_Int, X86::VFMADDSUBPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr213rY_Int, X86::VFMADDSUBPDr213mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr213r_Int, X86::VFMSUBADDPSr213m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr213r_Int, X86::VFMSUBADDPDr213m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr213rY_Int, X86::VFMSUBADDPSr213mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr213rY_Int, X86::VFMSUBADDPDr213mY_Int, TB_ALIGN_32 },
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
+ unsigned RegOp = OpTbl3[i].RegOp;
+ unsigned MemOp = OpTbl3[i].MemOp;
+ unsigned Flags = OpTbl3[i].Flags;
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 3, folded load
+ Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
+ }
+
}
void
@@ -1312,6 +1463,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
 /// regIsPICBase - Return true if register is PIC base (i.e. defined by
 /// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ // Don't waste compile time scanning use-def chains of physregs.
+ if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
E = MRI.def_end(); I != E; ++I) {
@@ -1369,16 +1523,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
return false;
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- bool isPICBase = false;
- for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
- E = MRI.def_end(); I != E; ++I) {
- MachineInstr *DefMI = I.getOperand().getParent();
- if (DefMI->getOpcode() != X86::MOVPC32r)
- return false;
- assert(!isPICBase && "More than one PIC base?");
- isPICBase = true;
- }
- return isPICBase;
+ return regIsPICBase(BaseReg, MRI);
}
return false;
}
@@ -1782,12 +1927,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::INC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src,
- MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
- X86::GR32_NOSPRegisterClass))
+ !MF.getRegInfo().constrainRegClass(Src, RC))
return 0;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1812,11 +1958,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::DEC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src,
- MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
- X86::GR32_NOSPRegisterClass))
+ !MF.getRegInfo().constrainRegClass(Src, RC))
return 0;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1844,10 +1991,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const TargetRegisterClass *RC;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
Opc = X86::LEA64r;
- RC = X86::GR64_NOSPRegisterClass;
+ RC = &X86::GR64_NOSPRegClass;
} else {
Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- RC = X86::GR32_NOSPRegisterClass;
+ RC = &X86::GR32_NOSPRegClass;
}
@@ -1863,6 +2010,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
if (LV && isKill2)
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
@@ -2079,7 +2233,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
}
-static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
case X86::JE_4: return X86::COND_E;
@@ -2101,6 +2255,84 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
}
}
+/// getCondFromSETOpc - Return the condition code of a SET opcode.
+static X86::CondCode getCondFromSETOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::SETAr: case X86::SETAm: return X86::COND_A;
+ case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
+ case X86::SETBr: case X86::SETBm: return X86::COND_B;
+ case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
+ case X86::SETEr: case X86::SETEm: return X86::COND_E;
+ case X86::SETGr: case X86::SETGm: return X86::COND_G;
+ case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
+ case X86::SETLr: case X86::SETLm: return X86::COND_L;
+ case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
+ case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
+ case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
+ case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
+ case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
+ case X86::SETOr: case X86::SETOm: return X86::COND_O;
+ case X86::SETPr: case X86::SETPm: return X86::COND_P;
+ case X86::SETSr: case X86::SETSm: return X86::COND_S;
+ }
+}
+
+/// getCondFromCMovOpc - Return the condition code of a CMov opcode.
+static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
+ case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
+ return X86::COND_A;
+ case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
+ return X86::COND_AE;
+ case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
+ case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
+ return X86::COND_B;
+ case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
+ return X86::COND_BE;
+ case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
+ case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
+ return X86::COND_E;
+ case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
+ case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
+ return X86::COND_G;
+ case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
+ case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
+ return X86::COND_GE;
+ case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
+ case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
+ return X86::COND_L;
+ case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
+ case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
+ return X86::COND_LE;
+ case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
+ return X86::COND_NE;
+ case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
+ case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
+ return X86::COND_NO;
+ case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
+ return X86::COND_NP;
+ case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
+ case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
+ return X86::COND_NS;
+ case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
+ case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
+ return X86::COND_O;
+ case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
+ case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
+ return X86::COND_P;
+ case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
+ case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
+ return X86::COND_S;
+ }
+}
+
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
@@ -2147,6 +2379,101 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
}
+/// getSwappedCondition - Assume the flags are set by MI(a,b); return
+/// the condition code to use if the instructions are changed so that the
+/// flags are set by MI(b,a).
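+/// For example, COND_L must become COND_G once the comparison operands are
+/// swapped, while COND_E and COND_NE stay unchanged, as the switch below encodes.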
+static X86::CondCode getSwappedCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: return X86::COND_INVALID;
+ case X86::COND_E: return X86::COND_E;
+ case X86::COND_NE: return X86::COND_NE;
+ case X86::COND_L: return X86::COND_G;
+ case X86::COND_LE: return X86::COND_GE;
+ case X86::COND_G: return X86::COND_L;
+ case X86::COND_GE: return X86::COND_LE;
+ case X86::COND_B: return X86::COND_A;
+ case X86::COND_BE: return X86::COND_AE;
+ case X86::COND_A: return X86::COND_B;
+ case X86::COND_AE: return X86::COND_BE;
+ }
+}
+
+/// getSETFromCond - Return a set opcode for the given condition and
+/// whether it has memory operand.
+static unsigned getSETFromCond(X86::CondCode CC,
+ bool HasMemoryOperand) {
+ static const unsigned Opc[16][2] = {
+ { X86::SETAr, X86::SETAm },
+ { X86::SETAEr, X86::SETAEm },
+ { X86::SETBr, X86::SETBm },
+ { X86::SETBEr, X86::SETBEm },
+ { X86::SETEr, X86::SETEm },
+ { X86::SETGr, X86::SETGm },
+ { X86::SETGEr, X86::SETGEm },
+ { X86::SETLr, X86::SETLm },
+ { X86::SETLEr, X86::SETLEm },
+ { X86::SETNEr, X86::SETNEm },
+ { X86::SETNOr, X86::SETNOm },
+ { X86::SETNPr, X86::SETNPm },
+ { X86::SETNSr, X86::SETNSm },
+ { X86::SETOr, X86::SETOm },
+ { X86::SETPr, X86::SETPm },
+ { X86::SETSr, X86::SETSm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ return Opc[CC][HasMemoryOperand ? 1 : 0];
+}
+
+/// getCMovFromCond - Return a cmov opcode for the given condition,
+/// register size in bytes, and operand type.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
+ static const unsigned Opc[32][3] = {
+ { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
+ { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+ { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
+ { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+ { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
+ { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
+ { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+ { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
+ { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+ { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+ { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+ { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+ { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
+ { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
+ { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
+ { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
+ { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
+ { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
+ { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
+ { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
+ { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
+ { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
+ { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
+ { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
+ { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
+ { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
+ { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
+ { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
+ { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
+ { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
+ { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ unsigned Idx = HasMemoryOperand ? 16+CC : CC;
+ switch(RegBytes) {
+ default: llvm_unreachable("Illegal register size!");
+ case 2: return Opc[Idx][0];
+ case 4: return Opc[Idx][1];
+ case 8: return Opc[Idx][2];
+ }
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
@@ -2213,7 +2540,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// Handle conditional branches.
- X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
@@ -2311,7 +2638,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I->isDebugValue())
continue;
if (I->getOpcode() != X86::JMP_4 &&
- GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
I->eraseFromParent();
@@ -2371,6 +2698,56 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ // Not all subtargets have cmov instructions.
+ if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ return false;
+ if (Cond.size() != 1)
+ return false;
+ // We cannot do the composite conditions, at least not in SSA form.
+ if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
+ if (X86::GR16RegClass.hasSubClassEq(RC) ||
+ X86::GR32RegClass.hasSubClassEq(RC) ||
+ X86::GR64RegClass.hasSubClassEq(RC)) {
+ // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+ // Bridge. Probably Ivy Bridge as well.
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
+ return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(Cond.size() == 1 && "Invalid Cond array");
+ unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+ MRI.getRegClass(DstReg)->getSize(),
+ false/*HasMemoryOperand*/);
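+ // CMOVcc keeps its tied first source when the condition is false and copies
+ // the second source when it is true, so FalseReg comes first and TrueReg second.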
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
+
/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
@@ -2637,6 +3014,464 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
+bool X86InstrInfo::
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ // A SUB can be used to perform comparison.
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ SrcReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(1).getReg() != SrcReg) return false;
+ // Compare against zero.
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ }
+ return false;
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// This function can be extended later on.
+/// SrcReg, SrcReg2: register operands for FlagI.
+/// ImmValue: immediate for FlagI if it takes an immediate.
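+/// For example, "CMPrr a, b" is redundant when it follows "SUBrr a, b", which
+/// already set the same flags; "SUBrr b, a" also qualifies, provided the caller
+/// swaps the condition codes of the flag users.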
+inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if (((FlagI->getOpcode() == X86::CMP64rr &&
+ OI->getOpcode() == X86::SUB64rr) ||
+ (FlagI->getOpcode() == X86::CMP32rr &&
+ OI->getOpcode() == X86::SUB32rr)||
+ (FlagI->getOpcode() == X86::CMP16rr &&
+ OI->getOpcode() == X86::SUB16rr)||
+ (FlagI->getOpcode() == X86::CMP8rr &&
+ OI->getOpcode() == X86::SUB8rr)) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if (((FlagI->getOpcode() == X86::CMP64ri32 &&
+ OI->getOpcode() == X86::SUB64ri32) ||
+ (FlagI->getOpcode() == X86::CMP64ri8 &&
+ OI->getOpcode() == X86::SUB64ri8) ||
+ (FlagI->getOpcode() == X86::CMP32ri &&
+ OI->getOpcode() == X86::SUB32ri) ||
+ (FlagI->getOpcode() == X86::CMP32ri8 &&
+ OI->getOpcode() == X86::SUB32ri8) ||
+ (FlagI->getOpcode() == X86::CMP16ri &&
+ OI->getOpcode() == X86::SUB16ri) ||
+ (FlagI->getOpcode() == X86::CMP16ri8 &&
+ OI->getOpcode() == X86::SUB16ri8) ||
+ (FlagI->getOpcode() == X86::CMP8ri &&
+ OI->getOpcode() == X86::SUB8ri)) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// isDefConvertible - check whether the definition can be converted
+/// to remove a comparison against zero.
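+/// For example, in "subl %esi, %edi" followed by "cmpl $0, %edi" the SUB
+/// already sets ZF/SF for its result, so the compare against zero can be
+/// dropped (callers still bail out for users of CF or OF).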
+inline static bool isDefConvertible(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
+ case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
+ case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
+ case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
+ case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
+ case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
+ case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
+ case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
+ case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
+ case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
+ case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
+ case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
+ case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
+ case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
+ case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
+ case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
+ case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
+ case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
+ case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
+ case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
+ case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
+ case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
+ case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
+ case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
+ case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ return true;
+ }
+}
+
+/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// operates on the same source operands and sets flags in the same way as
+/// Compare; remove Compare if possible.
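+/// A sketch of the compare-against-zero case:
+///   subl  %esi, %edi
+///   testl %edi, %edi
+///   je    .LBB0_2
+/// Here the TEST can be erased and the JE reuse the flags set by the SUB.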
+bool X86InstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Check whether we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
+ switch (CmpInstr->getOpcode()) {
+ default: break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+ // There is no use of the destination register; we can replace SUB with CMP.
+ switch (CmpInstr->getOpcode()) {
+ default: llvm_unreachable(0);
+ case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+ case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+ case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+ case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+ case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+ case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+ case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+ case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+ case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+ case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+ case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+ case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+ case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+ case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ }
+ CmpInstr->setDesc(get(NewOpcode));
+ CmpInstr->RemoveOperand(0);
+ // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
+ }
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // CmpInstr is the first instruction of the BB.
+ MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+
+ // If we are comparing against zero, check whether we can use MI to update
+ // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
+ bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
+ if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
+ !isDefConvertible(MI)))
+ return false;
+
+ // We are searching for an earlier instruction that can make CmpInstr
+ // redundant and that instruction will be saved in Sub.
+ MachineInstr *Sub = NULL;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // We iterate backward, starting from the instruction before CmpInstr, and
+ // stop when we reach the definition of a source register or run out of
+ // instructions in the BB.
+ // RI points to the instruction before CmpInstr.
+ // If the definition is in this basic block, RE points to the definition;
+ // otherwise, RE is the rend of the basic block.
+ MachineBasicBlock::reverse_iterator
+ RI = MachineBasicBlock::reverse_iterator(I),
+ RE = CmpInstr->getParent() == MI->getParent() ?
+ MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
+ CmpInstr->getParent()->rend();
+ MachineInstr *Movr0Inst = 0;
+ for (; RI != RE; ++RI) {
+ MachineInstr *Instr = &*RI;
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (!IsCmpZero &&
+ isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ Sub = Instr;
+ break;
+ }
+
+ if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
+ Instr->readsRegister(X86::EFLAGS, TRI)) {
+ // This instruction modifies or uses EFLAGS.
+
+ // MOV32r0 etc. are implemented with xor, which clobbers the condition codes.
+ // They are safe to move up if the definition of EFLAGS is dead and
+ // earlier instructions do not read or write EFLAGS.
+ if (!Movr0Inst && (Instr->getOpcode() == X86::MOV8r0 ||
+ Instr->getOpcode() == X86::MOV16r0 ||
+ Instr->getOpcode() == X86::MOV32r0 ||
+ Instr->getOpcode() == X86::MOV64r0) &&
+ Instr->registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = Instr;
+ continue;
+ }
+
+ // We can't remove CmpInstr.
+ return false;
+ }
+ }
+
+ // Return false if no candidates exist.
+ if (!IsCmpZero && !Sub)
+ return false;
+
+ bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
+
+ // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
+ // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
+ // If we are done with the basic block, we need to check whether EFLAGS is
+ // live-out.
+ bool IsSafe = false;
+ SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
+ for (++I; I != E; ++I) {
+ const MachineInstr &Instr = *I;
+ bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
+ bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
+ // We should check the usage if this instruction uses and updates EFLAGS.
+ if (!UseEFLAGS && ModifyEFLAGS) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again.
+ IsSafe = true;
+ break;
+ }
+ if (!UseEFLAGS && !ModifyEFLAGS)
+ continue;
+
+ // EFLAGS is used by this instruction.
+ X86::CondCode OldCC;
+ bool OpcIsSET = false;
+ if (IsCmpZero || IsSwapped) {
+ // We decode the condition code from opcode.
+ if (Instr.isBranch())
+ OldCC = getCondFromBranchOpc(Instr.getOpcode());
+ else {
+ OldCC = getCondFromSETOpc(Instr.getOpcode());
+ if (OldCC != X86::COND_INVALID)
+ OpcIsSET = true;
+ else
+ OldCC = getCondFromCMovOpc(Instr.getOpcode());
+ }
+ if (OldCC == X86::COND_INVALID) return false;
+ }
+ if (IsCmpZero) {
+ switch (OldCC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ // CF and OF are used, we can't perform this optimization.
+ return false;
+ }
+ } else if (IsSwapped) {
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+ // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ // We swap the condition code and synthesize the new opcode.
+ X86::CondCode NewCC = getSwappedCondition(OldCC);
+ if (NewCC == X86::COND_INVALID) return false;
+
+ // Synthesize the new opcode.
+ bool HasMemoryOperand = Instr.hasOneMemOperand();
+ unsigned NewOpc;
+ if (Instr.isBranch())
+ NewOpc = GetCondBranchFromCond(NewCC);
+ else if(OpcIsSET)
+ NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
+ else {
+ unsigned DstReg = Instr.getOperand(0).getReg();
+ NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
+ HasMemoryOperand);
+ }
+
+ // Push the MachineInstr to OpsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // instructions will be modified.
+ OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ }
+ if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
+ IsSafe = true;
+ break;
+ }
+ }
+
+ // If EFLAGS is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if ((IsCmpZero || IsSwapped) && !IsSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+
+ // The instruction to be updated is either Sub or MI.
+ Sub = IsCmpZero ? MI : Sub;
+ // Move Movr0Inst to the place right before Sub.
+ if (Movr0Inst) {
+ Sub->getParent()->remove(Movr0Inst);
+ Sub->getParent()->insert(MachineBasicBlock::iterator(Sub), Movr0Inst);
+ }
+
+ // Make sure Sub instruction defines EFLAGS.
+ assert(Sub->getNumOperands() >= 2 &&
+ Sub->getOperand(Sub->getNumOperands()-1).isReg() &&
+ Sub->getOperand(Sub->getNumOperands()-1).getReg() == X86::EFLAGS &&
+ "EFLAGS should be the last operand of SUB, ADD, OR, XOR, AND");
+ Sub->getOperand(Sub->getNumOperands()-1).setIsDef(true);
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of instructions in OpsToUpdate.
+ for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
+ OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ return true;
+}
+
+/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// operand at the use. We fold the load if it defines a virtual register,
+/// the virtual register is used exactly once in the same BB, and the
+/// instructions in between neither load nor store and have no side effects.
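+/// For example (illustrative), a MOV32rm whose virtual-register result is only
+/// used by a following ADD32rr in the same BB can be folded into an ADD32rm,
+/// after which the load itself can be erased by the caller.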
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ if (FoldAsLoadDefReg == 0)
+ return 0;
+ // To be conservative, if there exists another load, clear the load candidate.
+ if (MI->mayLoad()) {
+ FoldAsLoadDefReg = 0;
+ return 0;
+ }
+
+ // Check whether we can move DefMI here.
+ DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+ assert(DefMI);
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(this, 0, SawStore))
+ return 0;
+
+ // We try to commute MI if possible.
+ unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
+ for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+ return 0;
+
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand) return 0;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
+ if (Idx == 1) {
+ // MI was changed but it didn't help, commute it back!
+ commuteInstruction(MI, false);
+ return 0;
+ }
+
+ // Check whether we can commute MI and enable folding.
+ if (MI->isCommutable()) {
+ MachineInstr *NewMI = commuteInstruction(MI, false);
+ // Unable to commute.
+ if (!NewMI) return 0;
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
+
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined. This is
/// used for mapping:
@@ -2795,6 +3630,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (i == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
}
// If table selected...
@@ -2809,7 +3646,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
bool NarrowToMOV32rm = false;
if (Size) {
- unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
if (Size < RCSize) {
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -3202,7 +4039,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
UnfoldStore &= FoldedStore;
const MCInstrDesc &MCID = get(Opc);
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
@@ -3297,7 +4134,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the store instruction.
if (UnfoldStore) {
- const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
std::pair<MachineInstr::mmo_iterator,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(MI->memoperands_begin(),
@@ -3323,7 +4160,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
bool FoldedStore = I->second.second & TB_FOLDED_STORE;
const MCInstrDesc &MCID = get(Opc);
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
unsigned NumDefs = MCID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
@@ -3344,7 +4182,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = 0;
- MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
std::pair<MachineInstr::mmo_iterator,
@@ -3371,7 +4208,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = 0;
if (MCID.getNumDefs() > 0) {
- DstRC = getRegClass(MCID, 0, &RI);
+ DstRC = getRegClass(MCID, 0, &RI, MF);
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
@@ -3625,7 +4462,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// Create the register. The code to initialize it is inserted
// later, by the CGBR pass (below).
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
@@ -3835,7 +4672,7 @@ namespace {
unsigned PC;
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
- PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
else
PC = GlobalBaseReg;
@@ -3869,3 +4706,117 @@ namespace {
char CGBR::ID = 0;
FunctionPass*
llvm::createGlobalBaseRegPass() { return new CGBR(); }
+
+namespace {
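+ // LDTLSCleanup - A machine-function pass that merges repeated local-dynamic
+ // TLS base-address computations: the first TLS_base_addr result on a path is
+ // cached in a virtual register, and dominated TLS_base_addr instructions are
+ // replaced with copies from it.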
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ is64Bit ? X86::RAX : X86::EAX)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
+ ? &X86::GR64RegClass
+ : &X86::GR32RegClass);
+
+ // Insert a copy from RAX/EAX to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(is64Bit ? X86::RAX : X86::EAX);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index b23d7560ec16..b6f69af037c2 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -128,8 +128,8 @@ class X86InstrInfo : public X86GenInstrInfo {
X86TargetMachine &TM;
const X86RegisterInfo RI;
- /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
- /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2, RegOp2MemOpTable3 - Load / store folding opcode maps.
///
typedef DenseMap<unsigned,
std::pair<unsigned, unsigned> > RegOp2MemOpTableType;
@@ -137,6 +137,7 @@ class X86InstrInfo : public X86GenInstrInfo {
RegOp2MemOpTableType RegOp2MemOpTable0;
RegOp2MemOpTableType RegOp2MemOpTable1;
RegOp2MemOpTableType RegOp2MemOpTable2;
+ RegOp2MemOpTableType RegOp2MemOpTable3;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
@@ -144,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo {
std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
MemOp2RegOpTableType MemOp2RegOpTable;
- void AddTableEntry(RegOp2MemOpTableType &R2MTable,
- MemOp2RegOpTableType &M2RTable,
- unsigned RegOp, unsigned MemOp, unsigned Flags);
+ static void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -218,6 +219,14 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const;
+ virtual void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -363,6 +372,33 @@ public:
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const;
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if it has two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
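+ /// For example, for "CMP32ri %reg, 4" this returns SrcReg = %reg, SrcReg2 = 0,
+ /// CmpMask = ~0, and CmpValue = 4.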
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const;
+
+ /// optimizeCompareInstr - Check if there exists an earlier instruction that
+ /// operates on the same source operands and sets flags in the same way as
+ /// Compare; remove Compare if possible.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// optimizeLoadInstr - Try to remove the load by folding it to a register
+ /// operand at the use. We fold the load instructions if and only if the
+ /// def and use are in the same BB. We only look at one load and see
+ /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+ /// defined by the load we are trying to fold. DefMI returns the machine
+ /// instruction that defines FoldAsLoadDefReg, and the function returns
+ /// the machine instruction generated due to folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 6a2531269d8f..d293156c1f71 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -63,6 +63,10 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
[SDTCisInt<0>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
+
+def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -95,6 +99,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
@@ -131,6 +137,11 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
+
+def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -199,6 +210,9 @@ def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
@@ -278,6 +292,20 @@ def X86Mem256AsmOperand : AsmOperandClass {
let Name = "Mem256"; let PredicateMethod = "isMem256";
}
+// Gather mem operands
+def X86MemVX32Operand : AsmOperandClass {
+ let Name = "MemVX32"; let PredicateMethod = "isMemVX32";
+}
+def X86MemVY32Operand : AsmOperandClass {
+ let Name = "MemVY32"; let PredicateMethod = "isMemVY32";
+}
+def X86MemVX64Operand : AsmOperandClass {
+ let Name = "MemVX64"; let PredicateMethod = "isMemVX64";
+}
+def X86MemVY64Operand : AsmOperandClass {
+ let Name = "MemVY64"; let PredicateMethod = "isMemVY64";
+}
+
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
let SuperClasses = [X86MemAsmOperand];
@@ -316,6 +344,20 @@ def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
+
+// Gather mem operands
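+// These use a vector register (VR128 or VR256) as the index component of the
+// memory operand, as the VSIB addressing used by gather instructions requires.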
+def vx32mem : X86MemOperand<"printi32mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86MemVX32Operand; }
+def vy32mem : X86MemOperand<"printi32mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86MemVY32Operand; }
+def vx64mem : X86MemOperand<"printi64mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86MemVX64Operand; }
+def vy64mem : X86MemOperand<"printi64mem">{
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86MemVY64Operand; }
}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
@@ -328,7 +370,7 @@ def i8mem_NOREX : Operand<i64> {
}
// GPRs available for tailcall.
-// It represents GR64_TC or GR64_TCW64.
+// It represents GR32_TC, GR64_TC or GR64_TCW64.
def ptr_rc_tailcall : PointerLikeRegClass<2>;
// Special i32mem for addresses of load folding tail calls. These are not
@@ -336,7 +378,8 @@ def ptr_rc_tailcall : PointerLikeRegClass<2>;
// after callee-saved register are popped.
def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
- let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
+ i32imm, i8imm);
let ParserMatchClass = X86Mem32AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -487,6 +530,9 @@ def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
@@ -494,6 +540,9 @@ def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
@@ -514,8 +563,8 @@ def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
-def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
-def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
+def HasFMA : Predicate<"Subtarget->hasFMA()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
@@ -680,25 +729,27 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
// Nop
let neverHasSideEffects = 1 in {
- def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
- "nop{w}\t$zero", []>, TB, OpSize;
+ "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
- "nop{l}\t$zero", []>, TB;
+ "nop{l}\t$zero", [], IIC_NOP>, TB;
}
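Most of the remaining hunks in this file follow the pattern visible here: the (often empty) pattern list keeps its position and an instruction itinerary class (IIC_*) is appended as a trailing argument, so per-CPU scheduling models can attach timings to each instruction group. This presumes the X86 format classes in X86InstrFormats.td grew a defaulted itinerary parameter roughly as sketched below; the exact default and any extra parameters are assumptions.

// Assumed shape of the base format class after this change; a defaulted
// itinerary keeps definitions that were not updated compiling unchanged.
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
        list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
  : X86Inst<o, f, NoImm, outs, ins, asm, itin> {
  let Pattern = pattern;
}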
// Constructing a stack frame.
def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", []>;
+ "enter\t$len, $lvl", [], IIC_ENTER>;
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
+ (outs), (ins), "leave", [], IIC_LEAVE>,
+ Requires<[In32BitMode]>;
let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+ (outs), (ins), "leave", [], IIC_LEAVE>,
+ Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
@@ -706,41 +757,49 @@ def LEAVE64 : I<0xC9, RawFrm,
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
-def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
- OpSize;
-def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
-def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
- OpSize;
-def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
- OpSize;
-def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
-def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
-
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
+ IIC_POP_REG16>, OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
+ IIC_POP_REG>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
+ IIC_POP_REG>, OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", [],
+ IIC_POP_MEM>, OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
+ IIC_POP_REG>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
+ IIC_POP_MEM>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
}
let mayStore = 1 in {
-def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
- OpSize;
-def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
-def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
- OpSize;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
+ IIC_PUSH_REG>, OpSize;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
+ IIC_PUSH_REG>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
+ IIC_PUSH_REG>, OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+ IIC_PUSH_MEM>,
OpSize;
-def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
+ IIC_PUSH_REG>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+ IIC_PUSH_MEM>;
def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
- "push{l}\t$imm", []>;
+ "push{l}\t$imm", [], IIC_PUSH_IMM>;
def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", []>, OpSize;
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize;
def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
- "push{l}\t$imm", []>;
+ "push{l}\t$imm", [], IIC_PUSH_IMM>;
-def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
-def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
+ OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
}
@@ -749,44 +808,48 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
- (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
+ IIC_POP_REG>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
+ IIC_POP_MEM>;
}
let mayStore = 1 in {
def PUSH64r : I<0x50, AddRegFrm,
- (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+ (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
+ IIC_PUSH_REG>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
+ IIC_PUSH_MEM>;
}
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
- "push{q}\t$imm", []>;
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{q}\t$imm", []>;
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
- "push{q}\t$imm", []>;
+ "push{q}\t$imm", [], IIC_PUSH_IMM>;
}
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
-def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
Requires<[In64BitMode]>;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
-def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
Requires<[In64BitMode]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
mayLoad=1, neverHasSideEffects=1 in {
-def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
mayStore=1, neverHasSideEffects=1 in {
-def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
@@ -794,84 +857,92 @@ let Constraints = "$src = $dst" in { // GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
- [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+ [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, TB;
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
- [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+ [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
} // Constraints = "$src = $dst"
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
+ IIC_BSF>, TB, OpSize;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB,
- OpSize;
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
+ IIC_BSF>, TB, OpSize;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
+ IIC_BSF>, TB;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
+ IIC_BSF>, TB;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
+ IIC_BSF>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
+ TB, OpSize;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB,
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
+ IIC_BSR>, TB,
OpSize;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
+ IIC_BSR>, TB;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
+ IIC_BSR>, TB;
} // Defs = [EFLAGS]
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
-def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>;
-def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize;
-def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>;
-def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", [], IIC_MOVS>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", [], IIC_MOVS>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", [], IIC_MOVS>;
}
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
-def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>;
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", [], IIC_STOS>;
let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
-def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize;
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", [], IIC_STOS>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
-def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", []>;
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", [], IIC_STOS>;
let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
-def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", [], IIC_STOS>;
-def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>;
-def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize;
-def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>;
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", [], IIC_SCAS>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", [], IIC_SCAS>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", [], IIC_SCAS>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", [], IIC_SCAS>;
-def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>;
-def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize;
-def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>;
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
//===----------------------------------------------------------------------===//
@@ -880,64 +951,64 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>;
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize;
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(set GR8:$dst, imm:$src)]>;
+ [(set GR8:$dst, imm:$src)], IIC_MOV>;
def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, imm:$src)]>, OpSize;
+ [(set GR16:$dst, imm:$src)], IIC_MOV>, OpSize;
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, imm:$src)]>;
+ [(set GR32:$dst, imm:$src)], IIC_MOV>;
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, imm:$src)]>;
+ [(set GR64:$dst, imm:$src)], IIC_MOV>;
def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, i64immSExt32:$src)]>;
+ [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 imm:$src), addr:$dst)]>;
+ [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+ [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize;
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 imm:$src), addr:$dst)]>;
+ [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>;
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32:$src, addr:$dst)]>;
+ [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|AL, $src}", []>,
+ "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize,
+ "mov{w}\t{$src, %ax|AL, $src}", [], IIC_MOV_MEM>, OpSize,
Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|EAX, $src}", []>,
+ "mov{l}\t{$src, %eax|EAX, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, AL}", []>,
+ "mov{b}\t{%al, $dst|$dst, AL}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize,
+ "mov{w}\t{%ax, $dst|$dst, AL}", [], IIC_MOV_MEM>, OpSize,
Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, EAX}", []>,
+ "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
// FIXME: These definitions are utterly broken
@@ -958,42 +1029,42 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>;
+ "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(set GR8:$dst, (loadi8 addr:$src))]>;
+ [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+ [(set GR16:$dst, (loadi16 addr:$src))], IIC_MOV_MEM>, OpSize;
def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (loadi32 addr:$src))]>;
+ [(set GR32:$dst, (loadi32 addr:$src))], IIC_MOV_MEM>;
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (load addr:$src))]>;
+ [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store GR8:$src, addr:$dst)]>;
+ [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store GR16:$src, addr:$dst)]>, OpSize;
+ [(store GR16:$src, addr:$dst)], IIC_MOV_MEM>, OpSize;
def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store GR32:$src, addr:$dst)]>;
+ [(store GR32:$src, addr:$dst)], IIC_MOV_MEM>;
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(store GR64:$src, addr:$dst)]>;
+ [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1002,24 +1073,28 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
+ IIC_MOV_MEM>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
+ IIC_MOV_MEM>;
}
// Condition code ops, incl. set if equal/not equal/...
-let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
-def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [EFLAGS], Uses = [AH] in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
+ [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
-def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
+ IIC_AHF>; // AH = flags
//===----------------------------------------------------------------------===//
@@ -1028,13 +1103,14 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
let Defs = [EFLAGS] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB;
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
+ OpSize, TB;
def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>, TB;
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
@@ -1045,31 +1121,33 @@ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
// (implicit EFLAGS)]
- []
+ [], IIC_BT_MR
>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi32 addr:$src1), GR32:$src2),
// (implicit EFLAGS)]
- []
+ [], IIC_BT_MR
>, TB, Requires<[FastBTMem]>;
def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi64 addr:$src1), GR64:$src2),
// (implicit EFLAGS)]
- []
+ [], IIC_BT_MR
>, TB;
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
- OpSize, TB;
+ [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
+ IIC_BT_RI>, OpSize, TB;
def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))],
+ IIC_BT_RI>, TB;
def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
+ IIC_BT_RI>, TB;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
@@ -1077,91 +1155,103 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
- ]>, OpSize, TB;
+ ], IIC_BT_MI>, OpSize, TB;
def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
- ]>, TB;
+ ], IIC_BT_MI>, TB;
def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
- i64immSExt8:$src2))]>, TB;
+ i64immSExt8:$src2))], IIC_BT_MI>, TB;
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
+ OpSize, TB;
def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
+ OpSize, TB;
def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
+ OpSize, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+ "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
+ OpSize, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+ "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
} // Defs = [EFLAGS]
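Note how the new classes encode both the operation family and the operand kind (IIC_BT_RR/RI/MR/MI for plain bit tests, IIC_BTX_* for the read-modify-write variants), so a processor itinerary can give memory forms different timings from register forms. The classes themselves are presumably declared once in X86Schedule.td and bound to pipeline stages per CPU; a rough sketch, with made-up functional-unit names, is:

// Sketch only: declaring an itinerary class and one per-CPU binding.
// Port0/Port1 are illustrative functional units, not from this patch.
def Port0 : FuncUnit;
def Port1 : FuncUnit;
def IIC_BTX_MI : InstrItinClass;   // normally declared once in X86Schedule.td

def ExampleItineraries : ProcessorItineraries<[Port0, Port1], [], [
  InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>]>
]>;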
@@ -1175,89 +1265,106 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
let Constraints = "$val = $dst" in {
def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
"xchg{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))],
+ IIC_XCHG_MEM>;
def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
"xchg{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))],
+ IIC_XCHG_MEM>,
OpSize;
def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
"xchg{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))],
+ IIC_XCHG_MEM>;
def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
"xchg{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))],
+ IIC_XCHG_MEM>;
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
- "xchg{b}\t{$val, $src|$src, $val}", []>;
+ "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
- "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+ "xchg{w}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>, OpSize;
def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
- "xchg{l}\t{$val, $src|$src, $val}", []>;
+ "xchg{l}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
- "xchg{q}\t{$val, $src|$src, $val}", []>;
+ "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
- "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize;
+ "xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
- "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>;
+ "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>,
+ Requires<[In32BitMode]>;
// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding.
// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP.
def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
- "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>;
+ "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>,
+ Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|RAX, $src}", []>;
+ "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
- "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+ OpSize;
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+ OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
- "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG8>, TB;
def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB, OpSize;
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB;
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_REG>, TB;
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM8>, TB;
def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB;
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
+ IIC_CMPXCHG_MEM>, TB;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
- "cmpxchg8b\t$dst", []>, TB;
+ "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>;
+ "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
+ TB, Requires<[HasCmpxchg16b]>;
@@ -1281,69 +1388,75 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
// String manipulation instructions
-def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
-def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
-def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
-def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
-def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
-def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
-def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
+def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
+def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
+def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
// Flag instructions
-def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
-def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
-def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
-def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
-def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
-def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
-def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
-def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
// Table lookup instructions
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
-def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
+ Requires<[In32BitMode]>;
// ASCII Adjust AX Before Division
// sets AL, AH and EFLAGS and uses AL and AH
def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
- "aad\t$src", []>, Requires<[In32BitMode]>;
+ "aad\t$src", [], IIC_AAD>, Requires<[In32BitMode]>;
// ASCII Adjust AX After Multiply
// sets AL, AH and EFLAGS and uses AL
def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
- "aam\t$src", []>, Requires<[In32BitMode]>;
+ "aam\t$src", [], IIC_AAM>, Requires<[In32BitMode]>;
// ASCII Adjust AL After Subtraction - sets
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
-def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>,
+ Requires<[In32BitMode]>;
// Decimal Adjust AL after Addition
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
-def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
+ Requires<[In32BitMode]>;
// Decimal Adjust AL after Subtraction
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
-def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
+ Requires<[In32BitMode]>;
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
Requires<[In32BitMode]>;
def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "bound\t{$src, $dst|$dst, $src}", []>,
+ "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>,
Requires<[In32BitMode]>;
// Adjust RPL Field of Segment Selector
def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
- "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+ "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
+ Requires<[In32BitMode]>;
def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
- "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+ "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
+ Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
// MOVBE Instructions
@@ -1351,22 +1464,28 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
let Predicates = [HasMOVBE] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
+ OpSize, T8;
def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))], IIC_MOVBE>,
+ T8;
def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
+ T8;
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
+ [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
+ OpSize, T8;
def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR32:$src), addr:$dst)]>, T8;
+ [(store (bswap GR32:$src), addr:$dst)], IIC_MOVBE>,
+ T8;
def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR64:$src), addr:$dst)]>, T8;
+ [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
+ T8;
}
//===----------------------------------------------------------------------===//
@@ -1374,11 +1493,14 @@ let Predicates = [HasMOVBE] in {
//
let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
- "rdrand{w}\t$dst", []>, OpSize, TB;
+ "rdrand{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize, TB;
def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
- "rdrand{l}\t$dst", []>, TB;
+ "rdrand{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdrand))]>, TB;
def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
- "rdrand{q}\t$dst", []>, TB;
+ "rdrand{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdrand))]>, TB;
}
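Unlike most hunks in this patch, the RDRAND change adds real selection patterns rather than itineraries: each instruction now produces both its destination register and EFLAGS from a single X86rdrand node, so the carry-flag success check can be selected directly. The node declaration is presumably along these lines; it lives elsewhere and is shown here only as an assumption.

// Assumed SDNode declaration backing the patterns above: two results (the
// random value and an i32 flags value) plus a chain and a side effect.
def X86rdrand : SDNode<"X86ISD::RDRAND",
                       SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>,
                       [SDNPHasChain, SDNPSideEffect]>;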
//===----------------------------------------------------------------------===//
@@ -1774,9 +1896,9 @@ def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
// We accept "fnstsw %eax" even though it only writes %ax.
-def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
-def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
-def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw %eax", (FNSTSW16r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW16r)>;
+def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
// this is compatible with what GAS does.
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 63f96b6f5d3b..c8f40bbb4905 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -20,71 +20,130 @@
// MMX Multiclasses
//===----------------------------------------------------------------------===//
+def MMX_INTALU_ITINS : OpndItins<
+ IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
+>;
+
+def MMX_INTALUQ_ITINS : OpndItins<
+ IIC_MMX_ALUQ_RR, IIC_MMX_ALUQ_RM
+>;
+
+def MMX_PHADDSUBW : OpndItins<
+ IIC_MMX_PHADDSUBW_RR, IIC_MMX_PHADDSUBW_RM
+>;
+
+def MMX_PHADDSUBD : OpndItins<
+ IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
+>;
+
+def MMX_PMUL_ITINS : OpndItins<
+ IIC_MMX_PMUL, IIC_MMX_PMUL
+>;
+
+def MMX_PSADBW_ITINS : OpndItins<
+ IIC_MMX_PSADBW, IIC_MMX_PSADBW
+>;
+
+def MMX_MISC_FUNC_ITINS : OpndItins<
+ IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
+>;
+
+def MMX_SHIFT_ITINS : ShiftOpndItins<
+ IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
+>;
+
+def MMX_UNPCK_H_ITINS : OpndItins<
+ IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
+>;
+
+def MMX_UNPCK_L_ITINS : OpndItins<
+ IIC_MMX_UNPCK_L, IIC_MMX_UNPCK_L
+>;
+
+def MMX_PCK_ITINS : OpndItins<
+ IIC_MMX_PCK_RR, IIC_MMX_PCK_RM
+>;
+
+def MMX_PSHUF_ITINS : OpndItins<
+ IIC_MMX_PSHUF, IIC_MMX_PSHUF
+>;
+
+def MMX_CVT_PD_ITINS : OpndItins<
+ IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
+>;
+
+def MMX_CVT_PS_ITINS : OpndItins<
+ IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
+>;
+
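Each OpndItins value above simply pairs the register-register and register-memory itinerary classes (ShiftOpndItins adds a register-immediate one), so the multiclasses below can write itins.rr, itins.rm and itins.ri instead of threading three separate parameters. The carrier classes are presumably defined once on the SSE side of the backend; a sketch consistent with that usage:

// Assumed carrier classes (likely X86InstrSSE.td); the fields match the
// itins.rr / itins.rm / itins.ri accesses in the multiclasses that follow.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
}
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> : OpndItins<arg_rr, arg_rm> {
  InstrItinClass ri = arg_ri;
}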
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit Commutable = 0> {
+ OpndItins itins, bit Commutable = 0> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2))))]>;
+ (bitconvert (load_mmx addr:$src2))))],
+ itins.rm>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
- Intrinsic IntId2> {
+ Intrinsic IntId2, ShiftOpndItins itins> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2))))]>;
+ (bitconvert (load_mmx addr:$src2))))],
+ itins.rm>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>;
}
}
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64> {
+ Intrinsic IntId64, OpndItins itins> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
+ [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
- (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+ (IntId64 (bitconvert (memopmmx addr:$src))))],
+ itins.rm>;
}
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64> {
+ Intrinsic IntId64, OpndItins itins> {
let isCommutable = 0 in
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
- (bitconvert (memopmmx addr:$src2))))]>;
+ (bitconvert (memopmmx addr:$src2))))], itins.rm>;
}
}
@@ -103,13 +162,13 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
+ string asm, OpndItins itins, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))],
- IIC_DEFAULT, d>;
+ itins.rr, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))],
- IIC_DEFAULT, d>;
+ itins.rm, d>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -139,22 +198,24 @@ def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (x86mmx (scalar_to_vector GR32:$src)))]>;
+ (x86mmx (scalar_to_vector GR32:$src)))],
+ IIC_MMX_MOV_MM_RM>;
let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_MMX_MOV_MM_RM>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- []>;
+ [], IIC_MMX_MOV_MM_RM>;
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
@@ -163,197 +224,276 @@ def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
- (bitconvert VR64:$src))]>;
+ (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (bitconvert GR64:$src))]>;
+ (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>;
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_MMX_MOVQ_RR>;
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (load_mmx addr:$src))]>;
+ [(set VR64:$dst, (load_mmx addr:$src))],
+ IIC_MMX_MOVQ_RM>;
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (x86mmx VR64:$src), addr:$dst)]>;
+ [(store (x86mmx VR64:$src), addr:$dst)],
+ IIC_MMX_MOVQ_RM>;
def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))))]>;
+ (iPTR 0))))))],
+ IIC_MMX_MOVQ_RR>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector
- (i64 (bitconvert (x86mmx VR64:$src))))))]>;
+ (i64 (bitconvert (x86mmx VR64:$src))))))],
+ IIC_MMX_MOVQ_RR>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
- (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_MMX_MOVQ_RR>;
def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
- (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>;
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [],
+ IIC_MMX_MOVQ_RR>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
- [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
+ IIC_MMX_MOVQ_RM>;
let AddedComplexity = 15 in
// movd to MMX register zero-extends
def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
+ IIC_MMX_MOV_MM_RM>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx
- (scalar_to_vector (loadi32 addr:$src))))))]>;
+ (scalar_to_vector (loadi32 addr:$src))))))],
+ IIC_MMX_MOV_MM_RM>;
// Arithmetic Instructions
-defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
-defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
-defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
+ MMX_INTALU_ITINS>;
// -- Addition
-defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
-defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
-defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
-defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
-defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
-defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
-
-defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
-defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
-
-defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
-defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
-defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
+ MMX_INTALUQ_ITINS, 1>;
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
+ MMX_PHADDSUBW>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
+ MMX_PHADDSUBD>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
+ MMX_PHADDSUBW>;
// -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
-defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
-defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
-defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
-
-defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
-defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
-
-defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
-defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
-
-defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
-defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
-defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
+ MMX_INTALUQ_ITINS, 1>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
+ MMX_INTALU_ITINS, 1>;
+
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
+ MMX_PHADDSUBW>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
+ MMX_PHADDSUBD>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
+ MMX_PHADDSUBW>;
// -- Multiplication
-defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
-
-defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
-defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
-defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
+ MMX_PMUL_ITINS, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
+ MMX_PMUL_ITINS, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
+ MMX_PMUL_ITINS, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
+ MMX_PMUL_ITINS, 1>;
let isCommutable = 1 in
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw>;
+ int_x86_ssse3_pmul_hr_sw, MMX_PMUL_ITINS>;
// -- Miscellanea
-defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
+ MMX_PMUL_ITINS, 1>;
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw>;
-defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
-defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
-
-defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
-defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
-
-defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
-defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
-
-defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
-
-defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
-defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
-defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+ int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>;
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
+ MMX_MISC_FUNC_ITINS, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
+ MMX_MISC_FUNC_ITINS, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
+ MMX_PSADBW_ITINS, 1>;
+
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
+ MMX_MISC_FUNC_ITINS>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
+ MMX_MISC_FUNC_ITINS>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
+ MMX_MISC_FUNC_ITINS>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
-defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
-defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
-defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
+ MMX_INTALU_ITINS, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
+ MMX_INTALU_ITINS>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
- int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
+ MMX_SHIFT_ITINS>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
- int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
+ MMX_SHIFT_ITINS>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
- int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
+ MMX_SHIFT_ITINS>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
- int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
+ MMX_SHIFT_ITINS>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
- int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
+ MMX_SHIFT_ITINS>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
- int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
+ MMX_SHIFT_ITINS>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
- int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
+ MMX_SHIFT_ITINS>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
- int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
+ MMX_SHIFT_ITINS>;
// Comparison Instructions
-defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
-defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
-defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
-
-defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
-defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
-defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
+ MMX_INTALU_ITINS>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
+ MMX_INTALU_ITINS>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
+ MMX_INTALU_ITINS>;
// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
- int_x86_mmx_punpckhbw>;
+ int_x86_mmx_punpckhbw,
+ MMX_UNPCK_H_ITINS>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
- int_x86_mmx_punpckhwd>;
+ int_x86_mmx_punpckhwd,
+ MMX_UNPCK_H_ITINS>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
- int_x86_mmx_punpckhdq>;
+ int_x86_mmx_punpckhdq,
+ MMX_UNPCK_H_ITINS>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
- int_x86_mmx_punpcklbw>;
+ int_x86_mmx_punpcklbw,
+ MMX_UNPCK_L_ITINS>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
- int_x86_mmx_punpcklwd>;
+ int_x86_mmx_punpcklwd,
+ MMX_UNPCK_L_ITINS>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
- int_x86_mmx_punpckldq>;
+ int_x86_mmx_punpckldq,
+ MMX_UNPCK_L_ITINS>;
// -- Pack Instructions
-defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
-defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
-defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
+ MMX_PCK_ITINS>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
+ MMX_PCK_ITINS>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
+ MMX_PCK_ITINS>;
// -- Shuffle Instructions
-defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
+ MMX_PSHUF_ITINS>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
+ IIC_MMX_PSHUF>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
- imm:$src2))]>;
-
+ imm:$src2))],
+ IIC_MMX_PSHUF>;
@@ -361,24 +501,24 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
+ MMX_CVT_PS_ITINS, SSEPackedSingle>, TB;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
+ MMX_CVT_PS_ITINS, SSEPackedSingle>, TB;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
+ MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
+ SSEPackedSingle>, TB;
}
// Extract / Insert
@@ -386,14 +526,16 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
- (iPTR imm:$src2)))]>;
+ (iPTR imm:$src2)))],
+ IIC_MMX_PEXTR>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
- GR32:$src2, (iPTR imm:$src3)))]>;
+ GR32:$src2, (iPTR imm:$src3)))],
+ IIC_MMX_PINSRW>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@@ -401,7 +543,8 @@ let Constraints = "$src1 = $dst" in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
- (iPTR imm:$src3)))]>;
+ (iPTR imm:$src3)))],
+ IIC_MMX_PINSRW>;
}
// Mask creation
@@ -411,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
(int_x86_mmx_pmovmskb VR64:$src))]>;
-// MMX to XMM for vector types
-def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
- (v2i64 (MOVQI2PQIrm addr:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
- (v2i64 (MOVDI2PDIrm addr:$src))>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
@@ -439,11 +568,13 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
let Uses = [EDI] in
def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
+ IIC_MMX_MASKMOV>;
let Uses = [RDI] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
+ IIC_MMX_MASKMOV>;
// 64-bit bit convert.
def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 65e3c1e19fa9..e4c35b9bc556 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
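For reference, the lane-0 extracts these new COPY_TO_REGCLASS patterns cover correspond to the plain scalar-read intrinsics; a short sketch, illustrative only (function names are made up):

#include <emmintrin.h>

/* Reading element 0 of a vector is only a register-class change: the scalar
   already lives in the low bits of the same XMM register, so no shuffle or
   extract instruction has to be emitted. */
float  low_f32(__m128  v) { return _mm_cvtss_f32(v); }
double low_f64(__m128d v) { return _mm_cvtsd_f64(v); }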
@@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
@@ -562,59 +562,57 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VMOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4f32 (V_SET0)),
- (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
}
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
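The zeroing behaviour these load patterns rely on can be seen from the scalar-load intrinsics; a small sketch, illustrative only (helper names are made up):

#include <emmintrin.h>

/* movss/movsd from memory write lane 0 and clear the remaining lanes of the
   XMM register; the VEX-encoded AVX forms additionally clear bits 255:128.
   That is why the patterns above need no explicit zeroing around the load. */
__m128  load_low_f32(const float  *p) { return _mm_load_ss(p); }
__m128d load_low_f64(const double *p) { return _mm_load_sd(p); }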
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
@@ -628,70 +626,68 @@ let Predicates = [HasAVX] in {
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2f64 (V_SET0)),
- (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2i64 (V_SET0)),
- (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
// Shuffle with VMOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
// 256-bit variants
def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
@@ -699,17 +695,13 @@ let Predicates = [HasAVX] in {
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
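The X86Movss/X86Movsd shuffles rewritten above act like a blend of the low lane: lane 0 comes from the second operand and the remaining lanes from the first, which is exactly what the move-scalar intrinsics do. Sketch, illustrative only:

#include <emmintrin.h>

/* movss/movsd between registers: dst lane 0 = b lane 0, other lanes = a. */
__m128  blend_low_ps(__m128  a, __m128  b) { return _mm_move_ss(a, b); }
__m128d blend_low_pd(__m128d a, __m128d b) { return _mm_move_sd(a, b); }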
let Predicates = [HasSSE1] in {
@@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
}
let AddedComplexity = 20 in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
// Shuffle with MOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}
let Predicates = [HasSSE2] in {
@@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in {
}
let AddedComplexity = 20 in {
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
// Shuffle with MOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of an X86Movlps there should be an X86Movsd here; the problem
// is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
//===----------------------------------------------------------------------===//
@@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d, OpndItins itins> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr, d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm, d>;
+ X86MemOperand x86memop, string asm, Domain d,
+ OpndItins itins> {
+let neverHasSideEffects = 1 in {
+ def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [], itins.rr, d>;
+ let mayLoad = 1 in
+ def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [], itins.rm, d>;
+}
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
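The {q} added to these asm strings marks an optional AT&T-syntax suffix on the 64-bit forms (cvttsd2si vs. cvttsd2siq); from C the two forms differ only in the destination width. Sketch, illustrative only (assumes an x86-64 target for the 64-bit intrinsic):

#include <emmintrin.h>

/* Truncating double -> integer conversion. The 32-bit form may also be
   written cvttsd2sil, the 64-bit one cvttsd2siq; the {l}/{q} in the .td
   strings mark that suffix as optional for the assembler and printer. */
int       trunc_i32(__m128d v) { return _mm_cvttsd_si32(v); }
long long trunc_i64(__m128d v) { return _mm_cvttsd_si64(v); }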
@@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
XD, VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
- XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>;
}
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
- SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
@@ -1587,94 +1574,71 @@ let Constraints = "$src1 = $dst" in {
"cvtsi2sd", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
- "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
+ "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
}
/// SSE 1 Only
// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si", SSE_CVT_SS2SI_64>,
- XS, VEX, VEX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX, VEX_W;
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
- XS, REX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
- XD, REX_W;
-
-let Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
- "cvtss2si\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
-}
-
-let Pattern = []<dag> in {
-defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS;
-defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, REX_W;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
+
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS;
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
+
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
+
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- TB; /* PD SSE3 form is avaiable */
-}
-
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (VCVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (VCVTSS2SI64rm addr:$src)>;
-}
-
-let Predicates = [HasSSE1] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (CVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (CVTSS2SI64rm addr:$src)>;
-}
+ TB, Requires<[HasSSE2]>;
/// SSE 2 Only
// Convert scalar double to scalar single
+let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1685,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -1700,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
XD,
Requires<[HasSSE2, OptForSize]>;
-defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar, 0>,
- XS, VEX_4V;
-let Constraints = "$src1 = $dst" in
-defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar>, XS;
+def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+}
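The Int_* definitions added here model the intrinsic form of cvtsd2ss, which converts only the low element and keeps the upper lanes of the first source operand, hence the two VR128 inputs. Roughly, in C (illustrative only):

#include <emmintrin.h>

/* Lane 0 of the result is (float)b[0]; lanes 1-3 are copied from a. */
__m128 narrow_low(__m128 a, __m128d b) { return _mm_cvtsd_ss(a, b); }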
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
+let neverHasSideEffects = 1 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1722,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+}
-let Predicates = [HasAVX] in {
+let AddedComplexity = 1 in { // give AVX priority
def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-}
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
+} // AddedComplexity = 1
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1760,190 +1747,146 @@ def : Pat<(extloadf32 addr:$src),
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[HasSSE2]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[HasSSE2]>;
-}
-
-// Convert doubleword to packed single/double fp
-// SSE2 instructions without OpSize prefix
-def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, Requires<[HasSSE2]>;
-
-// FIXME: why the non-intrinsic version is described as SSE3?
-// SSE2 instructions with XS prefix
-def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, Requires<[HasSSE2]>;
-
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
+}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>;
-def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- VEX;
-def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
-
-// SSE2 packed instructions with XD prefix
-def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, Requires<[HasSSE2]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ VEX;
+// XMM only
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX;
+def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ VEX, VEX_L;
+def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+}
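The x/y-suffixed spellings introduced above exist because a 128-bit and a 256-bit memory source look the same to the assembler; with a register source the width is implied by %xmm vs. %ymm. From C the two widths are simply different intrinsics. Sketch, illustrative only (assumes AVX):

#include <immintrin.h>

/* cvtpd2dq narrows packed doubles to packed 32-bit integers. The 128-bit
   memory form is spelled vcvtpd2dqx, the 256-bit one vcvtpd2dq{y}. */
__m128i cvt_pd2dq_128(__m128d v) { return _mm_cvtpd_epi32(v);    }
__m128i cvt_pd2dq_256(__m256d v) { return _mm256_cvtpd_epi32(v); }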
+
+def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
+def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
-def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-
-def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+
+def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
+def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_VCVTDQ2PSrr VR128:$src)>;
+ (VCVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_VCVTDQ2PSrm addr:$src)>;
+ (VCVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
@@ -1963,9 +1906,14 @@ let Predicates = [HasAVX] in {
let Predicates = [HasSSE2] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>;
+ (CVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_CVTDQ2PSrm addr:$src)>;
+ (CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (CVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>;
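The rewritten patterns route the generic nodes (sint_to_fp and truncating fp_to_sint on whole vectors) and the corresponding intrinsics to the same CVTDQ2PS/CVTTPS2DQ definitions; in C both are reachable through the usual intrinsics. Sketch, illustrative only:

#include <emmintrin.h>

/* cvtdq2ps converts four i32 lanes to float; cvttps2dq is the truncating
   inverse. */
__m128  vec_to_float(__m128i v) { return _mm_cvtepi32_ps(v);  }
__m128i vec_to_int  (__m128  v) { return _mm_cvttps_epi32(v); }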
@@ -1978,183 +1926,186 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-let isCodeGenOnly = 1 in
-def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
-def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
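// For example, with a memory source both sizes write an xmm destination, so
// the assembler needs the explicit suffix to pick an encoding:
//   vcvttpd2dqx (%rax), %xmm0    (128-bit packed-double source)
//   vcvttpd2dqy (%rax), %xmm0    (256-bit packed-double source)
// whereas "vcvttpd2dq %ymm1, %xmm0" is unambiguous because the ymm register
// operand already implies the 256-bit form.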
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTTPD2DQYrm addr:$src)>;
+} // Predicates = [HasAVX]
+
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
+let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
+
+let Predicates = [HasSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
+let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
+}
-def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, Requires<[HasSSE2]>;
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+let neverHasSideEffects = 1, mayLoad = 1 in
+def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX;
+def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX;
+def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX;
+}
+
+let neverHasSideEffects = 1, mayLoad = 1 in
+def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
+def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RM>;
+
+// AVX 256-bit register conversion intrinsics
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VCVTDQ2PDYrm addr:$src)>;
+} // Predicates = [HasAVX]
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2psx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
-def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
- (VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
- (VCVTDQ2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
- (VCVTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTPS2DQYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Match fround and fextend for 128/256-bit conversions
-def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
- (VCVTPS2PDYrm addr:$src)>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+ // Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
@@ -2587,17 +2538,13 @@ let Predicates = [HasAVX] in {
OpSize, VEX;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
@@ -2622,17 +2569,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -3230,34 +3177,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))),
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
(VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
- sub_sd)>;
+ (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR64)),
+ VR128)>;
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
(VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRCPSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
@@ -3336,13 +3279,6 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
IIC_SSE_MOVNT>, VEX;
}
-def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
- (VMOVNTDQYmr addr:$dst, VR256:$src)>;
-def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
- (VMOVNTPDYmr addr:$dst, VR256:$src)>;
-def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
- (VMOVNTPSYmr addr:$dst, VR256:$src)>;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -4610,7 +4546,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Bitcast FR64 <-> GR64
//
let Predicates = [HasAVX] in
-def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
VEX;
@@ -4623,7 +4559,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, VEX;
-def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
IIC_SSE_MOVDQ>;
@@ -4897,80 +4833,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 - Conversion Instructions
-//===---------------------------------------------------------------------===//
-
-// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX] in {
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// XMM only
-def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// YMM only
-def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
-
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-
-def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Convert Packed DW Integers to Packed Double FP
-let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-}
-
-def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-
-// AVX 256-bit register conversion intrinsics
-def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
- (VCVTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTPD2DQYrm addr:$src)>;
-
-def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
@@ -5580,16 +5442,14 @@ let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
Requires<[HasSSE3]>;
-def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
- Requires<[HasSSE3]>;
}
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
-def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
@@ -5730,14 +5590,26 @@ let Predicates = [HasSSE41] in {
(PMOVZXDQrm addr:$src)>;
}
+let Predicates = [HasAVX2] in {
+ let AddedComplexity = 15 in {
+ def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))),
+ (VPMOVZXDQYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
+ (VPMOVZXWDYrr VR128:$src)>;
+ }
+
+ def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+}
+
let Predicates = [HasAVX] in {
-def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
-def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
}
let Predicates = [HasSSE41] in {
-def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
-def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
@@ -6608,15 +6480,15 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, f128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, f256mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, f128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, f256mem, 0>, VEX_4V;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, memopv2i64, i128mem, 0>, VEX_4V;
@@ -6625,10 +6497,10 @@ let Predicates = [HasAVX] in {
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, memopv8f32, i256mem, 0>, VEX_4V;
@@ -6647,10 +6519,10 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, i128mem>;
+ VR128, memopv4f32, f128mem>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, i128mem>;
+ VR128, memopv2f64, f128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
@@ -6658,10 +6530,10 @@ let Constraints = "$src1 = $dst" in {
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, i128mem>;
+ VR128, memopv4f32, f128mem>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, i128mem>;
+ VR128, memopv2f64, f128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
@@ -6687,15 +6559,15 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
memopv2f64, int_x86_sse41_blendvpd>;
-defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
memopv4f64, int_x86_avx_blendv_pd_256>;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
memopv4f32, int_x86_sse41_blendvps>;
-defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
memopv8f32, int_x86_avx_blendv_ps_256>;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
@@ -6766,7 +6638,7 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- Intrinsic IntId> {
+ X86MemOperand x86memop, Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -6775,7 +6647,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ (ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
@@ -6785,14 +6657,28 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
int_x86_sse41_pblendvb>;
+// Aliases with the implicit xmm0 argument
+def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
+def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
+def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
+def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
+ (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
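// For example, "pblendvb %xmm0, %xmm2, %xmm1" and "pblendvb %xmm2, %xmm1"
// assemble to the same instruction: the mask is always the implicit xmm0
// register, and the alias merely accepts the explicit three-operand spelling.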
+
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
(v16i8 VR128:$src2))),
@@ -6955,81 +6841,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
}
// Packed Compare Implicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS] in {
- multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpistri<string asm> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
- VEX;
-}
-
-defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
-defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
-defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
-defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
-defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
-defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
// Packed Compare Explicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
- multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpestri<string asm> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX,
- (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
- VEX;
-}
-
-defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
-defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
-defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
-defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
-defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
-defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
@@ -7204,52 +7051,50 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
OpSize;
//===----------------------------------------------------------------------===//
-// CLMUL Instructions
+// PCLMUL Instructions
//===----------------------------------------------------------------------===//
-// Carry-less Multiplication instructions
-let neverHasSideEffects = 1 in {
// AVX carry-less Multiplication instructions
-def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>;
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
-let mayLoad = 1 in
-def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>;
+ [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
+ (memopv2i64 addr:$src2), imm:$src3))]>;
+// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
-def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
-let mayLoad = 1 in
-def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
+ [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
+ (memopv2i64 addr:$src2), imm:$src3))]>;
} // Constraints = "$src1 = $dst"
-} // neverHasSideEffects = 1
multiclass pclmul_alias<string asm, int immop> {
- def : InstAlias<!strconcat("pclmul", asm,
- "dq {$src, $dst|$dst, $src}"),
+ def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
(PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
- def : InstAlias<!strconcat("pclmul", asm,
- "dq {$src, $dst|$dst, $src}"),
+ def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
(PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
- def : InstAlias<!strconcat("vpclmul", asm,
+ def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
(VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
- def : InstAlias<!strconcat("vpclmul", asm,
+ def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
(VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
}
@@ -7259,6 +7104,45 @@ defm : pclmul_alias<"lqhq", 0x10>;
defm : pclmul_alias<"lqlq", 0x00>;
//===----------------------------------------------------------------------===//
+// SSE4A Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSSE4A] in {
+
+let Constraints = "$src = $dst" in {
+def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst),
+ (ins VR128:$src, i8imm:$len, i8imm:$idx),
+ "extrq\t{$idx, $len, $src|$src, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ imm:$idx))]>, TB, OpSize;
+def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "extrq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
+ VR128:$mask))]>, TB, OpSize;
+
+def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
+ "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
+ VR128:$src2, imm:$len, imm:$idx))]>, XD;
+def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "insertq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
+ VR128:$mask))]>, XD;
+}
+
+def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
+ "movntss\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;
+
+def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movntsd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
+}
+
+//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//
@@ -7286,7 +7170,7 @@ let ExeDomain = SSEPackedSingle in {
int_x86_avx_vbroadcast_ss_256>;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
@@ -7298,8 +7182,8 @@ let ExeDomain = SSEPackedSingle in {
int_x86_avx2_vbroadcast_ss_ps_256>;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7595,7 +7479,6 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
-let Predicates = [HasAVX, HasF16C] in {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (Int VR128:$src))]>,
@@ -7604,27 +7487,26 @@ let Predicates = [HasAVX, HasF16C] in {
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
}
-}
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
-let Predicates = [HasAVX, HasF16C] in {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32i8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
TA, OpSize, VEX;
- let neverHasSideEffects = 1, mayLoad = 1 in
- def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
- (ins RC:$src1, i32i8imm:$src2),
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : Ii8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
TA, OpSize, VEX;
}
-}
-defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
-defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
-defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
-defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+let Predicates = [HasAVX, HasF16C] in {
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+}
//===----------------------------------------------------------------------===//
// AVX2 Instructions
@@ -7711,6 +7593,49 @@ let Predicates = [HasAVX2] in {
(VPBROADCASTQrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
+
+ def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBrr VR128:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBYrr VR128:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDrr VR128:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDYrr VR128:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQYrr VR128:$src)>;
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSrr VR128:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSYrr VR128:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VBROADCASTSDYrr VR128:$src)>;
+
+ // Provide a fallback in case the load node used in the patterns above has
+ // additional users, which prevents those patterns from being selected.
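// For example, if the value being broadcast also has other (non-broadcast)
// users, instruction selection cannot fold its load into the memory-operand
// patterns above, so these register-source forms are matched instead.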
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ }
}
// AVX1 broadcast patterns
@@ -7718,16 +7643,42 @@ let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
+ (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
-
+ (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
+
+ // Provide a fallback in case the load node used in the patterns above has
+ // additional users, which prevents those patterns from being selected.
+ let AddedComplexity = 20 in {
+ // 128-bit broadcasts:
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+
+ def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
+ def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -7820,8 +7771,8 @@ let neverHasSideEffects = 1 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>,
- VEX_4V;
+ []>, VEX_4V;
+let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7954,3 +7905,30 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
+ X86MemOperand memop128, X86MemOperand memop256> {
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
+ (ins VR128:$src1, memop128:$src2, VR128:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
+ []>, VEX_4VOp3;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
+ (ins RC256:$src1, memop256:$src2, RC256:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
+ []>, VEX_4VOp3, VEX_L;
+}
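// Gather instructions consume the mask and clear each mask element once the
// corresponding element load completes, so the mask is modeled both as an
// input ($mask) and as a tied writeback output ($mask_wb).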
+
+let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>;
+}
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index bddba6cb0c4d..ea716bfd6bd8 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -14,7 +14,8 @@
//===----------------------------------------------------------------------===//
let Defs = [RAX, RDX] in
- def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
+ TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
@@ -26,14 +27,17 @@ let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
}
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", [], IIC_HLT>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", [], IIC_RSM>, TB;
// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
- [(int_x86_int (i8 3))]>;
+ [(int_x86_int (i8 3))], IIC_INT3>;
+
+def : Pat<(debugtrap),
+ (INT3)>;
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
@@ -41,23 +45,25 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
- [(int_x86_int imm:$trap)]>;
+ [(int_x86_int imm:$trap)], IIC_INT>;
-def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
-def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", [], IIC_SYSCALL>, TB;
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", [], IIC_SYSCALL>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", [], IIC_SYSCALL>, TB,
Requires<[In64BitMode]>;
-def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
-
-def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", [],
+ IIC_SYS_ENTER_EXIT>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [],
+ IIC_SYS_ENTER_EXIT>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
Requires<[In64BitMode]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
@@ -66,73 +72,73 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
//
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|AL, DX}", []>;
+ "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|AX, DX}", []>, OpSize;
+ "in{w}\t{%dx, %ax|AX, DX}", [], IIC_IN_RR>, OpSize;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|EAX, DX}", []>;
+ "in{l}\t{%dx, %eax|EAX, DX}", [], IIC_IN_RR>;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
- "in{b}\t{$port, %al|AL, $port}", []>;
+ "in{b}\t{$port, %al|AL, $port}", [], IIC_IN_RI>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{w}\t{$port, %ax|AX, $port}", []>, OpSize;
+ "in{w}\t{$port, %ax|AX, $port}", [], IIC_IN_RI>, OpSize;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{l}\t{$port, %eax|EAX, $port}", []>;
+ "in{l}\t{$port, %eax|EAX, $port}", [], IIC_IN_RI>;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|DX, AL}", []>;
+ "out{b}\t{%al, %dx|DX, AL}", [], IIC_OUT_RR>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize;
+ "out{w}\t{%ax, %dx|DX, AX}", [], IIC_OUT_RR>, OpSize;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|DX, EAX}", []>;
+ "out{l}\t{%eax, %dx|DX, EAX}", [], IIC_OUT_RR>;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
- "out{b}\t{%al, $port|$port, AL}", []>;
+ "out{b}\t{%al, $port|$port, AL}", [], IIC_OUT_IR>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{w}\t{%ax, $port|$port, AX}", []>, OpSize;
+ "out{w}\t{%ax, $port|$port, AX}", [], IIC_OUT_IR>, OpSize;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{l}\t{%eax, $port|$port, EAX}", []>;
+ "out{l}\t{%eax, $port|$port, EAX}", [], IIC_OUT_IR>;
-def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
//===----------------------------------------------------------------------===//
// Moves to and from control registers
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -150,254 +156,265 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize;
def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize;
def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+ "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
+ "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
-def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize;
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr",
+ [], IIC_INVLPG>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t$dst", []>, TB, OpSize;
+ "str{w}\t$dst", [], IIC_STR>, TB, OpSize;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
- "str{l}\t$dst", []>, TB;
+ "str{l}\t$dst", [], IIC_STR>, TB;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
- "str{q}\t$dst", []>, TB;
+ "str{q}\t$dst", [], IIC_STR>, TB;
def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t$dst", []>, TB;
+ "str{w}\t$dst", [], IIC_STR>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t$src", []>, TB;
+ "ltr{w}\t$src", [], IIC_LTR>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t$src", []>, TB;
+ "ltr{w}\t$src", [], IIC_LTR>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
- "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%cs|CS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
- "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%cs|CS}", [], IIC_PUSH_CS>, Requires<[In32BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
- "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
- "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
- "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
- "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
- "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>,
+ OpSize;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
- "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t{%fs|FS}", []>, OpSize, TB;
+ "push{w}\t{%fs|FS}", [], IIC_PUSH_SR>, OpSize, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%fs|FS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t{%gs|GS}", []>, OpSize, TB;
+ "push{w}\t{%gs|GS}", [], IIC_PUSH_SR>, OpSize, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%gs|GS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
- "push{q}\t{%fs|FS}", []>, TB;
+ "push{q}\t{%fs|FS}", [], IIC_PUSH_SR>, TB;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
- "push{q}\t{%gs|GS}", []>, TB;
+ "push{q}\t{%gs|GS}", [], IIC_PUSH_SR>, TB;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins),
- "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ss|SS}", [], IIC_POP_SR_SS>,
+ OpSize, Requires<[In32BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins),
- "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ss|SS}", [], IIC_POP_SR_SS>,
+ Requires<[In32BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
- "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ds|DS}", [], IIC_POP_SR>,
+ OpSize, Requires<[In32BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
- "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ds|DS}", [], IIC_POP_SR>,
+ Requires<[In32BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins),
- "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%es|ES}", [], IIC_POP_SR>,
+ OpSize, Requires<[In32BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins),
- "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%es|ES}", [], IIC_POP_SR>,
+ Requires<[In32BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t{%fs|FS}", []>, OpSize, TB;
+ "pop{w}\t{%fs|FS}", [], IIC_POP_SR>, OpSize, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%fs|FS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
- "pop{q}\t{%fs|FS}", []>, TB;
+ "pop{q}\t{%fs|FS}", [], IIC_POP_SR>, TB;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t{%gs|GS}", []>, OpSize, TB;
+ "pop{w}\t{%gs|GS}", [], IIC_POP_SR>, OpSize, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%gs|GS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
- "pop{q}\t{%gs|GS}", []>, TB;
+ "pop{q}\t{%gs|GS}", [], IIC_POP_SR>, TB;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lds{l}\t{$src, $dst|$dst, $src}", []>;
+ "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>;
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+ "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "les{l}\t{$src, $dst|$dst, $src}", []>;
+ "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>;
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
- "verr\t$seg", []>, TB;
+ "verr\t$seg", [], IIC_VERR>, TB;
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", []>, TB;
+ "verr\t$seg", [], IIC_VERR>, TB;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
- "verw\t$seg", []>, TB;
+ "verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
- "verw\t$seg", []>, TB;
+ "verw\t$seg", [], IIC_VERW_REG>, TB;
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+ "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdt\t$dst", []>, TB;
+ "sgdt\t$dst", [], IIC_SGDT>, TB;
def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+ "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
"sidt\t$dst", []>, TB;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
- "sldt{w}\t$dst", []>, TB, OpSize;
+ "sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize;
def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
+ "sldt{w}\t$dst", [], IIC_SLDT>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
- "sldt{l}\t$dst", []>, TB;
+ "sldt{l}\t$dst", [], IIC_SLDT>, TB;
// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
+ "sldt{q}\t$dst", [], IIC_SLDT>, TB;
def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
+ "sldt{q}\t$dst", [], IIC_SLDT>, TB;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+ "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdt\t$src", []>, TB;
+ "lgdt\t$src", [], IIC_LGDT>, TB;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+ "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidt\t$src", []>, TB;
+ "lidt\t$src", [], IIC_LIDT>, TB;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
- "lldt{w}\t$src", []>, TB;
+ "lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
- "lldt{w}\t$src", []>, TB;
+ "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
//===----------------------------------------------------------------------===//
// Specialized register support
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
- "smsw{w}\t$dst", []>, OpSize, TB;
+ "smsw{w}\t$dst", [], IIC_SMSW>, OpSize, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
- "smsw{l}\t$dst", []>, TB;
+ "smsw{l}\t$dst", [], IIC_SMSW>, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
- "smsw{q}\t$dst", []>, TB;
+ "smsw{q}\t$dst", [], IIC_SMSW>, TB;
// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
- "smsw{w}\t$dst", []>, TB;
+ "smsw{w}\t$dst", [], IIC_SMSW>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
- "lmsw{w}\t$src", []>, TB;
+ "lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB;
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
- "lmsw{w}\t$src", []>, TB;
+ "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
//===----------------------------------------------------------------------===//
// Cache instructions
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
//===----------------------------------------------------------------------===//
// XSAVE instructions
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 6a8f0c848673..6d3548f09398 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -17,17 +17,17 @@
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 65bbcb55ae12..8ec2c688d33f 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -15,7 +15,7 @@ multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, VEX;
- def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
@@ -36,27 +36,19 @@ let isAsmParserOnly = 1 in {
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
- defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
- defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
}
// Scalar load 2 addr operand instructions
-let Constraints = "$src1 = $dst" in {
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
Operand memop, ComplexPattern mem_cpat> {
- def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
- VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX;
- def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
- memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1,
- (bitconvert mem_cpat:$src2)))]>, VEX;
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX;
}
-} // Constraints = "$src1 = $dst"
-
let isAsmParserOnly = 1 in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32>;
@@ -64,12 +56,26 @@ let isAsmParserOnly = 1 in {
sdmem, sse_load_f64>;
}
+multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
+}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX;
def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
@@ -88,13 +94,13 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
+ (ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
VEX_4V, VEX_W;
def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src1, VR128:$src2),
+ (ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
@@ -116,25 +122,23 @@ let isAsmParserOnly = 1 in {
defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
}
-multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
- let neverHasSideEffects = 1 in {
- def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX;
- let mayLoad = 1 in
- def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX;
- }
+multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, imm:$src2))]>, VEX;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, VEX;
}
let isAsmParserOnly = 1 in {
- defm VPROTW : xop3opimm<0xC1, "vprotw">;
- defm VPROTQ : xop3opimm<0xC3, "vprotq">;
- defm VPROTD : xop3opimm<0xC2, "vprotd">;
- defm VPROTB : xop3opimm<0xC0, "vprotb">;
+ defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
}
// Instruction where second source can be memory, but third must be register
@@ -146,7 +150,7 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM;
def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
@@ -170,32 +174,31 @@ let isAsmParserOnly = 1 in {
}
// Instruction where second source can be memory, third must be imm8
-multiclass xop4opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType VT> {
+multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (VT (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, VEX_4V;
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ VEX_4V;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (VT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
- imm:$src3)))]>, VEX_4V;
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ imm:$src3))]>, VEX_4V;
}
let isAsmParserOnly = 1 in {
- defm VPCOMB : xop4opimm<0xCC, "vpcomb", X86vpcom, v16i8>;
- defm VPCOMW : xop4opimm<0xCD, "vpcomw", X86vpcom, v8i16>;
- defm VPCOMD : xop4opimm<0xCE, "vpcomd", X86vpcom, v4i32>;
- defm VPCOMQ : xop4opimm<0xCF, "vpcomq", X86vpcom, v2i64>;
- defm VPCOMUB : xop4opimm<0xEC, "vpcomub", X86vpcomu, v16i8>;
- defm VPCOMUW : xop4opimm<0xED, "vpcomuw", X86vpcomu, v8i16>;
- defm VPCOMUD : xop4opimm<0xEE, "vpcomud", X86vpcomu, v4i32>;
- defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", X86vpcomu, v2i64>;
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
}
// Instruction where either second or third source can be memory
@@ -207,7 +210,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
VEX_4V, VEX_I8IMM;
def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
@@ -215,7 +218,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
(bitconvert (memopv2i64 addr:$src3))))]>,
VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
@@ -237,7 +240,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
[(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
VEX_4V, VEX_I8IMM;
def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index c76d3ccf5d94..d7c08dfb0fdf 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -65,7 +65,7 @@ namespace llvm {
/// referenced global symbols.
virtual void relocate(void *Function, MachineRelocation *MR,
unsigned NumRelocs, unsigned char* GOTBase);
-
+
/// allocateThreadLocalMemory - Each target has its own way of
/// handling thread local variables. This method returns a value only
/// meaningful to the target.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index b578e8d9285d..9c0ce4ead2fa 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
SmallString<128> Name;
-
+
if (!MO.isGlobal()) {
assert(MO.isSymbol());
Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
- } else {
+ } else {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
isImplicitlyPrivate = true;
-
+
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
}
@@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
getMachOMMI().getFnStubEntry(Sym);
if (StubSym.getPointer())
return Sym;
-
+
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
@@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// lot of extra uniquing.
const MCExpr *Expr = 0;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
-
+
switch (MO.getTargetFlags()) {
default: llvm_unreachable("Unknown target flag on GV operand");
case X86II::MO_NO_FLAG: // No flag.
@@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
break;
-
+
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
@@ -156,10 +156,14 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
break;
case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break;
+ case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+ case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
@@ -169,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
+ Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI() && MAI.hasSetDirective()) {
@@ -183,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
}
break;
}
-
+
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
-
+
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
@@ -207,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
// Convert registers in the addr mode according to subreg64.
for (unsigned i = 0; i != 4; ++i) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
-
+
unsigned Reg = MI->getOperand(OpNo+i).getReg();
if (Reg == 0) continue;
-
+
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
}
@@ -276,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
return;
// Check whether this is an absolute address.
- // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
// to do this here.
bool Absolute = true;
if (Inst.getOperand(AddrOp).isExpr()) {
@@ -285,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
Absolute = false;
}
-
+
if (Absolute &&
(Inst.getOperand(AddrBase + 0).getReg() != 0 ||
Inst.getOperand(AddrBase + 2).getReg() != 0 ||
@@ -302,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
-
+
MCOperand MCOp;
switch (MO.getType()) {
default:
@@ -341,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// Ignore call clobbers.
continue;
}
-
+
OutMI.addOperand(MCOp);
}
-
+
// Handle a few special cases to eliminate operand modifiers.
ReSimplify:
switch (OutMI.getOpcode()) {
@@ -421,7 +425,7 @@ ReSimplify:
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
}
-
+
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
OutMI.setOpcode(Opcode);
@@ -441,7 +445,7 @@ ReSimplify:
case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
-
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -550,17 +554,38 @@ ReSimplify:
static void LowerTlsAddr(MCStreamer &OutStreamer,
X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
- bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
+ MI.getOpcode() == X86::TLS_base_addr64;
+
+ bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
+
MCContext &context = OutStreamer.getContext();
- if (is64Bits) {
+ if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
}
+
+ MCSymbolRefExpr::VariantKind SRVK;
+ switch (MI.getOpcode()) {
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSGD;
+ break;
+ case X86::TLS_base_addr32:
+ SRVK = MCSymbolRefExpr::VK_TLSLDM;
+ break;
+ case X86::TLS_base_addr64:
+ SRVK = MCSymbolRefExpr::VK_TLSLD;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
+ }
+
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
- const MCSymbolRefExpr *symRef =
- MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+ const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
MCInst LEA;
if (is64Bits) {
@@ -571,6 +596,14 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
LEA.addOperand(MCOperand::CreateReg(0)); // index
LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
} else {
LEA.setOpcode(X86::LEA32r);
LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
@@ -582,7 +615,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
}
OutStreamer.EmitInstruction(LEA);
- if (is64Bits) {
+ if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
@@ -609,8 +642,6 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- OutStreamer.EmitCodeRegion();
-
X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -646,6 +677,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_addr32:
case X86::TLS_addr64:
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
@@ -655,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// call "L1$pb"
// "L1$pb":
// popl %esi
-
+
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
TmpInst.setOpcode(X86::CALLpcrel32);
@@ -664,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
OutContext)));
OutStreamer.EmitInstruction(TmpInst);
-
+
// Emit the label.
OutStreamer.EmitLabel(PICBase);
-
+
// popl $reg
TmpInst.setOpcode(X86::POP32r);
TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
OutStreamer.EmitInstruction(TmpInst);
return;
}
-
+
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
break;
-
+
// Okay, we have something like:
// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
-
+
// For this, we want to print something like:
// MYGLOBAL + (. - PICBASE)
// However, we can't generate a ".", so just emit a new label here and refer
// to it.
MCSymbol *DotSym = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(DotSym);
-
+
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
+
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
const MCExpr *PICBase =
MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
-
- DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
-
+
MCInst TmpInst;
TmpInst.setOpcode(X86::ADD32ri);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
@@ -710,9 +743,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
}
-
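For context on the TLS_base_addr32/TLS_base_addr64 lowering added above — a hedged sketch, not part of this patch: the local-dynamic TLS model is typically chosen when -fPIC code repeatedly accesses its own thread-local variables, so a single __tls_get_addr call (relocated via VK_TLSLDM on x86-32 or VK_TLSLD on x86-64) fetches the module's TLS base once, and each variable is then reached through a DTPOFF offset. C++ source along these lines (names are illustrative) is the kind of input that usually produces such sequences:
// Hypothetical example. Compiled with -fPIC into a shared object, both
// accesses below can share one __tls_get_addr call returning the module's
// TLS block; Counter and Limit are then addressed via DTPOFF offsets.
static thread_local int Counter = 0;
static thread_local int Limit = 16;
int bumpCounter() {
  if (Counter < Limit)   // reuses the TLS base obtained for the first access
    ++Counter;
  return Counter;
}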
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 40df3db7d480..b4d4cfd301a5 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class Mangler;
class TargetMachine;
class X86AsmPrinter;
-
+
/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
@@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
public:
X86MCInstLower(Mangler *mang, const MachineFunction &MF,
X86AsmPrinter &asmprinter);
-
+
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
+
private:
MachineModuleInfoMachO &getMachOMMI() const;
};
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index c7471091ec47..78d20ce870f1 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
/// ForceFramePointer - True if the function is required to use a frame
- /// pointer for reasons other than it containing dynamic allocation or
+ /// pointer for reasons other than it containing dynamic allocation or
/// that FP elimination is turned off. For example, Cygwin main function
/// contains stack pointer re-alignment code which requires FP.
bool ForceFramePointer;
@@ -66,6 +66,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
/// being passed on the stack.
unsigned ArgumentStackSize;
+ /// NumLocalDynamics - Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
@@ -79,8 +81,9 @@ public:
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
- ArgumentStackSize(0) {}
-
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
+
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
CalleeSavedFrameSize(0),
@@ -93,9 +96,10 @@ public:
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
- ArgumentStackSize(0) {}
-
- bool getForceFramePointer() const { return ForceFramePointer;}
+ ArgumentStackSize(0),
+ NumLocalDynamics(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
@@ -130,6 +134,10 @@ public:
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index b56025fbb09c..877b8f6bc3d1 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,6 +50,10 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
+cl::opt<bool>
+EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
@@ -73,6 +77,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
StackPtr = X86::ESP;
FramePtr = X86::EBP;
}
+ // Use a callee-saved register as the base pointer. These registers must
+ // not conflict with any ABI requirements. For example, in 32-bit mode PIC
+ // requires the GOT address to be in EBX before making calls through the PLT.
+ BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
/// getCompactUnwindRegNum - This function maps the register to the number for
@@ -90,6 +98,12 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
return -1;
}
+bool
+X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ // Only enable when post-RA scheduling is enabled and this is needed.
+ return TM.getSubtargetImpl()->postRAScheduler();
+}
+
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
int reg = X86_MC::getX86RegNum(i);
@@ -146,7 +160,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
// The GR8_NOREX class is always used in a way that won't be constrained to a
// sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
// full GR8 class.
- if (RC == X86::GR8_NOREXRegisterClass)
+ if (RC == &X86::GR8_NOREXRegClass)
return RC;
const TargetRegisterClass *Super = RC;
@@ -175,7 +189,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
}
const TargetRegisterClass *
-X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -238,7 +253,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (ghcCall)
- return CSR_Ghc_SaveList;
+ return CSR_NoRegs_SaveList;
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -254,7 +269,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
- return CSR_Ghc_RegMask;
+ return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
if (IsWin64)
@@ -268,21 +283,33 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Set the stack-pointer register and its aliases as reserved.
Reserved.set(X86::RSP);
- Reserved.set(X86::ESP);
- Reserved.set(X86::SP);
- Reserved.set(X86::SPL);
+ for (MCSubRegIterator I(X86::RSP, this); I.isValid(); ++I)
+ Reserved.set(*I);
// Set the instruction pointer register and its aliases as reserved.
Reserved.set(X86::RIP);
- Reserved.set(X86::EIP);
- Reserved.set(X86::IP);
+ for (MCSubRegIterator I(X86::RIP, this); I.isValid(); ++I)
+ Reserved.set(*I);
// Set the frame-pointer register and its aliases as reserved if needed.
if (TFI->hasFP(MF)) {
Reserved.set(X86::RBP);
- Reserved.set(X86::EBP);
- Reserved.set(X86::BP);
- Reserved.set(X86::BPL);
+ for (MCSubRegIterator I(X86::RBP, this); I.isValid(); ++I)
+ Reserved.set(*I);
+ }
+
+ // Set the base-pointer register and its aliases as reserved if needed.
+ if (hasBasePointer(MF)) {
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ const uint32_t* RegMask = getCallPreservedMask(CC);
+ if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported with"
+ "this calling convention.");
+
+ Reserved.set(getBaseRegister());
+ for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
+ Reserved.set(*I);
}
// Mark the segment registers as reserved.
@@ -293,6 +320,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::FS);
Reserved.set(X86::GS);
+ // Mark the floating point stack registers as reserved.
+ Reserved.set(X86::ST0);
+ Reserved.set(X86::ST1);
+ Reserved.set(X86::ST2);
+ Reserved.set(X86::ST3);
+ Reserved.set(X86::ST4);
+ Reserved.set(X86::ST5);
+ Reserved.set(X86::ST6);
+ Reserved.set(X86::ST7);
+
// Reserve the registers that only exist in 64-bit mode.
if (!Is64Bit) {
// These 8-bit registers are part of the x86-64 extension even though their
@@ -308,14 +345,13 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const uint16_t *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
- Reserved.set(Reg);
+ for (MCRegAliasIterator AI(GPR64[n], this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
// XMM8, XMM9, ...
assert(X86::XMM15 == X86::XMM8+7);
- for (const uint16_t *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
- ++AI)
- Reserved.set(Reg);
+ for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
}
}
@@ -326,10 +362,36 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
+bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ if (!EnableBasePointer)
+ return false;
+
+ // When we need stack realignment and there are dynamic allocas, we can't
+ // reference off of the stack pointer, so we reserve a base pointer.
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ return false;
+}
+
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (MF.getTarget().Options.RealignStack &&
- !MFI->hasVarSizedObjects());
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+
+ // If a base pointer is necessary, check that it isn't too late to reserve
+ // it.
+ if (MFI->hasVarSizedObjects())
+ return MRI->canReserveReg(BasePtr);
+ return true;
}
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
@@ -339,13 +401,6 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
- // FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas.
- // FIXME: It's more complicated than this...
- if (0 && requiresRealignment && MFI->hasVarSizedObjects())
- report_fatal_error(
- "Stack realignment in presence of dynamic allocas is not supported");
-
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
@@ -485,7 +540,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
- if (needsStackRealignment(MF))
+ if (hasBasePointer(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
+ else if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else if (AfterFPPop)
BasePtr = StackPtr;
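To make the base-pointer machinery above concrete — a minimal sketch, not from this patch, assuming Clang/GCC builtins and illustrative names: hasBasePointer() returns true exactly when the frame both needs realignment and contains variable-sized objects, which is the situation below.
// Hypothetical illustration of a frame needing SP, FP and a base pointer.
static void consumeBuffers(void *Aligned, void *Dynamic) {} // placeholder callee
void needsBasePointer(int N) {
  alignas(32) char Aligned[64];        // over-aligned local: MaxAlignment
                                       // exceeds the default stack alignment,
                                       // so needsStackRealignment() is true
  void *Dynamic = __builtin_alloca(N); // variable-sized object, so
                                       // hasVarSizedObjects() is true and SP
                                       // moves at run time
  // Fixed locals such as Aligned can no longer be addressed off SP, and the
  // realigned area sits at an unknown offset from FP, so eliminateFrameIndex()
  // rewrites their frame indices against ESI (32-bit) or RBX (64-bit).
  consumeBuffers(Aligned, Dynamic);
}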
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index bee03936f1f7..1bc32cbb78f4 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -50,6 +50,11 @@ private:
///
unsigned FramePtr;
+ /// BasePtr - X86 physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
@@ -65,7 +70,8 @@ public:
int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
/// Code Generation virtual methods...
- ///
+ ///
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
/// getMatchingSuperRegClass - Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
@@ -82,7 +88,8 @@ public:
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
/// getCrossCopyRegClass - Returns a legal register class to copy a register
/// in the specified class to or from. Returns NULL if it is possible to copy
@@ -104,6 +111,8 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
+
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
@@ -121,6 +130,7 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
+ unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 5263a4934cbd..edc71845acba 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -23,9 +23,6 @@ let Namespace = "X86" in {
def sub_8bit_hi : SubRegIndex;
def sub_16bit : SubRegIndex;
def sub_32bit : SubRegIndex;
-
- def sub_ss : SubRegIndex;
- def sub_sd : SubRegIndex;
def sub_xmm : SubRegIndex;
@@ -163,8 +160,6 @@ let Namespace = "X86" in {
def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
- // The sub_ss and sub_sd subregs are the same registers with another regclass.
- let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -184,7 +179,7 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }}
+ } // CostPerUse
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
@@ -223,6 +218,9 @@ let Namespace = "X86" in {
def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
+ // Floating-point status word
+ def FPSW : Register<"fpsw">;
+
// Status flags register
def EFLAGS : Register<"flags">;
@@ -296,26 +294,18 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
-}
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
-}
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
- (GR16 sub_16bit),
- (GR32 sub_32bit)];
-}
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
@@ -336,30 +326,12 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
-def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
- let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
-}
-def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
- let SubRegClasses = [(GR8_ABCD_L sub_8bit),
- (GR8_ABCD_H sub_8bit_hi),
- (GR16_ABCD sub_16bit)];
-}
-def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
- let SubRegClasses = [(GR8_ABCD_L sub_8bit),
- (GR8_ABCD_H sub_8bit_hi),
- (GR16_ABCD sub_16bit),
- (GR32_ABCD sub_32bit)];
-}
-def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
-}
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11, RIP)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
- (GR16 sub_16bit),
- (GR32_TC sub_32bit)];
-}
-
+ R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R11)>;
@@ -373,64 +345,36 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
- (add AX, CX, DX, SI, DI, BX, BP, SP)> {
- let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
-}
+ (add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
- (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
- let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
- (GR16_NOREX sub_16bit)];
-}
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
- let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
- (GR16_NOREX sub_16bit),
- (GR32_NOREX sub_32bit)];
-}
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit
// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs
// to clear upper 32-bits of RAX so is not a NOP.
-def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
-}
+def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)>;
// GR32_NOSP - GR32 registers except ESP.
-def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
-}
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
// GR64_NOSP - GR64 registers except RSP (and RIP).
-def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
- let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
- (GR16 sub_16bit),
- (GR32_NOSP sub_32bit)];
-}
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
- (and GR32_NOREX, GR32_NOSP)> {
- let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
- (GR16_NOREX sub_16bit)];
-}
+ (and GR32_NOREX, GR32_NOSP)>;
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
- (and GR64_NOREX, GR64_NOSP)> {
- let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
- (GR16_NOREX sub_16bit),
- (GR32_NOREX_NOSP sub_32bit)];
-}
+ (and GR64_NOREX, GR64_NOSP)>;
// A class to support the 'A' assembler constraint: EAX then EDX.
-def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
- let SubRegClasses = [(GR8_ABCD_L sub_8bit),
- (GR8_ABCD_H sub_8bit_hi),
- (GR16_ABCD sub_16bit)];
-}
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
@@ -458,17 +402,16 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
// Generic vector registers: VR64 and VR128.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (add FR32)> {
- let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
-}
-
+ 128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- 256, (sequence "YMM%u", 0, 15)> {
- let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
-}
+ 256, (sequence "YMM%u", 0, 15)>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
+def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
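
Note on the register-class hunks above: the deleted SubRegClasses lists duplicated information already present on the register definitions, so presumably TableGen can now derive the sub-register classes itself; C++ code keeps querying sub-registers through TargetRegisterInfo. A minimal sketch of such a query, using only names that exist in this tree (X86::GR32RegClass, X86::sub_8bit); the helper function itself is hypothetical and not part of the patch:

#include <cassert>
#include "llvm/Target/TargetRegisterInfo.h"
#include "X86RegisterInfo.h" // assumed to pull in X86::GR32RegClass / X86::sub_8bit

// Map a 32-bit GPR to its low 8-bit sub-register (e.g. EAX -> AL).
// Returns 0 when the register has no such sub-register.
static unsigned getLow8BitSubReg(unsigned Reg32,
                                 const llvm::TargetRegisterInfo *TRI) {
  assert(llvm::X86::GR32RegClass.contains(Reg32) && "expected a GR32 register");
  return TRI->getSubReg(Reg32, llvm::X86::sub_8bit);
}
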
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 857becff66d7..03330566161d 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -21,7 +21,7 @@ namespace llvm {
/// RelocationType - An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
/// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
/// emitter but JIT and ObjectCode will treat them differently
enum RelocationType {
/// reloc_pcrel_word - PC relative relocation, add the relocated value to
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 17f4efd8bcb0..c14407f9ac1b 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for X86
+// Instruction Itinerary classes used for X86
def IIC_DEFAULT : InstrItinClass;
def IIC_ALU_MEM : InstrItinClass;
def IIC_ALU_NONMEM : InstrItinClass;
@@ -253,6 +253,42 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+// MMX
+def IIC_MMX_MOV_MM_RM : InstrItinClass;
+def IIC_MMX_MOV_REG_MM : InstrItinClass;
+def IIC_MMX_MOVQ_RM : InstrItinClass;
+def IIC_MMX_MOVQ_RR : InstrItinClass;
+
+def IIC_MMX_ALU_RM : InstrItinClass;
+def IIC_MMX_ALU_RR : InstrItinClass;
+def IIC_MMX_ALUQ_RM : InstrItinClass;
+def IIC_MMX_ALUQ_RR : InstrItinClass;
+def IIC_MMX_PHADDSUBW_RM : InstrItinClass;
+def IIC_MMX_PHADDSUBW_RR : InstrItinClass;
+def IIC_MMX_PHADDSUBD_RM : InstrItinClass;
+def IIC_MMX_PHADDSUBD_RR : InstrItinClass;
+def IIC_MMX_PMUL : InstrItinClass;
+def IIC_MMX_MISC_FUNC_MEM : InstrItinClass;
+def IIC_MMX_MISC_FUNC_REG : InstrItinClass;
+def IIC_MMX_PSADBW : InstrItinClass;
+def IIC_MMX_SHIFT_RI : InstrItinClass;
+def IIC_MMX_SHIFT_RM : InstrItinClass;
+def IIC_MMX_SHIFT_RR : InstrItinClass;
+def IIC_MMX_UNPCK_H_RM : InstrItinClass;
+def IIC_MMX_UNPCK_H_RR : InstrItinClass;
+def IIC_MMX_UNPCK_L : InstrItinClass;
+def IIC_MMX_PCK_RM : InstrItinClass;
+def IIC_MMX_PCK_RR : InstrItinClass;
+def IIC_MMX_PSHUF : InstrItinClass;
+def IIC_MMX_PEXTR : InstrItinClass;
+def IIC_MMX_PINSRW : InstrItinClass;
+def IIC_MMX_MASKMOV : InstrItinClass;
+
+def IIC_MMX_CVT_PD_RR : InstrItinClass;
+def IIC_MMX_CVT_PD_RM : InstrItinClass;
+def IIC_MMX_CVT_PS_RR : InstrItinClass;
+def IIC_MMX_CVT_PS_RM : InstrItinClass;
+
def IIC_CMPX_LOCK : InstrItinClass;
def IIC_CMPX_LOCK_8 : InstrItinClass;
def IIC_CMPX_LOCK_8B : InstrItinClass;
@@ -261,13 +297,185 @@ def IIC_CMPX_LOCK_16B : InstrItinClass;
def IIC_XADD_LOCK_MEM : InstrItinClass;
def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+def IIC_FILD : InstrItinClass;
+def IIC_FLD : InstrItinClass;
+def IIC_FLD80 : InstrItinClass;
+def IIC_FST : InstrItinClass;
+def IIC_FST80 : InstrItinClass;
+def IIC_FIST : InstrItinClass;
+def IIC_FLDZ : InstrItinClass;
+def IIC_FUCOM : InstrItinClass;
+def IIC_FUCOMI : InstrItinClass;
+def IIC_FCOMI : InstrItinClass;
+def IIC_FNSTSW : InstrItinClass;
+def IIC_FNSTCW : InstrItinClass;
+def IIC_FLDCW : InstrItinClass;
+def IIC_FNINIT : InstrItinClass;
+def IIC_FFREE : InstrItinClass;
+def IIC_FNCLEX : InstrItinClass;
+def IIC_WAIT : InstrItinClass;
+def IIC_FXAM : InstrItinClass;
+def IIC_FNOP : InstrItinClass;
+def IIC_FLDL : InstrItinClass;
+def IIC_F2XM1 : InstrItinClass;
+def IIC_FYL2X : InstrItinClass;
+def IIC_FPTAN : InstrItinClass;
+def IIC_FPATAN : InstrItinClass;
+def IIC_FXTRACT : InstrItinClass;
+def IIC_FPREM1 : InstrItinClass;
+def IIC_FPSTP : InstrItinClass;
+def IIC_FPREM : InstrItinClass;
+def IIC_FYL2XP1 : InstrItinClass;
+def IIC_FSINCOS : InstrItinClass;
+def IIC_FRNDINT : InstrItinClass;
+def IIC_FSCALE : InstrItinClass;
+def IIC_FCOMPP : InstrItinClass;
+def IIC_FXSAVE : InstrItinClass;
+def IIC_FXRSTOR : InstrItinClass;
+
+def IIC_FXCH : InstrItinClass;
+
+// System instructions
+def IIC_CPUID : InstrItinClass;
+def IIC_INT : InstrItinClass;
+def IIC_INT3 : InstrItinClass;
+def IIC_INVD : InstrItinClass;
+def IIC_INVLPG : InstrItinClass;
+def IIC_IRET : InstrItinClass;
+def IIC_HLT : InstrItinClass;
+def IIC_LXS : InstrItinClass;
+def IIC_LTR : InstrItinClass;
+def IIC_RDTSC : InstrItinClass;
+def IIC_RSM : InstrItinClass;
+def IIC_SIDT : InstrItinClass;
+def IIC_SGDT : InstrItinClass;
+def IIC_SLDT : InstrItinClass;
+def IIC_STR : InstrItinClass;
+def IIC_SWAPGS : InstrItinClass;
+def IIC_SYSCALL : InstrItinClass;
+def IIC_SYS_ENTER_EXIT : InstrItinClass;
+def IIC_IN_RR : InstrItinClass;
+def IIC_IN_RI : InstrItinClass;
+def IIC_OUT_RR : InstrItinClass;
+def IIC_OUT_IR : InstrItinClass;
+def IIC_INS : InstrItinClass;
+def IIC_MOV_REG_DR : InstrItinClass;
+def IIC_MOV_DR_REG : InstrItinClass;
+def IIC_MOV_REG_CR : InstrItinClass;
+def IIC_MOV_CR_REG : InstrItinClass;
+def IIC_MOV_REG_SR : InstrItinClass;
+def IIC_MOV_MEM_SR : InstrItinClass;
+def IIC_MOV_SR_REG : InstrItinClass;
+def IIC_MOV_SR_MEM : InstrItinClass;
+def IIC_LAR_RM : InstrItinClass;
+def IIC_LAR_RR : InstrItinClass;
+def IIC_LSL_RM : InstrItinClass;
+def IIC_LSL_RR : InstrItinClass;
+def IIC_LGDT : InstrItinClass;
+def IIC_LIDT : InstrItinClass;
+def IIC_LLDT_REG : InstrItinClass;
+def IIC_LLDT_MEM : InstrItinClass;
+def IIC_PUSH_CS : InstrItinClass;
+def IIC_PUSH_SR : InstrItinClass;
+def IIC_POP_SR : InstrItinClass;
+def IIC_POP_SR_SS : InstrItinClass;
+def IIC_VERR : InstrItinClass;
+def IIC_VERW_REG : InstrItinClass;
+def IIC_VERW_MEM : InstrItinClass;
+def IIC_WRMSR : InstrItinClass;
+def IIC_RDMSR : InstrItinClass;
+def IIC_RDPMC : InstrItinClass;
+def IIC_SMSW : InstrItinClass;
+def IIC_LMSW_REG : InstrItinClass;
+def IIC_LMSW_MEM : InstrItinClass;
+def IIC_ENTER : InstrItinClass;
+def IIC_LEAVE : InstrItinClass;
+def IIC_POP_MEM : InstrItinClass;
+def IIC_POP_REG16 : InstrItinClass;
+def IIC_POP_REG : InstrItinClass;
+def IIC_POP_F : InstrItinClass;
+def IIC_POP_FD : InstrItinClass;
+def IIC_POP_A : InstrItinClass;
+def IIC_PUSH_IMM : InstrItinClass;
+def IIC_PUSH_MEM : InstrItinClass;
+def IIC_PUSH_REG : InstrItinClass;
+def IIC_PUSH_F : InstrItinClass;
+def IIC_PUSH_A : InstrItinClass;
+def IIC_BSWAP : InstrItinClass;
+def IIC_BSF : InstrItinClass;
+def IIC_BSR : InstrItinClass;
+def IIC_MOVS : InstrItinClass;
+def IIC_STOS : InstrItinClass;
+def IIC_SCAS : InstrItinClass;
+def IIC_CMPS : InstrItinClass;
+def IIC_MOV : InstrItinClass;
+def IIC_MOV_MEM : InstrItinClass;
+def IIC_AHF : InstrItinClass;
+def IIC_BT_MI : InstrItinClass;
+def IIC_BT_MR : InstrItinClass;
+def IIC_BT_RI : InstrItinClass;
+def IIC_BT_RR : InstrItinClass;
+def IIC_BTX_MI : InstrItinClass;
+def IIC_BTX_MR : InstrItinClass;
+def IIC_BTX_RI : InstrItinClass;
+def IIC_BTX_RR : InstrItinClass;
+def IIC_XCHG_REG : InstrItinClass;
+def IIC_XCHG_MEM : InstrItinClass;
+def IIC_XADD_REG : InstrItinClass;
+def IIC_XADD_MEM : InstrItinClass;
+def IIC_CMPXCHG_MEM : InstrItinClass;
+def IIC_CMPXCHG_REG : InstrItinClass;
+def IIC_CMPXCHG_MEM8 : InstrItinClass;
+def IIC_CMPXCHG_REG8 : InstrItinClass;
+def IIC_CMPXCHG_8B : InstrItinClass;
+def IIC_CMPXCHG_16B : InstrItinClass;
+def IIC_LODS : InstrItinClass;
+def IIC_OUTS : InstrItinClass;
+def IIC_CLC : InstrItinClass;
+def IIC_CLD : InstrItinClass;
+def IIC_CLI : InstrItinClass;
+def IIC_CMC : InstrItinClass;
+def IIC_CLTS : InstrItinClass;
+def IIC_STC : InstrItinClass;
+def IIC_STI : InstrItinClass;
+def IIC_STD : InstrItinClass;
+def IIC_XLAT : InstrItinClass;
+def IIC_AAA : InstrItinClass;
+def IIC_AAD : InstrItinClass;
+def IIC_AAM : InstrItinClass;
+def IIC_AAS : InstrItinClass;
+def IIC_DAA : InstrItinClass;
+def IIC_DAS : InstrItinClass;
+def IIC_BOUND : InstrItinClass;
+def IIC_ARPL_REG : InstrItinClass;
+def IIC_ARPL_MEM : InstrItinClass;
+def IIC_MOVBE : InstrItinClass;
+
+def IIC_NOP : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[], [], []>;
+// IssueWidth is analogous to the number of decode units. Core and its
+// descendants, including Nehalem and SandyBridge, have 4 decoders.
+// Resources beyond the decoder operate on micro-ops and are buffered
+// so adjacent micro-ops don't directly compete.
+//
+// MinLatency=0 indicates that RAW dependencies can be decoded in the
+// same cycle.
+//
+// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
+// indicates high latency opcodes. Alternatively, InstrItinData
+// entries may be included here to define specific operand
+// latencies. Since these latencies are not used for pipeline hazards,
+// they do not need to be exact.
+//
+// The GenericModel contains no instruction itineraries.
+def GenericModel : SchedMachineModel {
+ let IssueWidth = 4;
+ let MinLatency = 0;
+ let LoadLatency = 4;
+ let HighLatency = 10;
+}
include "X86ScheduleAtom.td"
-
-
-
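
For context on how the model defined above is consumed: TableGen lowers each SchedMachineModel into an llvm::MCSchedModel, and the schedulers treat its IssueWidth, MinLatency, LoadLatency and HighLatency fields as expected values rather than exact pipeline data. A minimal sketch, assuming only that the generated MCSchedModel carries these fields under the same names (how the caller obtains the model from its subtarget is left out):

#include "llvm/MC/MCSchedule.h"

// Report the optimistic load-to-use latency the scheduler will assume for the
// model in hand: 4 cycles under GenericModel above, 3 under AtomModel below.
static unsigned expectedLoadLatency(const llvm::MCSchedModel &SM) {
  return SM.LoadLatency;
}
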
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 77d4e56d7c03..87102614cc8b 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -106,7 +106,7 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
// set
- InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
// jcc
InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
@@ -294,12 +294,237 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+ // MMX MOVs
+ InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
+ // other MMX
+ InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PMUL, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PSADBW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_PEXTR, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [Port0]>] >,
+ // conversions
+ // from/to PD
+ InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // from/to PI
+ InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>]>,
+
InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
- InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FILD, [InstrStage<5, [Port0], 0>, InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_FLD, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_FLD80, [InstrStage<4, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FST, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_FST80, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_FIST, [InstrStage<6, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_FLDZ, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FUCOM, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FUCOMI, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_FCOMI, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNSTSW, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNSTCW, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_FLDCW, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNINIT, [InstrStage<63, [Port0, Port1]>] >,
+ InstrItinData<IIC_FFREE, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FNCLEX, [InstrStage<25, [Port0, Port1]>] >,
+ InstrItinData<IIC_WAIT, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXAM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_FNOP, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FLDL, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_F2XM1, [InstrStage<99, [Port0, Port1]>] >,
+ InstrItinData<IIC_FYL2X, [InstrStage<146, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPTAN, [InstrStage<168, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPATAN, [InstrStage<183, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXTRACT, [InstrStage<25, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPREM1, [InstrStage<71, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPSTP, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_FPREM, [InstrStage<55, [Port0, Port1]>] >,
+ InstrItinData<IIC_FYL2XP1, [InstrStage<147, [Port0, Port1]>] >,
+ InstrItinData<IIC_FSINCOS, [InstrStage<174, [Port0, Port1]>] >,
+ InstrItinData<IIC_FRNDINT, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_FSCALE, [InstrStage<77, [Port0, Port1]>] >,
+ InstrItinData<IIC_FCOMPP, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FXSAVE, [InstrStage<140, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXRSTOR, [InstrStage<141, [Port0, Port1]>] >,
+ InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+
+ // System instructions
+ InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
+ InstrItinData<IIC_INT, [InstrStage<127, [Port0, Port1]>] >,
+ InstrItinData<IIC_INT3, [InstrStage<130, [Port0, Port1]>] >,
+ InstrItinData<IIC_INVD, [InstrStage<1003, [Port0, Port1]>] >,
+ InstrItinData<IIC_INVLPG, [InstrStage<71, [Port0, Port1]>] >,
+ InstrItinData<IIC_IRET, [InstrStage<109, [Port0, Port1]>] >,
+ InstrItinData<IIC_HLT, [InstrStage<121, [Port0, Port1]>] >,
+ InstrItinData<IIC_LXS, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_LTR, [InstrStage<83, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDTSC, [InstrStage<30, [Port0, Port1]>] >,
+ InstrItinData<IIC_RSM, [InstrStage<741, [Port0, Port1]>] >,
+ InstrItinData<IIC_SIDT, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SGDT, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SLDT, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_STR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SWAPGS, [InstrStage<22, [Port0, Port1]>] >,
+ InstrItinData<IIC_SYSCALL, [InstrStage<96, [Port0, Port1]>] >,
+ InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<88, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_IN_RR, [InstrStage<94, [Port0, Port1]>] >,
+ InstrItinData<IIC_IN_RI, [InstrStage<92, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUT_RR, [InstrStage<68, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUT_IR, [InstrStage<72, [Port0, Port1]>] >,
+ InstrItinData<IIC_INS, [InstrStage<59, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_MOV_REG_DR, [InstrStage<88, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_DR_REG, [InstrStage<123, [Port0, Port1]>] >,
+ // worst case for mov REG_CRx
+ InstrItinData<IIC_MOV_REG_CR, [InstrStage<12, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_CR_REG, [InstrStage<136, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MOV_MEM_SR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_SR_REG, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_SR_MEM, [InstrStage<26, [Port0, Port1]>] >,
+ // LAR
+ InstrItinData<IIC_LAR_RM, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_LAR_RR, [InstrStage<54, [Port0, Port1]>] >,
+ // LSL
+ InstrItinData<IIC_LSL_RM, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_LSL_RR, [InstrStage<49, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_LGDT, [InstrStage<44, [Port0, Port1]>] >,
+ InstrItinData<IIC_LIDT, [InstrStage<44, [Port0, Port1]>] >,
+ InstrItinData<IIC_LLDT_REG, [InstrStage<60, [Port0, Port1]>] >,
+ InstrItinData<IIC_LLDT_MEM, [InstrStage<64, [Port0, Port1]>] >,
+ // push control register, segment registers
+ InstrItinData<IIC_PUSH_CS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_SR, [InstrStage<2, [Port0, Port1]>] >,
+ // pop control register, segment registers
+ InstrItinData<IIC_POP_SR, [InstrStage<29, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_SR_SS, [InstrStage<48, [Port0, Port1]>] >,
+ // VERR, VERW
+ InstrItinData<IIC_VERR, [InstrStage<41, [Port0, Port1]>] >,
+ InstrItinData<IIC_VERW_REG, [InstrStage<51, [Port0, Port1]>] >,
+ InstrItinData<IIC_VERW_MEM, [InstrStage<50, [Port0, Port1]>] >,
+ // WRMSR, RDMSR
+ InstrItinData<IIC_WRMSR, [InstrStage<202, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDMSR, [InstrStage<78, [Port0, Port1]>] >,
+ InstrItinData<IIC_RDPMC, [InstrStage<46, [Port0, Port1]>] >,
+ // SMSW, LMSW
+ InstrItinData<IIC_SMSW, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_LMSW_REG, [InstrStage<69, [Port0, Port1]>] >,
+ InstrItinData<IIC_LMSW_MEM, [InstrStage<67, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_ENTER, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_LEAVE, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_POP_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_REG16, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_REG, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_POP_F, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_FD, [InstrStage<26, [Port0, Port1]>] >,
+ InstrItinData<IIC_POP_A, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_PUSH_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_REG, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_PUSH_F, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_BSF, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BSR, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPS, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOV_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_AHF, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_MI, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_MR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_BT_RI, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BT_RR, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_BTX_MR, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_BTX_RI, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_BTX_RR, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_XCHG_REG, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XCHG_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_REG, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_MEM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG, [InstrStage<15, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
+ InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
+ InstrItinData<IIC_STC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
+ InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAA, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAD, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAM, [InstrStage<21, [Port0, Port1]>] >,
+ InstrItinData<IIC_AAS, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_DAA, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_DAS, [InstrStage<20, [Port0, Port1]>] >,
+ InstrItinData<IIC_BOUND, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
+ InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
]>;
+// Atom machine model.
+def AtomModel : SchedMachineModel {
+ let IssueWidth = 2; // Allows 2 instructions per scheduling group.
+ let MinLatency = 1; // InstrStage cycles override MinLatency.
+ // OperandCycles may be used for expected latency.
+ let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
+ let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+
+ let Itineraries = AtomItineraries;
+}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 9a04e352ab62..00edcbc7d470 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();
-
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
@@ -62,13 +62,15 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Args.push_back(Entry);
Entry.Node = Size;
Args.push_back(Entry);
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
DAG, dl);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(CLI);
return CallResult.second;
}
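
At the source level, the special case restructured above corresponds to a zero fill: when the stored value is a constant zero and the target provides a bzero entry point (the bzeroEntry symbol referenced in the hunk), the memset is lowered to a call to bzero rather than to memset. Illustrative C++ only, not part of the patch:

#include <cstring>

// A zero-valued memset like this one is the candidate for the bzeroEntry
// lowering above; non-zero fill values always take the regular memset path.
void clearBuffer(char *Buf, std::size_t Len) {
  std::memset(Buf, 0, Len);
}
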
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index ed1a40965aa9..908785296d78 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -39,10 +39,10 @@ unsigned char X86Subtarget::
ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
-
+
if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
return X86II::MO_PIC_BASE_OFFSET;
-
+
// Direct static reference to label.
return X86II::MO_NO_FLAG;
}
@@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Large model never uses stubs.
if (TM.getCodeModel() == CodeModel::Large)
return X86II::MO_NO_FLAG;
-
+
if (isTargetDarwin()) {
// If symbol visibility is hidden, the extra load is not needed if
// target is x86-64 or the symbol is definitely defined in the current
@@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
return X86II::MO_NO_FLAG;
}
-
+
if (isPICStyleGOT()) { // 32-bit ELF targets.
// Extra load is needed for all externally visible.
if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
return X86II::MO_GOTOFF;
return X86II::MO_GOT;
}
-
+
if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
// Determine whether we have a stub reference and/or whether the reference
// is relative to the PIC base or not.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
@@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-
+
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
if (isDecl || GV->hasCommonLinkage()) {
// Hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
-
+
// Otherwise, no stub.
return X86II::MO_PIC_BASE_OFFSET;
}
-
+
if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
// Determine whether we have a stub reference.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
return X86II::MO_NO_FLAG;
-
+
// Unless we have a symbol with hidden visibility, we have to go through a
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
@@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Otherwise, no stub.
return X86II::MO_NO_FLAG;
}
-
+
// Direct static reference to global.
return X86II::MO_NO_FLAG;
}
@@ -196,33 +196,32 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
- // FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
+ if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- if (IsIntel && ((ECX >> 1) & 0x1)) {
- HasCLMUL = true;
- ToggleFeature(X86::FeatureCLMUL);
+ if ((ECX >> 1) & 0x1) {
+ HasPCLMUL = true;
+ ToggleFeature(X86::FeaturePCLMUL);
}
- if (IsIntel && ((ECX >> 12) & 0x1)) {
- HasFMA3 = true;
- ToggleFeature(X86::FeatureFMA3);
+ if ((ECX >> 12) & 0x1) {
+ HasFMA = true;
+ ToggleFeature(X86::FeatureFMA);
}
if (IsIntel && ((ECX >> 22) & 0x1)) {
HasMOVBE = true;
ToggleFeature(X86::FeatureMOVBE);
}
- if (IsIntel && ((ECX >> 23) & 0x1)) {
+ if ((ECX >> 23) & 0x1) {
HasPOPCNT = true;
ToggleFeature(X86::FeaturePOPCNT);
}
- if (IsIntel && ((ECX >> 25) & 0x1)) {
+ if ((ECX >> 25) & 0x1) {
HasAES = true;
ToggleFeature(X86::FeatureAES);
}
- if (IsIntel && ((ECX >> 29) & 0x1)) {
+ if ((ECX >> 29) & 0x1) {
HasF16C = true;
ToggleFeature(X86::FeatureF16C);
}
@@ -247,15 +246,22 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
// If it's Nehalem, unaligned memory access is fast.
- // FIXME: Nehalem is family 6. Also include Westmere and later processors?
- if (Family == 15 && Model == 26) {
+ // Include Westmere and Sandy Bridge as well.
+ // FIXME: add later processors.
+ if (IsIntel && ((Family == 6 && Model == 26) ||
+ (Family == 6 && Model == 44) ||
+ (Family == 6 && Model == 42))) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
// Set processor type. Currently only Atom is detected.
- if (Family == 6 && Model == 28) {
+ if (Family == 6 &&
+ (Model == 28 || Model == 38 || Model == 39
+ || Model == 53 || Model == 54)) {
X86ProcFamily = IntelAtom;
+
+ UseLeaForSP = true;
ToggleFeature(X86::FeatureLeaForSP);
}
@@ -289,9 +295,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
}
- if (IsIntel && MaxLevel >= 7) {
+ if (MaxLevel >= 7) {
if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
- if (EBX & 0x1) {
+ if (IsIntel && (EBX & 0x1)) {
HasFSGSBase = true;
ToggleFeature(X86::FeatureFSGSBase);
}
@@ -299,12 +305,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasBMI = true;
ToggleFeature(X86::FeatureBMI);
}
- // FIXME: AVX2 codegen support is not ready.
- //if ((EBX >> 5) & 0x1) {
- // X86SSELevel = AVX2;
- // ToggleFeature(X86::FeatureAVX2);
- //}
- if ((EBX >> 8) & 0x1) {
+ if (IsIntel && ((EBX >> 5) & 0x1)) {
+ X86SSELevel = AVX2;
+ ToggleFeature(X86::FeatureAVX2);
+ }
+ if (IsIntel && ((EBX >> 8) & 0x1)) {
HasBMI2 = true;
ToggleFeature(X86::FeatureBMI2);
}
@@ -313,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
+ const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
, X86ProcFamily(Others)
@@ -325,8 +330,8 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasPOPCNT(false)
, HasSSE4A(false)
, HasAES(false)
- , HasCLMUL(false)
- , HasFMA3(false)
+ , HasPCLMUL(false)
+ , HasFMA(false)
, HasFMA4(false)
, HasXOP(false)
, HasMOVBE(false)
@@ -395,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
}
}
- if (X86ProcFamily == IntelAtom) {
+ if (X86ProcFamily == IntelAtom)
PostRAScheduler = true;
- InstrItins = getInstrItineraryForCPU(CPUName);
- }
+
+ InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
// target data structure which is shared with MC code emitter, etc.
@@ -424,9 +429,7 @@ bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- //TODO: change back to ANTIDEP_CRITICAL when the
- // X86 subtarget properly sets up post RA liveness.
- Mode = TargetSubtargetInfo::ANTIDEP_NONE;
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
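
The Family/Model pairs tested above (6/26, 6/44, 6/42 for Nehalem, Westmere and Sandy Bridge; 6/28, 38, 39, 53, 54 for the Atom variants) are decoded from CPUID leaf 1. A minimal sketch of that decoding, written to mirror what the X86_MC helpers used by this file are expected to produce; the function below is illustrative, not the tree's code:

#include <cstdint>

// Decode family/model from CPUID.1:EAX, folding in the extended fields so
// that, e.g., Sandy Bridge shows up as family 6, model 42 (0x2A).
static void decodeFamilyModel(uint32_t EAX, unsigned &Family, unsigned &Model) {
  Family = (EAX >> 8) & 0xf;            // base family
  Model  = (EAX >> 4) & 0xf;            // base model
  if (Family == 6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff;     // extended family
    Model += ((EAX >> 16) & 0xf) << 4;  // extended model
  }
}
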
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 7fd832bf0678..6841c5bafa32 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -55,7 +55,7 @@ protected:
/// X86ProcFamily - X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
-
+
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -85,11 +85,11 @@ protected:
/// HasAES - Target has AES instructions
bool HasAES;
- /// HasCLMUL - Target has carry-less multiplication
- bool HasCLMUL;
+ /// HasPCLMUL - Target has carry-less multiplication
+ bool HasPCLMUL;
- /// HasFMA3 - Target has 3-operand fused multiply-add
- bool HasFMA3;
+ /// HasFMA - Target has 3-operand fused multiply-add
+ bool HasFMA;
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
@@ -149,7 +149,7 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
-
+
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
@@ -203,8 +203,8 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
- bool hasCLMUL() const { return HasCLMUL; }
- bool hasFMA3() const { return HasFMA3; }
+ bool hasPCLMUL() const { return HasPCLMUL; }
+ bool hasFMA() const { return HasFMA; }
bool hasFMA4() const { return HasFMA4; }
bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
@@ -307,6 +307,8 @@ public:
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
+ bool postRAScheduler() const { return PostRAScheduler; }
+
/// getInstrItins = Return the instruction itineraries based on the
/// subtarget selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 89c388415bdc..b7ba568394bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -140,39 +140,48 @@ public:
} // namespace
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new X86PassConfig(this, PM);
+ X86PassConfig *PC = new X86PassConfig(this, PM);
+
+ if (Subtarget.hasCMov())
+ PC->enablePass(&EarlyIfConverterID);
+
+ return PC;
}
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+ addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createCleanupLocalDynamicTLSPass());
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!getX86Subtarget().is64Bit())
- PM->add(createGlobalBaseRegPass());
+ addPass(createGlobalBaseRegPass());
return false;
}
bool X86PassConfig::addPreRegAlloc() {
- PM->add(createX86MaxStackAlignmentHeuristicPass());
+ addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
bool X86PassConfig::addPostRegAlloc() {
- PM->add(createX86FloatingPointStackifierPass());
+ addPass(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
- PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
- PM->add(createX86IssueVZeroUpperPass());
+ addPass(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
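
The createPassConfig change above enables the early if-converter only when the subtarget has CMOV, since the pass pays off by rewriting short branch diamonds as conditional moves. An illustrative source-level example of the shape it targets (plain C++, not from the patch):

// With CMOV available, a diamond like this can become a branch-free
// compare + CMOVcc sequence instead of a conditional jump.
int selectMax(int A, int B) {
  int R;
  if (A > B)
    R = A;
  else
    R = B;
  return R;
}
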
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 718f35ea84ac..92aee0dd3fcf 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -9,16 +9,19 @@
#include "X86TargetObjectFile.h"
#include "X86TargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
using namespace dwarf;
-const MCExpr *X8664_MachoTargetObjectFile::
+const MCExpr *X86_64MachoTargetObjectFile::
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
@@ -37,8 +40,14 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
-MCSymbol *X8664_MachoTargetObjectFile::
+MCSymbol *X86_64MachoTargetObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
return Mang->getSymbol(GV);
}
+
+void
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
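
The new X86LinuxTargetObjectFile::Initialize forwards TM.Options.UseInitArray to InitializeELF, which decides whether dynamic initializers are registered through .init_array or the legacy .ctors section. A plain C++ illustration of the kind of construct affected; the names are made up for the example and are not from the patch:

// The pointer to this object's constructor call lands in .init_array when
// UseInitArray is set, and in .ctors otherwise; program behavior is the same.
struct Logger {
  Logger() { /* set up logging before main() runs */ }
};
static Logger TheLogger;
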
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index a02a36809ef2..2d320c594cb9 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -16,9 +16,9 @@
namespace llvm {
- /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
- class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
@@ -32,6 +32,12 @@ namespace llvm {
MachineModuleInfo *MMI) const;
};
+ /// X86LinuxTargetObjectFile - This implementation is used for linux x86
+ /// and x86-64.
+ class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
} // end namespace llvm
#endif
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 2fd78a7231c6..80b75dc5f992 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -145,7 +145,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// to insert any VZEROUPPER instructions. This is constant-time, so it is
// cheap in the common case of no ymm use.
bool YMMUsed = false;
- const TargetRegisterClass *RC = X86::VR256RegisterClass;
+ const TargetRegisterClass *RC = &X86::VR256RegClass;
for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
i != e; i++) {
if (MRI.isPhysRegUsed(*i)) {
@@ -205,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
}
- // The entry MBB for the function may set the inital state to dirty if
+ // The entry MBB for the function may set the initial state to dirty if
// the function receives any YMM incoming arguments
if (MBB == MF.begin()) {
EntryState = ST_CLEAN;
@@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
bool isControlFlow = MI->isCall() || MI->isReturn();
- // Shortcut: don't need to check regular instructions in dirty state.
+ // Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
continue;
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 0d59572a0d57..ca94f03a6496 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -22,5 +22,7 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
+add_dependencies(LLVMXCoreCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8906b2459e0a..c76866f47bed 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -18,9 +18,9 @@
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -260,7 +260,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
raw_ostream &O) {
- printOperand(MI, OpNo, O);
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ }
+ }
+
+ printOperand(MI, OpNo, O);
return false;
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 50fda58cf574..a4e56472babc 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -78,8 +78,7 @@ static void storeToStack(MachineBasicBlock &MBB,
//===----------------------------------------------------------------------===//
XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
- STI(sti) {
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) {
// Do nothing
}
@@ -341,7 +340,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
- const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
if (LRUsed) {
MF.getRegInfo().setPhysRegUnused(XCore::LR);
@@ -372,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
false));
}
}
-
-void XCoreFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-
-}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 4c51aa5e79cc..db1bbb60d968 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -22,7 +22,6 @@ namespace llvm {
class XCoreSubtarget;
class XCoreFrameLowering: public TargetFrameLowering {
- const XCoreSubtarget &STI;
public:
XCoreFrameLowering(const XCoreSubtarget &STI);
@@ -45,8 +44,6 @@ namespace llvm {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
//! Stack slot size (4 bytes)
static int stackSlotSize() {
return 4;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index fdf2b783241c..8643ffc19d09 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -66,7 +66,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
Subtarget(*XTM.getSubtargetImpl()) {
// Set up the register classes.
- addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+ addRegisterClass(MVT::i32, &XCore::GRRegsRegClass);
// Compute derived properties from the register classes
computeRegisterProperties();
@@ -485,12 +485,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Entry.Node = BasePtr;
Args.push_back(Entry);
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, IntPtrTy, false, false,
+ TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false,
false, false, 0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
Args, DAG, DL);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
@@ -547,12 +547,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
Entry.Node = Value;
Args.push_back(Entry);
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()), false, false,
false, false, 0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -873,14 +874,19 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
/// XCore call implementation
SDValue
-XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
// XCore target does not yet support tail call optimization.
isTailCall = false;
@@ -913,7 +919,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// The ABI dictates there should be one stack slot available to the callee
// on function entry (for saving lr).
@@ -1036,7 +1042,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
@@ -1096,7 +1102,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
@@ -1121,8 +1127,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
llvm_unreachable(0);
}
case MVT::i32:
- unsigned VReg = RegInfo.createVirtualRegister(
- XCore::GRRegsRegisterClass);
+ unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
}
@@ -1172,8 +1177,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
offset -= StackSlotSize;
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
// Move argument from phys reg -> virt reg
- unsigned VReg = RegInfo.createVirtualRegister(
- XCore::GRRegsRegisterClass);
+ unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
RegInfo.addLiveIn(ArgRegs[i], VReg);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
// Move argument from virt reg -> stack
@@ -1201,7 +1205,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
bool XCoreTargetLowering::
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
+ bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -1222,7 +1226,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
@@ -1606,12 +1610,12 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
std::pair<unsigned, const TargetRegisterClass*>
XCoreTargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default : break;
case 'r':
- return std::make_pair(0U, XCore::GRRegsRegisterClass);
+ return std::make_pair(0U, &XCore::GRRegsRegClass);
}
}
// Use the default implementation in TargetLowering to convert the register
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 0b63ecd0f78e..2874f00e4763 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -151,7 +151,7 @@ namespace llvm {
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
+ EVT VT) const;
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
@@ -174,12 +174,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -191,7 +186,7 @@ namespace llvm {
virtual bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
+ bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
LLVMContext &Context) const;
};
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index b25a08d25c1a..ae646a248524 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -741,14 +741,12 @@ let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BL_u10 : _FU10<
- (outs),
- (ins calltarget:$target, variable_ops),
+ (outs), (ins calltarget:$target),
"bl $target",
[(XCoreBranchLink immU10:$target)]>;
def BL_lu10 : _FLU10<
- (outs),
- (ins calltarget:$target, variable_ops),
+ (outs), (ins calltarget:$target),
"bl $target",
[(XCoreBranchLink immU20:$target)]>;
}
@@ -796,7 +794,7 @@ def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
"mkmsk $dst, $size",
- [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
"getr $dst, $type",
@@ -950,10 +948,10 @@ def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
// dgetreg
def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
"msync res[$i]",
- [(int_xcore_msync GRRegs:$i)]>;
+ [(int_xcore_msync GRRegs:$i)]>;
def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
"mjoin res[$i]",
- [(int_xcore_mjoin GRRegs:$i)]>;
+ [(int_xcore_mjoin GRRegs:$i)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
@@ -988,7 +986,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr),
"bla $addr",
[(XCoreBranchLink GRRegs:$addr)]>;
}
@@ -1038,7 +1036,7 @@ def GETET_0R : _F0R<(outs), (ins),
def SSYNC_0r : _F0R<(outs), (ins),
"ssync",
- [(int_xcore_ssync)]>;
+ [(int_xcore_ssync)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
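
On the MKMSK_2r pattern change above: mkmsk dst, size materializes a mask with the low size bits set, and writing the pattern as (add (shl 1, $size), -1) expresses exactly that; the old 0xffffffff literal denoted the same value as the signed -1. A quick C++ sketch of the semantics, for sizes below 32:

#include <cstdint>

// mkmsk(8) == 0xff, mkmsk(16) == 0xffff, and so on. The hardware instruction
// also accepts size == 32, which this sketch does not model (1u << 32 is UB).
static uint32_t mkmsk(uint32_t Size) {
  return (uint32_t(1) << Size) - 1;
}
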
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index f3b4b4c4f88a..cdd0a0893b98 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -92,6 +92,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool
+XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return requiresRegisterScavenging(MF);
+}
+
+bool
XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return false;
}
@@ -205,8 +210,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Reg = MI.getOperand(0).getReg();
bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
- assert(XCore::GRRegsRegisterClass->contains(Reg) &&
- "Unexpected register operand");
+ assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
MachineBasicBlock &MBB = *MI.getParent();
@@ -217,7 +221,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!RS)
report_fatal_error("eliminateFrameIndex Frame size too big: " +
Twine(Offset));
- unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II,
SPAdj);
loadConstant(MBB, II, ScratchReg, Offset, dl);
switch (MI.getOpcode()) {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 7391cfdf0734..c4dcb6b533c2 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -50,6 +50,8 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
bool useFPForScavengingIndex(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 5afd5a1affcc..11ec86b0fa8c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -55,7 +55,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool XCorePassConfig::addInstSelector() {
- PM->add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
+ addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;
}