author     Dimitry Andric <dim@FreeBSD.org>  2017-05-17 20:22:39 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-05-17 20:22:39 +0000
commit     7af96fb3afd6725a2824a0a5ca5dad34e5e0b056 (patch)
tree       6661ffbabf869009597684462f5a3df3beccc952 /lib/Target/AMDGPU
parent     6b3f41ed88e8e440e11a4fbf20b6600529f80049 (diff)
download   src-7af96fb3afd6725a2824a0a5ca5dad34e5e0b056.tar.gz
           src-7af96fb3afd6725a2824a0a5ca5dad34e5e0b056.zip
Vendor import of llvm trunk r303291 (tag: vendor/llvm/llvm-trunk-r303291)
Notes:
svn path=/vendor/llvm/dist/; revision=318414
svn path=/vendor/llvm/llvm-trunk-r303291/; revision=318415; tag=vendor/llvm/llvm-trunk-r303291
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp  | 30
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h       |  4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp      |  3
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp         | 14
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp      | 26
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td     | 20
6 files changed, 74 insertions, 23 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7c99752b881f..c3ac796a0a44 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1707,10 +1707,38 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
 
   // FIXME: Look for on separate components
   if (Src.getOpcode() == ISD::FNEG) {
-    Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
     Src = Src.getOperand(0);
   }
 
+  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+    unsigned VecMods = Mods;
+
+    SDValue Lo = Src.getOperand(0);
+    SDValue Hi = Src.getOperand(1);
+
+    if (Lo.getOpcode() == ISD::FNEG) {
+      Lo = Lo.getOperand(0);
+      Mods ^= SISrcMods::NEG;
+    }
+
+    if (Hi.getOpcode() == ISD::FNEG) {
+      Hi = Hi.getOperand(0);
+      Mods ^= SISrcMods::NEG_HI;
+    }
+
+    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
+      // Really a scalar input. Just select from the low half of the register to
+      // avoid packing.
+
+      Src = Lo;
+      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+      return true;
+    }
+
+    Mods = VecMods;
+  }
+
   // Packed instructions do not have abs modifiers.
 
   // FIXME: Handle abs/neg of individual components.
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index bed7d326b3dd..e543cae07ada 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -289,6 +289,10 @@ public:
     return getGeneration() >= GFX9;
   }
 
+  bool hasMin3Max3_16() const {
+    return getGeneration() >= GFX9;
+  }
+
   bool hasCARRY() const {
     return (getGeneration() >= EVERGREEN);
   }
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 48a14e4dbea2..286be355bc14 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4491,7 +4491,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
 
   if (Opc != AMDGPUISD::FMIN_LEGACY &&
       Opc != AMDGPUISD::FMAX_LEGACY &&
-      VT != MVT::f64) {
+      VT != MVT::f64 &&
+      ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
     // max(max(a, b), c) -> max3(a, b, c)
     // min(min(a, b), c) -> min3(a, b, c)
     if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 065fd09eb356..38a16b525a75 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -765,7 +765,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex)           // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
     // Add the scratch resource registers as implicit uses because we may end up
     // needing them, and need to ensure that the reserved registers are
     // correctly handled.
@@ -796,7 +796,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addReg(SrcReg, getKillRegState(isKill)) // data
       .addFrameIndex(FrameIndex)               // addr
       .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
-      .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
+      .addReg(MFI->getFrameOffsetReg())        // scratch_offset
       .addImm(0)                               // offset
       .addMemOperand(MMO);
   }
@@ -869,7 +869,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex) // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
@@ -892,10 +892,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
 
     BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-      .addFrameIndex(FrameIndex)        // vaddr
-      .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
-      .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
-      .addImm(0)                        // offset
+      .addFrameIndex(FrameIndex)           // vaddr
+      .addReg(MFI->getScratchRSrcReg())    // scratch_rsrc
+      .addReg(MFI->getFrameOffsetReg())    // scratch_offset
+      .addImm(0)                           // offset
       .addMemOperand(MMO);
   }
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8820e294562b..06cfc95be96a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -654,11 +654,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
+          .addReg(MFI->getFrameOffsetReg())
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
+          .addReg(MFI->getFrameOffsetReg());
       }
 
       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -715,11 +715,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                    EltSize, MinAlign(Align, EltSize * i));
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
-        .addReg(TmpReg, RegState::Kill)         // src
-        .addFrameIndex(Index)                   // vaddr
-        .addReg(MFI->getScratchRSrcReg())       // srrsrc
-        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
-        .addImm(i * 4)                          // offset
+        .addReg(TmpReg, RegState::Kill)    // src
+        .addFrameIndex(Index)              // vaddr
+        .addReg(MFI->getScratchRSrcReg())  // srrsrc
+        .addReg(MFI->getFrameOffsetReg())  // soffset
+        .addImm(i * 4)                     // offset
         .addMemOperand(MMO);
     }
   }
@@ -806,11 +806,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
+          .addReg(MFI->getFrameOffsetReg())
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
+          .addReg(MFI->getFrameOffsetReg());
       }
 
       auto MIB =
@@ -853,10 +853,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                    MinAlign(Align, EltSize * i));
 
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
-        .addFrameIndex(Index)                   // vaddr
-        .addReg(MFI->getScratchRSrcReg())       // srsrc
-        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
-        .addImm(i * 4)                          // offset
+        .addFrameIndex(Index)              // vaddr
+        .addReg(MFI->getScratchRSrcReg())  // srsrc
+        .addReg(MFI->getFrameOffsetReg())  // soffset
+        .addImm(i * 4)                     // offset
         .addMemOperand(MMO);
 
       auto MIB =
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index ffa6c60d6b1f..c0b5069948fb 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -300,10 +300,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
 def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
 def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
 def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
-}
+
+def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+
+def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+} // End SubtargetPredicate = isGFX9
 
 //===----------------------------------------------------------------------===//
@@ -509,6 +518,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>;
 defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
 defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
 
+defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
+defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
+defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+
+defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
+defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
+defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+
 defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
 defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
 defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
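
A note on the SelectVOP3PMods hunk above: the switch from |= to ^= matters because negations compose by cancelling. An outer fneg on a packed v2f16 operand negates both halves, and a further fneg on one BUILD_VECTOR component must undo that half's bit rather than leave it set. A minimal standalone sketch of the composition rule (the enum values here are illustrative, not LLVM's actual SIDefines.h encoding):

// Illustrative sketch of the modifier-composition rule in SelectVOP3PMods.
#include <cassert>
#include <cstdint>

enum SrcMods : uint32_t { NEG = 1u << 0, NEG_HI = 1u << 1 }; // assumed values

// fneg(build_vector(Lo, Hi)) with optional per-component fnegs.
uint32_t packedNegMods(bool OuterFNeg, bool LoFNeg, bool HiFNeg) {
  uint32_t Mods = 0;
  if (OuterFNeg)
    Mods ^= (NEG | NEG_HI); // negate both halves
  if (LoFNeg)
    Mods ^= NEG;            // negate low half only
  if (HiFNeg)
    Mods ^= NEG_HI;         // negate high half only
  return Mods;
}

int main() {
  // fneg(<fneg(a), b>) == <a, -b>: the low-half negations cancel.
  assert(packedNegMods(true, true, false) == NEG_HI);
  // With |= instead of ^=, both bits would wrongly stay set here.
  assert(packedNegMods(true, true, true) == 0); // fneg(<-a, -b>) == <a, b>
  return 0;
}

The same hunk also covers the case where both BUILD_VECTOR components fold to one node: a splat needs no packing, so selection simply uses the low half of the register.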
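
Likewise, a simplified restatement of the new gating in the SIISelLowering.cpp hunk: min(min(a, b), c) and max(max(a, b), c) only become min3/max3 when the target has the 3-operand opcode. There is no f64 form, and the 16-bit forms added by this commit are GFX9-only, which is what the new AMDGPUSubtarget::hasMin3Max3_16() query reports. The enum below is a stand-in for MVT, not LLVM's type:

// Simplified sketch of when performMinMaxCombine may form min3/max3.
#include <cassert>

enum class VT { f16, i16, f32, i32, f64 }; // illustrative stand-in for MVT

bool canFormMin3Max3(VT Ty, bool IsLegacyMinMax, bool HasMin3Max3_16) {
  if (IsLegacyMinMax)                  // FMIN_LEGACY/FMAX_LEGACY: not combined
    return false;
  if (Ty == VT::f64)                   // no v_min3_f64 / v_max3_f64
    return false;
  if (Ty == VT::f16 || Ty == VT::i16)  // 16-bit min3/max3 need GFX9
    return HasMin3Max3_16;
  return true;                         // 32-bit forms exist on older targets
}

int main() {
  assert(canFormMin3Max3(VT::f32, false, false));  // pre-GFX9: 32-bit OK
  assert(!canFormMin3Max3(VT::f16, false, false)); // pre-GFX9: no f16 min3
  assert(canFormMin3Max3(VT::f16, false, true));   // GFX9: f16 min3/max3 OK
  return 0;
}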