src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-12-25 22:30:44 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-12-25 22:30:44 +0000
commit	77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (patch)
tree	5c0eb39553003b9c75a901af6bc4ddabd6f2f28c /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent	f65dcba83ce5035ab88a85fe17628b447eb56e1b (diff)
download	src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.tar.gz src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.zip

Vendor import of llvm-project main llvmorg-14-init-13186-g0c553cc1af2e. (ref: vendor/llvm-project/llvmorg-14-init-13186-g0c553cc1af2e)

Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')

-rw-r--r--

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

1 file changed, 34 insertions, 39 deletions

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35b72f5d201b..9f138136e6e9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -24,6 +24,7 @@

#include "llvm/CodeGen/Analysis.h"

#include "llvm/CodeGen/FunctionLoweringInfo.h"

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"

+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineLoopInfo.h"

#include "llvm/IR/DiagnosticInfo.h"

@@ -2062,33 +2063,30 @@ void SITargetLowering::allocateSpecialInputSGPRs(

SIMachineFunctionInfo &Info) const {

auto &ArgInfo = Info.getArgInfo();

- // We need to allocate these in place regardless of their use.

- const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;

// TODO: Unify handling with private memory pointers.

- if (IsFixed || Info.hasDispatchPtr())

+ if (Info.hasDispatchPtr())

allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);

- if (IsFixed || Info.hasQueuePtr())

+ if (Info.hasQueuePtr())

allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);

// Implicit arg ptr takes the place of the kernarg segment pointer. This is a

// constant offset from the kernarg segment.

- if (IsFixed || Info.hasImplicitArgPtr())

+ if (Info.hasImplicitArgPtr())

allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);

- if (IsFixed || Info.hasDispatchID())

+ if (Info.hasDispatchID())

allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);

// flat_scratch_init is not applicable for non-kernel functions.

- if (IsFixed || Info.hasWorkGroupIDX())

+ if (Info.hasWorkGroupIDX())

allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);

- if (IsFixed || Info.hasWorkGroupIDY())

+ if (Info.hasWorkGroupIDY())

allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);

- if (IsFixed || Info.hasWorkGroupIDZ())

+ if (Info.hasWorkGroupIDZ())

allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);

}

@@ -2419,10 +2417,9 @@ SDValue SITargetLowering::LowerFormalArguments(

if (IsEntryFunc) {

allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);

allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);

- } else {

+ } else if (!IsGraphics) {

// For the fixed ABI, pass workitem IDs in the last argument register.

- if (AMDGPUTargetMachine::EnableFixedFunctionABI)

- allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);

+ allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);

}

if (IsKernel) {

@@ -2549,17 +2546,13 @@ SDValue SITargetLowering::LowerFormalArguments(

InVals.push_back(Val);

}

- if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {

- // Special inputs come after user arguments.

- allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);

- }

// Start adding system SGPRs.

if (IsEntryFunc) {

allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);

} else {

CCInfo.AllocateReg(Info->getScratchRSrcReg());

- allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);

+ if (!IsGraphics)

+ allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);

}

auto &ArgUsageInfo =

@@ -3123,8 +3116,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,

CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);

- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&

- CallConv != CallingConv::AMDGPU_Gfx) {

+ if (CallConv != CallingConv::AMDGPU_Gfx) {

// With a fixed ABI, allocate fixed registers before user arguments.

passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);

}

@@ -3263,12 +3255,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,

}

- if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&

- CallConv != CallingConv::AMDGPU_Gfx) {

- // Copy special input registers after user input arguments.

- passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);

- }

if (!MemOpChains.empty())

Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

@@ -6282,10 +6268,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,

}

- // Push back extra arguments.

- for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++)

- VAddrs.push_back(Op.getOperand(ArgOffset + I));

// Check for 16 bit addresses or derivatives and pack if true.

MVT VAddrVT =

Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();

@@ -6298,6 +6280,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,

MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;

IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;

+ // Push back extra arguments.

+ for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {

+ if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {

+ // Special handling of bias when A16 is on. Bias is of type half but

+ // occupies full 32-bit.

+ SDValue bias = DAG.getBuildVector( MVT::v2f16, DL, {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});

+ VAddrs.push_back(bias);

+ } else

+ VAddrs.push_back(Op.getOperand(ArgOffset + I));

+ }

if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {

// 16 bit gradients are supported, but are tied to the A16 control

// so both gradients and addresses must be 16 bit

@@ -7502,8 +7495,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,

assert(NodePtr.getValueType() == MVT::i32 ||

NodePtr.getValueType() == MVT::i64);

- assert(RayDir.getValueType() == MVT::v4f16 ||

- RayDir.getValueType() == MVT::v4f32);

+ assert(RayDir.getValueType() == MVT::v3f16 ||

+ RayDir.getValueType() == MVT::v3f32);

if (!Subtarget->hasGFX10_AEncoding()) {

emitRemovedIntrinsicError(DAG, DL, Op.getValueType());

@@ -9837,11 +9830,13 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,

if (Opcode == AMDGPU::G_FCANONICALIZE)

return true;

- if (Opcode == AMDGPU::G_FCONSTANT) {

- auto F = MI->getOperand(1).getFPImm()->getValueAPF();

- if (F.isNaN() && F.isSignaling())

+ Optional<FPValueAndVReg> FCR;

+ // Constant splat (can be padded with undef) or scalar constant.

+ if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {

+ if (FCR->Value.isSignaling())

return false;

- return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF);

+ return !FCR->Value.isDenormal() ||

+ denormalsEnabledForType(MRI.getType(FCR->VReg), MF);

}

if (MaxDepth == 0)

@@ -11514,7 +11509,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,

// Prefer VGPRs over AGPRs in mAI instructions where possible.

// This saves a chain-copy of registers and better balance register

// use between vgpr and agpr as agpr tuples tend to be big.

- if (const MCOperandInfo *OpInfo = MI.getDesc().OpInfo) {

+ if (MI.getDesc().OpInfo) {

unsigned Opc = MI.getOpcode();

const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),

@@ -12477,6 +12472,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,

if (Size <= 256)

return Cost;

- Cost.first = (Size + 255) / 256;

+ Cost.first += (Size + 255) / 256;

return Cost;

}