diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 53 |
1 files changed, 25 insertions, 28 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 62ab5bb55a16..8fb4f93fd4b3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -12,30 +12,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "AMDGPUTargetMachine.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/CodeGen/Passes.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" - +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "amdgpu-lower-kernel-arguments" using namespace llvm; @@ -108,10 +89,14 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { uint64_t ExplicitArgOffset = 0; for (Argument &Arg : F.args()) { - Type *ArgTy = Arg.getType(); - Align ABITypeAlign = DL.getABITypeAlign(ArgTy); - unsigned Size = DL.getTypeSizeInBits(ArgTy); - unsigned AllocSize = DL.getTypeAllocSize(ArgTy); + const bool IsByRef = Arg.hasByRefAttr(); + Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); + MaybeAlign ABITypeAlign = IsByRef ? 
Arg.getParamAlign() : None; + if (!ABITypeAlign) + ABITypeAlign = DL.getABITypeAlign(ArgTy); + + uint64_t Size = DL.getTypeSizeInBits(ArgTy); + uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize; @@ -119,6 +104,19 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { if (Arg.use_empty()) continue; + // If this is byref, the loads are already explicit in the function. We just + // need to rewrite the pointer values. + if (IsByRef) { + Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64( + Builder.getInt8Ty(), KernArgSegment, EltOffset, + Arg.getName() + ".byval.kernarg.offset"); + + Value *CastOffsetPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( + ArgOffsetPtr, Arg.getType()); + Arg.replaceAllUsesWith(CastOffsetPtr); + continue; + } + if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) { // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing // modes on SI to know the high bits are 0 so pointer adds don't wrap. We @@ -224,8 +222,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { Arg.getName() + ".load"); Arg.replaceAllUsesWith(NewVal); } else if (IsV3) { - Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty), - ArrayRef<int>{0, 1, 2}, + Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2}, Arg.getName() + ".load"); Arg.replaceAllUsesWith(Shuf); } else { |