aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--lib/CodeGen/CGCUDANV.cpp126
1 files changed, 126 insertions, 0 deletions
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
new file mode 100644
index 000000000000..88a0bdc821d7
--- /dev/null
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -0,0 +1,126 @@
+//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a class for CUDA code generation targeting the NVIDIA CUDA
+// runtime library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGCUDARuntime.h"
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/Decl.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/CallSite.h"
+
+#include <vector>
+
+using namespace clang;
+using namespace CodeGen;
+
+namespace {
+
+class CGNVCUDARuntime : public CGCUDARuntime {
+
+private:
+ llvm::Type *IntTy, *SizeTy;
+ llvm::PointerType *CharPtrTy, *VoidPtrTy;
+
+ llvm::Constant *getSetupArgumentFn() const;
+ llvm::Constant *getLaunchFn() const;
+
+public:
+ CGNVCUDARuntime(CodeGenModule &CGM);
+
+ void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
+};
+
+}
+
+CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
+ CodeGen::CodeGenTypes &Types = CGM.getTypes();
+ ASTContext &Ctx = CGM.getContext();
+
+ IntTy = Types.ConvertType(Ctx.IntTy);
+ SizeTy = Types.ConvertType(Ctx.getSizeType());
+
+ CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
+ VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
+}
+
+llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
+ // cudaError_t cudaSetupArgument(void *, size_t, size_t)
+ std::vector<llvm::Type*> Params;
+ Params.push_back(VoidPtrTy);
+ Params.push_back(SizeTy);
+ Params.push_back(SizeTy);
+ return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
+ Params, false),
+ "cudaSetupArgument");
+}
+
+llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
+ // cudaError_t cudaLaunch(char *)
+ std::vector<llvm::Type*> Params;
+ Params.push_back(CharPtrTy);
+ return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
+ Params, false),
+ "cudaLaunch");
+}
+
+void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
+ FunctionArgList &Args) {
+ // Build the argument value list and the argument stack struct type.
+ llvm::SmallVector<llvm::Value *, 16> ArgValues;
+ std::vector<llvm::Type *> ArgTypes;
+ for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
+ I != E; ++I) {
+ llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
+ ArgValues.push_back(V);
+ assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
+ ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
+ }
+ llvm::StructType *ArgStackTy = llvm::StructType::get(
+ CGF.getLLVMContext(), ArgTypes);
+
+ llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
+
+ // Emit the calls to cudaSetupArgument
+ llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
+ for (unsigned I = 0, E = Args.size(); I != E; ++I) {
+ llvm::Value *Args[3];
+ llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
+ Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
+ Args[1] = CGF.Builder.CreateIntCast(
+ llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
+ SizeTy, false);
+ Args[2] = CGF.Builder.CreateIntCast(
+ llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
+ SizeTy, false);
+ llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
+ llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
+ llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
+ CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
+ CGF.EmitBlock(NextBlock);
+ }
+
+ // Emit the call to cudaLaunch
+ llvm::Constant *cudaLaunchFn = getLaunchFn();
+ llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
+ CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
+ CGF.EmitBranch(EndBlock);
+
+ CGF.EmitBlock(EndBlock);
+}
+
+CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
+ return new CGNVCUDARuntime(CGM);
+}