aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/CGExpr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGExpr.cpp')
-rw-r--r--lib/CodeGen/CGExpr.cpp80
1 files changed, 80 insertions, 0 deletions
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index ecee7b4931be..1fe4c18badc6 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -938,6 +938,50 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) {
llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile,
unsigned Alignment, QualType Ty,
llvm::MDNode *TBAAInfo) {
+
+ // Vector loads take a dedicated path: 3-element vectors are loaded as 4-element vectors; other widths fall through to the generic load below.
+ if (Ty->isVectorType()) {
+ llvm::Value *V;
+ const llvm::Type *EltTy =
+ cast<llvm::PointerType>(Addr->getType())->getElementType();
+ // EltTy is the pointee type of Addr; the cast<> below expects it to be an LLVM vector type.
+ const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy);
+ // Special-case vectors with exactly 3 elements: load them through a 4-element
+ // vector type (for better performance) and then keep only the first 3 lanes.
+ if (VTy->getNumElements() == 3) {
+ // Build a 4-element vector type with the same element type, and a pointer to it
+ // in the same address space as Addr; then bitcast Addr to that pointer type.
+ llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
+ 4);
+ llvm::PointerType *ptVec4Ty =
+ llvm::PointerType::get(vec4Ty,
+ (cast<llvm::PointerType>(
+ Addr->getType()))->getAddressSpace());
+ llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty,
+ "castToVec4");
+ // Load all four lanes through the vec4 pointer, honoring the Volatile flag.
+ llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+ // NOTE(review): this reads one element past the vec3's three; presumably the vec3 occupies vec4-sized storage -- confirm.
+ // Build the constant i32 shuffle mask <0, 1, 2> that selects the first three lanes.
+ llvm::SmallVector<llvm::Constant*, 3> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 2));
+ // Shuffle LoadVal with an undef vec4; the mask only references lanes 0-2 of LoadVal, yielding a vec3.
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ V = Builder.CreateShuffleVector(LoadVal,
+ llvm::UndefValue::get(vec4Ty),
+ MaskV, "extractVec");
+ return EmitFromMemory(V, Ty); // NOTE(review): early return; Alignment and TBAAInfo are not used on this path -- confirm intended.
+ }
+ }
+
llvm::LoadInst *Load = Builder.CreateLoad(Addr);
if (Volatile)
Load->setVolatile(true);
@@ -984,6 +1028,42 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr,
QualType Ty,
llvm::MDNode *TBAAInfo,
bool isInit) {
+
+ // Vector stores take a dedicated path: a 3-element value is widened to 4 elements, and Addr is bitcast if its pointee type differs from the value type.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy);
+ // Special-case values with exactly 3 elements: store them as a 4-element vector.
+ if (VecTy->getNumElements() == 3) {
+ llvm::LLVMContext &VMContext = getLLVMContext();
+ // Build the constant i32 shuffle mask <0, 1, 2, undef>.
+ // The source is a vec3; shuffling it with this mask produces a vec4 whose first three lanes are the source lanes.
+ llvm::SmallVector<llvm::Constant*, 4> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 2));
+ Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext))); // fourth lane is left undefined
+ // Replace Value with the widened vector and record the vec4 type as the type to store.
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value,
+ llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec"); // NOTE(review): the value name says "extract" although this shuffle widens vec3 -> vec4.
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType());
+ if (DstPtr->getElementType() != SrcTy) { // destination pointee type differs from the (possibly widened) value type
+ llvm::Type *MemTy =
+ llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace());
+ Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp"); // retarget Addr to point at SrcTy, same address space
+ }
+ }
+
Value = EmitToMemory(Value, Ty);
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);