 contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 598 +++++++++----------
 1 file changed, 257 insertions(+), 341 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7d6349cf4e77..c6d9123d6611 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -129,11 +129,11 @@ namespace {
AllocaInfo &Info);
void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -145,6 +145,9 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
@@ -253,7 +256,7 @@ class ConvertToScalarInfo {
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
- const VectorType *VectorTy;
+ VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
@@ -269,11 +272,11 @@ public:
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
- bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
+ void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
+ bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
- Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
@@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (ScalarKind == Unknown)
ScalarKind = Integer;
- // FIXME: It should be possible to promote the vector type up to the alloca's
- // size.
if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
ScalarKind = Integer;
@@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- const Type *NewTy;
+ Type *NewTy;
if (ScalarKind == Vector) {
assert(VectorTy && "Missing type for vector scalar.");
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
@@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
-/// There are three cases we handle here:
+/// There are two cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
-/// <2 x float> and <4 x float>. Here we turn element accesses into insert
-/// and extract element operations, and <2 x float> accesses into a cast to
-/// <2 x double>, an extract, and a cast back to <2 x float>.
-/// 3) A fully general blob of memory, which we turn into some (potentially
+/// 2) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
uint64_t Offset) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
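Case 1 in practice, as a minimal IR sketch (LLVM 3.0-era typed-pointer syntax; all names are illustrative): a float store to element 2 of a <4 x float> alloca becomes an insertelement on the promoted vector value.

    %a = alloca <4 x float>
    %p = getelementptr inbounds <4 x float>* %a, i32 0, i32 2
    store float %f, float* %p
    ; ...is rewritten against the promoted vector as:
    %old = load <4 x float>* %a
    %new = insertelement <4 x float> %old, float %f, i32 2
    store <4 x float> %new, <4 x float>* %a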
@@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
- if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
if (MergeInVectorType(VInTy, Offset))
return;
} else if (In->isFloatTy() || In->isDoubleTy() ||
@@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ (!VectorTy || EltSize == VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8)) {
if (!VectorTy) {
ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
}
-
- unsigned CurrentEltSize = VectorTy->getElementType()
- ->getPrimitiveSizeInBits()/8;
- if (EltSize == CurrentEltSize)
- return;
-
- if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
- return;
+ return;
}
}
@@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
-bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
uint64_t Offset) {
- // TODO: Support nonzero offsets?
- if (Offset != 0)
- return false;
-
- // Only allow vectors that are a power-of-2 away from the size of the alloca.
- if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
- return false;
-
- // If this the first vector we see, remember the type so that we know the
- // element size.
- if (!VectorTy) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this is the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access with a
+ // differing type of the same size, ignore it; worst case we can
+ // bitcast the resultant vectors.
+ if (!VectorTy)
+ VectorTy = VInTy;
ScalarKind = Vector;
- VectorTy = VInTy;
return true;
}
- unsigned BitWidth = VectorTy->getBitWidth();
- unsigned InBitWidth = VInTy->getBitWidth();
-
- // Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
- ScalarKind = Vector;
- return true;
- }
-
- const Type *ElementTy = VectorTy->getElementType();
- const Type *InElementTy = VInTy->getElementType();
-
- // Do not allow mixed integer and floating-point accesses from vectors of
- // different sizes.
- if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
- return false;
-
- if (ElementTy->isFloatingPointTy()) {
- // Only allow floating-point vectors of different sizes if they have the
- // same element type.
- // TODO: This could be loosened a bit, but would anything benefit?
- if (ElementTy != InElementTy)
- return false;
-
- // There are no arbitrary-precision floating-point types, which limits the
- // number of legal vector types with larger element types that we can form
- // to bitcast and extract a subvector.
- // TODO: We could support some more cases with mixed fp128 and double here.
- if (!(BitWidth == 64 || BitWidth == 128) ||
- !(InBitWidth == 64 || InBitWidth == 128))
- return false;
- } else {
- assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
- "or floating-point.");
- unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
- unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
-
- // Do not allow integer types smaller than a byte or types whose widths are
- // not a multiple of a byte.
- if (BitWidth < 8 || InBitWidth < 8 ||
- BitWidth % 8 != 0 || InBitWidth % 8 != 0)
- return false;
- }
-
- // Pick the largest of the two vector types.
- ScalarKind = Vector;
- if (InBitWidth > BitWidth)
- VectorTy = VInTy;
-
- return true;
+ return false;
}
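The simplified rule, sketched in IR (illustrative names): only a whole-alloca vector access at offset 0 merges, and a second vector type of the same size is handled by bitcasting later.

    %a = alloca <4 x float>                    ; first vector seen: VectorTy
    store <4 x float> %v, <4 x float>* %a
    %p = bitcast <4 x float>* %a to <2 x double>*
    %d = load <2 x double>* %p                 ; same 128 bits, still mergeable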
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile loads.
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
@@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
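Note that isSimple() is false for atomic as well as volatile accesses, so these checks now also reject the atomic loads and stores introduced in LLVM 3.0. A sketch of accesses that are no longer considered convertible:

    %v1 = load volatile i32* %p                  ; volatile: not simple
    %v2 = load atomic i32* %p seq_cst, align 4   ; atomic: not simple either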
@@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
}
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
return false;
continue;
@@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
@@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
continue;
}
+ // If this is a lifetime intrinsic, we can handle it.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ continue;
+ }
+ }
+
// Otherwise, we cannot handle this!
return false;
}
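For reference, the markers being matched here, with a typical use (3.0-era declarations; %a and the 8-byte size are illustrative):

    declare void @llvm.lifetime.start(i64, i8* nocapture)
    declare void @llvm.lifetime.end(i64, i8* nocapture)

    %a = alloca { i32, i32 }
    %p = bitcast { i32, i32 }* %a to i8*
    call void @llvm.lifetime.start(i64 8, i8* %p)
    ; ... uses of %a ...
    call void @llvm.lifetime.end(i64 8, i8* %p)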
@@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
GEP->eraseFromParent();
continue;
@@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
LI->replaceAllUsesWith(NewLoadVal);
@@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
SPTy->getAddressSpace());
@@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
DPTy->getAddressSpace());
@@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
continue;
}
- llvm_unreachable("Unsupported operation!");
- }
-}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // There's no need to preserve these, as the resulting alloca will be
+ // converted to a register anyways.
+ II->eraseFromParent();
+ continue;
+ }
+ }
-/// getScaledElementType - Gets a scaled element type for a partial vector
-/// access of an alloca. The input types must be integer or floating-point
-/// scalar or vector types, and the resulting type is an integer, float or
-/// double.
-static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
- unsigned NewBitWidth) {
- bool IsFP1 = Ty1->isFloatingPointTy() ||
- (Ty1->isVectorTy() &&
- cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
- bool IsFP2 = Ty2->isFloatingPointTy() ||
- (Ty2->isVectorTy() &&
- cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
-
- LLVMContext &Context = Ty1->getContext();
-
- // Prefer floating-point types over integer types, as integer types may have
- // been created by earlier scalar replacement.
- if (IsFP1 || IsFP2) {
- if (NewBitWidth == 32)
- return Type::getFloatTy(Context);
- if (NewBitWidth == 64)
- return Type::getDoubleTy(Context);
+ llvm_unreachable("Unsupported operation!");
}
-
- return Type::getIntNTy(Context, NewBitWidth);
-}
-
-/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
-/// to another vector of the same element type which has the same allocation
-/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
-static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
- IRBuilder<> &Builder) {
- const Type *FromType = FromVal->getType();
- const VectorType *FromVTy = cast<VectorType>(FromType);
- const VectorType *ToVTy = cast<VectorType>(ToType);
- assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
- "Vectors must have the same element type");
- Value *UnV = UndefValue::get(FromType);
- unsigned numEltsFrom = FromVTy->getNumElements();
- unsigned numEltsTo = ToVTy->getNumElements();
-
- SmallVector<Constant*, 3> Args;
- const Type* Int32Ty = Builder.getInt32Ty();
- unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
- unsigned i;
- for (i=0; i != minNumElts; ++i)
- Args.push_back(ConstantInt::get(Int32Ty, i));
-
- if (i < numEltsTo) {
- Constant* UnC = UndefValue::get(Int32Ty);
- for (; i != numEltsTo; ++i)
- Args.push_back(UnC);
- }
- Constant *Mask = ConstantVector::get(Args);
- return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
@@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
-ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- const Type *FromType = FromVal->getType();
+ Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
- if (FromTypeSize == ToTypeSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (FromType->getPrimitiveSizeInBits() ==
- ToType->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
- else
- return CreateShuffleVectorCast(FromVal, ToType, Builder);
- }
-
- if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
- assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
- "of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(FromType, ToType,
- ToTypeSize * 8);
- unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
-
- LLVMContext &Context = FromVal->getContext();
- const Type *CastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Extract, ToType, "tmp");
- }
+ if (FromTypeSize == ToTypeSize)
+ return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
- Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType, "tmp");
+ V = Builder.CreateBitCast(V, ToType);
return V;
}
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
- if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ if (StructType *ST = dyn_cast<StructType>(ToType)) {
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
// Otherwise, this must be a union that was converted to an integer value.
- const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+ IntegerType *NTy = cast<IntegerType>(FromVal->getType());
// If this is a big-endian system and the load is narrower than the
// full alloca type, we need to do a shift to get the right bits.
@@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(),
- ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), ShAmt));
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(),
- -ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
// If the result is an integer, this is a trunc or bitcast.
if (ToType->isIntegerTy()) {
// Should be done.
} else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateBitCast(FromVal, ToType);
} else {
// Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType);
}
assert(FromVal->getType() == ToType && "Didn't convert right?");
return FromVal;
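The integer-blob path above, as a little-endian sketch (names illustrative): extracting a float stored at byte offset 4 of an alloca promoted to i64.

    %shifted = lshr i64 %blob, 32
    %trunc   = trunc i64 %shifted to i32
    %fval    = bitcast i32 %trunc to float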
@@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t Offset, IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
- const Type *AllocaType = Old->getType();
+ Type *AllocaType = Old->getType();
LLVMContext &Context = Old->getContext();
- if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (VTy->getPrimitiveSizeInBits() ==
- SV->getType()->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
- else
- return CreateShuffleVectorCast(SV, VTy, Builder);
- }
-
- if (isPowerOf2_64(VecSize / ValSize)) {
- assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
- "value of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
- ValSize);
- unsigned NumCastVectorElements = VecSize / ValSize;
-
- LLVMContext &Context = SV->getContext();
- const Type *OldCastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
-
- Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Insert =
- Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Insert, AllocaType, "tmp");
- }
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
assert(SV->getType() == VTy->getElementType());
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV,
- ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
- "tmp");
+ return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
// If SV is a first-class aggregate value, insert each value recursively.
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
@@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
return Old;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
}
return Old;
@@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV,
- IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
if (SV->getType()->getPrimitiveSizeInBits() <
AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ SV = Builder.CreateZExt(SV, AllocaType);
else {
// Truncation may be needed if storing more than the alloca can hold
// (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SV = Builder.CreateTrunc(SV, AllocaType);
SrcWidth = DestWidth;
SrcStoreWidth = DestStoreWidth;
}
@@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
- ShAmt), "tmp");
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
- -ShAmt), "tmp");
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
Mask = Mask.lshr(-ShAmt);
}
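The corresponding insert path, sketched for an i8 %b stored at byte offset 1 of an i32-promoted alloca on a little-endian target (illustrative; -65281 is ~(0xff << 8) as a signed i32):

    %z = zext i8 %b to i32
    %s = shl i32 %z, 8
    %m = and i32 %old, -65281
    %r = or i32 %m, %s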
@@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
// trapping load in the predecessor if it is a critical edge.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the terminator of the predecessor has side-effects (an invoke),
+ // there is no safe place to put a load in the predecessor.
+ if (Pred->getTerminator()->mayHaveSideEffects())
+ return false;
+
+ // If the value is produced by the terminator of the predecessor
+ // (an invoke), there is no valid place to put a load in the predecessor.
+ if (Pred->getTerminator() == InVal)
+ return false;
// If the predecessor has a single successor, then the edge isn't critical.
if (Pred->getTerminator()->getNumSuccessors() == 1)
continue;
-
- Value *InVal = PN->getIncomingValue(i);
-
- // If the InVal is an invoke in the pred, we can't put a load on the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == Pred)
- return false;
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
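The situation the new checks guard against, sketched (names illustrative; landing pad elided): an invoke is both the predecessor's terminator and the source of the incoming value, so there is nowhere in that block to place a speculated load.

    entry:
      %p = invoke i8* @get_ptr()
              to label %cont unwind label %lpad
    cont:
      %phi = phi i8* [ %p, %entry ], [ %q, %other ]
      %v = load i8* %phi    ; cannot be hoisted into %entry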
@@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
UI != UE; ++UI) {
User *U = *UI;
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI || SI->isVolatile())
+ if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
continue;
}
@@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (onlyUsedByLifetimeMarkers(BCI)) {
+ InstsToRewrite.insert(BCI);
+ continue;
+ }
+ }
+
return false;
}
@@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
+ // This could only be a bitcast used by nothing but lifetime intrinsics.
+ for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
+ I != E;) {
+ Use &U = I.getUse();
+ ++I;
+ cast<Instruction>(U.getUser())->eraseFromParent();
+ }
+ BCI->eraseFromParent();
+ continue;
+ }
+
if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
@@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
- const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
@@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) {
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
- const Type *T = AI->getAllocatedType();
+ Type *T = AI->getAllocatedType();
// Do not promote any struct into more than 32 separate vars.
- if (const StructType *ST = dyn_cast<StructType>(T))
+ if (StructType *ST = dyn_cast<StructType>(T))
return ST->getNumElements() <= 32;
// Arrays are much less likely to be safe for SROA; only consider
// them if they are very small.
- if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
return AT->getNumElements() <= 8;
return false;
}
@@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
@@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
WorkList.push_back(NA); // Add to worklist for recursive processing
}
} else {
- const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
ElementAllocas.reserve(AT->getNumElements());
- const Type *ElTy = AT->getElementType();
+ Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
@@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return MarkUnsafe(Info, User);
} else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
@@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
@@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
MarkUnsafe(Info, GEPI);
}
@@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
/// elements of the same type (which is always true for arrays). If so,
/// return true with NumElts and EltTy set to the number of elements and the
/// element type, respectively.
-static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
- const Type *&EltTy) {
- if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
+ Type *&EltTy) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
EltTy = (NumElts == 0 ? 0 : AT->getElementType());
return true;
}
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
@@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
/// "homogeneous" aggregates with the same element type and number of elements.
-static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+static bool isCompatibleAggregate(Type *T1, Type *T2) {
if (T1 == T2)
return true;
unsigned NumElts1, NumElts2;
- const Type *EltTy1, *EltTy2;
+ Type *EltTy1, *EltTy2;
if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
NumElts1 == NumElts2 &&
@@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
/// unit. If false, it only allows accesses known to be in a single element.
void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore,
+ Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
@@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = Info.AI->getAllocatedType();
+ Type *T = Info.AI->getAllocatedType();
if (TypeHasComponent(T, Offset, MemSize)) {
Info.hasSubelementAccess = true;
return;
@@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
- const Type *EltTy;
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+ Type *EltTy;
uint64_t EltSize;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
EltSize = TD->getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
EltSize = TD->getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
@@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// address operand will be updated, so nothing else needs to be done.
continue;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
+ }
+ continue;
+ }
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
@@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
- const Type *SIType = Val->getType();
+ Type *SIType = Val->getType();
if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
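The expansion the comment is describing, spelled out (element names follow the comment):

    %val.0 = extractvalue { i32, i32 } %val, 0
    store i32 %val.0, i32* %alloc.0
    %val.1 = extractvalue { i32, i32 } %val, 1
    store i32 %val.1, i32* %alloc.1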
@@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy) {
uint64_t Idx = 0;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
@@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
IdxTy = Type::getInt32Ty(T->getContext());
return Idx;
}
- const ArrayType *AT = cast<ArrayType>(T);
+ ArrayType *AT = cast<ArrayType>(T);
T = AT->getElementType();
uint64_t EltSize = TD->getTypeAllocSize(T);
Idx = Offset / EltSize;
@@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
- const Type *T = AI->getAllocatedType();
- const Type *IdxTy;
+ Type *T = AI->getAllocatedType();
+ Type *IdxTy;
uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
@@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
if (Idx == OldIdx)
return;
- const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ Type *i32Ty = Type::getInt32Ty(AI->getContext());
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
@@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
}
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
- Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
Val->takeName(GEPI);
}
if (Val->getType() != GEPI->getType())
@@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
DeadInsts.push_back(GEPI);
}
+/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
+/// to mark the lifetime of the scalarized memory.
+void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
+ // Put matching lifetime markers on everything from Offset up to
+ // Offset+OldSize.
+ Type *AIType = AI->getAllocatedType();
+ uint64_t NewOffset = Offset;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+
+ IRBuilder<> Builder(II);
+ uint64_t Size = OldSize->getLimitedValue();
+
+ if (NewOffset) {
+ // Splice the first element and index 'NewOffset' bytes in. SROA will
+ // split the alloca again later.
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
+ ++Idx;
+ }
+
+ for (; Idx != NewElts.size() && Size; ++Idx) {
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ }
+ DeadInsts.push_back(II);
+}
+
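The net effect, sketched for a 12-byte marker over an alloca split into %a.0 (i32) and %a.1 ([8 x i8]); sizes and names are illustrative:

    ; before scalar replacement:
    call void @llvm.lifetime.start(i64 12, i8* %p)
    ; after, one marker per element alloca:
    %p0 = bitcast i32* %a.0 to i8*
    call void @llvm.lifetime.start(i64 4, i8* %p0)
    %p1 = bitcast [8 x i8]* %a.1 to i8*
    call void @llvm.lifetime.start(i64 8, i8* %p1)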
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
@@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the pointer is not the right type, insert a bitcast to the right
// type.
- const Type *NewTy =
+ Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
if (OtherPtr->getType() != NewTy)
@@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
OtherPtr->getName()+"."+Twine(i),
MI);
uint64_t EltOffset;
- const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
- const Type *OtherTy = OtherPtrTy->getElementType();
- if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ Type *OtherTy = OtherPtrTy->getElementType();
+ if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
} else {
- const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
@@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
Value *EltPtr = NewElts[i];
- const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+ Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
@@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy->getScalarType();
+ Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
@@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// If the requested value was a vector constant, create it.
- if (EltTy != ValTy) {
- unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ if (EltTy->isVectorTy()) {
+ unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
StoreVal = ConstantVector::get(Elts);
}
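For instance, a memset of byte value 1 over an element alloca of type <2 x i32> now stores a splat built from the repeated byte pattern (0x01010101 = 16843009; sketch):

    store <2 x i32> <i32 16843009, i32 16843009>, <2 x i32>* %a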
@@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
@@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
- const Type *FieldTy = EltSTy->getElementType(i);
+ Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
if (TD->isBigEndian())
@@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
} else {
- const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
- const Type *ArrayEltTy = ATy->getElementType();
+ ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ Type *ArrayEltTy = ATy->getElementType();
uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
@@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
@@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
uint64_t ArrayEltBitOffset = 0;
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = TD->getStructLayout(EltSTy);
} else {
- const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
}
@@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
Value *SrcField = NewElts[i];
- const Type *FieldTy =
+ Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
-static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+static bool HasPadding(Type *Ty, const TargetData &TD) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
// SROA currently handles only Arrays and Structs.
- const StructType *STy = cast<StructType>(Ty);
+ StructType *STy = cast<StructType>(Ty);
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// and fusion code.
if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
// If the struct/array just has one element, use basic SRoA.
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
if (ST->getNumElements() > 1) return false;
} else {
if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
@@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Ignore non-volatile loads, they are always ok.
- if (LI->isVolatile()) return false;
+ if (!LI->isSimple()) return false;
continue;
}