diff options
Diffstat (limited to 'lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 198 |
1 files changed, 139 insertions, 59 deletions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b70de00db04b..c661c7b87dcb 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { unsigned NumPointers = Pointers.size(); + unsigned CheckCount = 0; for (unsigned I = 0; I < NumPointers; ++I) for (unsigned J = I + 1; J < NumPointers; ++J) if (needsChecking(I, J, PtrPartition)) - return true; - return false; + CheckCount++; + return CheckCount; +} + +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + return getNumberOfChecks(PtrPartition) != 0; } namespace { @@ -220,10 +226,11 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. + /// non-intersection. Returns true when we have 0 pointers + /// (a check on 0 pointers for non-intersection will always return true). bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop, const ValueToValueMap &Strides, + bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -289,29 +296,23 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -/// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap); - bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, + LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; + NeedRTCheck = false; + if (!IsRTCheckNeeded) return true; + bool IsDepCheckNeeded = isDependencyCheckNeeded(); - NumComparisons = 0; // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { - unsigned NumReadPtrChecks = 0; - unsigned NumWritePtrChecks = 0; - // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; @@ -322,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); - if (IsWrite) - ++NumWritePtrChecks; - else - ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -354,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) - NumComparisons += 0; // Only one dependence set. - else { - NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + - NumWritePtrChecks - 1)); - } - ++ASId; } + // We need a runtime check if there are any accesses that need checking. + // However, some accesses cannot be checked (for example because we + // can't determine their bounds). In these cases we would need a check + // but wouldn't be able to add it. + NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); + // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -510,8 +505,8 @@ static bool isInBoundsGep(Value *Ptr) { } /// \brief Check whether the access through \p Ptr has a constant stride. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { +int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { const Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -678,6 +673,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, return false; } +/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// \p Distance is the positive distance and \p TypeByteSize is type size in +/// bytes. +/// +/// \returns true if they are independent. +static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, + unsigned TypeByteSize) { + assert(Stride > 1 && "The stride must be greater than 1"); + assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); + assert(Distance > 0 && "The distance must be non-zero"); + + // Skip if the distance is not multiple of type byte size. + if (Distance % TypeByteSize) + return false; + + unsigned ScaledDist = Distance / TypeByteSize; + + // No dependence if the scaled distance is not multiple of the stride. + // E.g. + // for (i = 0; i < 1024 ; i += 4) + // A[i+2] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 2, stride is 4): + // | A[0] | | | | A[4] | | | | + // | | | A[2] | | | | A[6] | | + // + // E.g. + // for (i = 0; i < 1024 ; i += 3) + // A[i+4] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 4, stride is 3): + // | A[0] | | | A[3] | | | A[6] | | | + // | | | | | A[4] | | | A[7] | | + return ScaledDist % Stride; +} + MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, @@ -778,34 +809,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Distance = (unsigned) Val.getZExtValue(); + unsigned Stride = std::abs(StrideAPtr); + if (Stride > 1 && + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + return Dependence::NoDep; + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? VectorizerParams::VectorizationInterleave : 1); + // The minimum number of iterations for a vectorized/unrolled version. + unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); + + // It's not vectorizable if the distance is smaller than the minimum distance + // needed for a vectroized/unrolled version. Vectorizing one iteration in + // front needs TypeByteSize * Stride. Vectorizing the last iteration needs + // TypeByteSize (No need to plus the last gap distance). + // + // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. + // foo(int *A) { + // int *B = (int *)((char *)A + 14); + // for (i = 0 ; i < 1024 ; i += 2) + // B[i] = A[i] + 1; + // } + // + // Two accesses in memory (stride is 2): + // | A[0] | | A[2] | | A[4] | | A[6] | | + // | B[0] | | B[2] | | B[4] | + // + // Distance needs for vectorizing iterations except the last iteration: + // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. + // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. + // + // If MinNumIter is 2, it is vectorizable as the minimum distance needed is + // 12, which is less than distance. + // + // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), + // the minimum distance needed is 28, which is greater than distance. It is + // not safe to do vectorization. + unsigned MinDistanceNeeded = + TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + if (MinDistanceNeeded > Distance) { + DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance + << '\n'); + return Dependence::Backward; + } - // The distance must be bigger than the size needed for a vectorized version - // of the operation and the size of the vectorized operation must not be - // bigger than the currrent maximum size. - if (Distance < 2*TypeByteSize || - 2*TypeByteSize > MaxSafeDepDistBytes || - Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { - DEBUG(dbgs() << "LAA: Failure because of Positive distance " - << Val.getSExtValue() << '\n'); + // Unsafe if the minimum distance needed is greater than max safe distance. + if (MinDistanceNeeded > MaxSafeDepDistBytes) { + DEBUG(dbgs() << "LAA: Failure because it needs at least " + << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } // Positive distance bigger than max vectorization factor. - MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? - Distance : MaxSafeDepDistBytes; + // FIXME: Should use max factor instead of max distance in bytes, which could + // not handle different types. + // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. + // void foo (int *A, char *B) { + // for (unsigned i = 0; i < 1024; i++) { + // A[i+2] = A[i] + 1; + // B[i+2] = B[i] + 1; + // } + // } + // + // This case is currently unsafe according to the max safe distance. If we + // analyze the two accesses on array B, the max safe dependence distance + // is 2. Then we analyze the accesses on array A, the minimum distance needed + // is 8, which is less than 2 and forbidden vectorization, But actually + // both A and B could be vectorized by 2 iterations. + MaxSafeDepDistBytes = + Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes; bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << - " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); + DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() + << " with max VF = " + << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n'); return Dependence::BackwardVectorizable; } @@ -1066,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (Seen.insert(Ptr).second) { ++NumReadWrites; - AliasAnalysis::Location Loc = AA->getLocation(ST); + AliasAnalysis::Location Loc = MemoryLocation::get(ST); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1102,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { IsReadOnlyPtr = true; } - AliasAnalysis::Location Loc = AA->getLocation(LD); + AliasAnalysis::Location Loc = MemoryLocation::get(LD); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1123,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Build dependence sets and check whether we need a runtime pointer bounds // check. Accesses.buildDependenceSets(); - bool NeedRTCheck = Accesses.isRTCheckNeeded(); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRT = false; - if (NeedRTCheck) - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, - Strides); - - DEBUG(dbgs() << "LAA: We need to do " << NumComparisons << - " pointer comparisons.\n"); + bool NeedRTCheck; + bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, + NeedRTCheck, SE, + TheLoop, Strides); - // If we only have one set of dependences to check pointers among we don't - // need a runtime check. - if (NumComparisons == 0 && NeedRTCheck) - NeedRTCheck = false; + DEBUG(dbgs() << "LAA: We need to do " + << PtrRtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); // Check that we found the bounds for the pointer. if (CanDoRT) @@ -1171,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtCheck.reset(); PtrRtCheck.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, TheLoop, Strides, true); + // Check that we found the bounds for the pointer. - if (!CanDoRT && NumComparisons > 0) { + if (NeedRTCheck && !CanDoRT) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); @@ -1319,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), + : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { |