diff options
Diffstat (limited to 'llvm/lib/CodeGen/AtomicExpandPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/AtomicExpandPass.cpp | 204 |
1 files changed, 129 insertions, 75 deletions
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4026022caa07..125a3be585cb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -78,12 +78,14 @@ namespace { StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); + AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); Value * insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - AtomicOrdering MemOpOrder, + Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); void expandAtomicOpToLLSC( - Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, + Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); void expandPartwordAtomicRMW( AtomicRMWInst *I, @@ -95,8 +97,8 @@ namespace { AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); static Value *insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultType, Value *Addr, - AtomicOrdering MemOpOrder, + IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg); bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -234,12 +236,13 @@ bool AtomicExpand::runOnFunction(Function &F) { TLI->shouldExpandAtomicCmpXchgInIR(CASI) == TargetLoweringBase::AtomicExpansionKind::None && (isReleaseOrStronger(CASI->getSuccessOrdering()) || - isAcquireOrStronger(CASI->getSuccessOrdering()))) { + isAcquireOrStronger(CASI->getSuccessOrdering()) || + isAcquireOrStronger(CASI->getFailureOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by // expandAtomicCmpXchg in that case. - FenceOrdering = CASI->getSuccessOrdering(); + FenceOrdering = CASI->getMergedOrdering(); CASI->setSuccessOrdering(AtomicOrdering::Monotonic); CASI->setFailureOrdering(AtomicOrdering::Monotonic); } @@ -280,9 +283,18 @@ bool AtomicExpand::runOnFunction(Function &F) { if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { MadeChange = true; } else { + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + if (Op == AtomicRMWInst::Xchg && + RMWI->getValOperand()->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + RMWI = convertAtomicXchgToIntegerType(RMWI); + assert(RMWI->getValOperand()->getType()->isIntegerTy() && + "invariant broken"); + MadeChange = true; + } unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(RMWI); - AtomicRMWInst::BinOp Op = RMWI->getOperation(); if (ValueSize < MinCASSize && (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || Op == AtomicRMWInst::And)) { @@ -362,13 +374,40 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { return NewLI; } +AtomicRMWInst * +AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { + auto *M = RMWI->getModule(); + Type *NewTy = + getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); + + IRBuilder<> Builder(RMWI); + + Value *Addr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); + Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + Value *NewVal = Builder.CreateBitCast(Val, NewTy); + + auto *NewRMWI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, + RMWI->getAlign(), RMWI->getOrdering()); + NewRMWI->setVolatile(RMWI->isVolatile()); + LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); + + Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); + RMWI->replaceAllUsesWith(NewRVal); + RMWI->eraseFromParent(); + return NewRMWI; +} + bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { switch (TLI->shouldExpandAtomicLoadInIR(LI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: expandAtomicOpToLLSC( - LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(), + LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), + LI->getOrdering(), [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); return true; case TargetLoweringBase::AtomicExpansionKind::LLOnly: @@ -386,8 +425,8 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { // On some architectures, load-linked instructions are atomic for larger // sizes than normal loads. For example, the only 64-bit load guaranteed // to be single-copy atomic by ARM is an ldrexd (A3.5.3). - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), + LI->getPointerOperand(), LI->getOrdering()); TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); LI->replaceAllUsesWith(Val); @@ -403,11 +442,11 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { Order = AtomicOrdering::Monotonic; Value *Addr = LI->getPointerOperand(); - Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + Type *Ty = LI->getType(); Constant *DummyVal = Constant::getNullValue(Ty); Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, DummyVal, DummyVal, Order, + Addr, DummyVal, DummyVal, LI->getAlign(), Order, AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); @@ -454,9 +493,9 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); + AtomicRMWInst *AI = Builder.CreateAtomicRMW( + AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), + SI->getAlign(), SI->getOrdering()); SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. @@ -464,8 +503,8 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { } static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, - Value *Loaded, Value *NewVal, - AtomicOrdering MemOpOrder, + Value *Loaded, Value *NewVal, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { Type *OrigTy = NewVal->getType(); @@ -479,9 +518,9 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Loaded = Builder.CreateBitCast(Loaded, IntTy); } - Value* Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, AddrAlign, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); Success = Builder.CreateExtractValue(Pair, 1, "success"); NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); @@ -546,7 +585,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { AI->getValOperand()); }; expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), - AI->getOrdering(), PerformOp); + AI->getAlign(), AI->getOrdering(), PerformOp); } return true; } @@ -581,6 +620,7 @@ struct PartwordMaskValues { Type *WordType = nullptr; Type *ValueType = nullptr; Value *AlignedAddr = nullptr; + Align AlignedAddrAlignment; // The remaining fields can be null. Value *ShiftAmt = nullptr; Value *Mask = nullptr; @@ -603,6 +643,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { PrintObj(PMV.ValueType); O << " AlignedAddr: "; PrintObj(PMV.AlignedAddr); + O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; O << " ShiftAmt: "; PrintObj(PMV.ShiftAmt); O << " Mask: "; @@ -633,6 +674,7 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { /// Inv_Mask: The inverse of Mask. static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, + Align AddrAlign, unsigned MinWordSize) { PartwordMaskValues PMV; @@ -646,6 +688,9 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, : ValueType; if (PMV.ValueType == PMV.WordType) { PMV.AlignedAddr = Addr; + PMV.AlignedAddrAlignment = AddrAlign; + PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); + PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); return PMV; } @@ -654,10 +699,12 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *WordPtrType = PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + // TODO: we could skip some of this if AddrAlign >= MinWordSize. Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); PMV.AlignedAddr = Builder.CreateIntToPtr( Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); + PMV.AlignedAddrAlignment = Align(MinWordSize); Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { @@ -760,12 +807,13 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { AtomicOrdering MemOpOrder = AI->getOrdering(); + SyncScope::ID SSID = AI->getSyncScopeID(); IRBuilder<> Builder(AI); PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -778,13 +826,15 @@ void AtomicExpand::expandPartwordAtomicRMW( Value *OldResult; if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { - OldResult = - insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, - PerformPartwordOp, createCmpXchgInstFun); + OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, + PMV.AlignedAddrAlignment, MemOpOrder, + SSID, PerformPartwordOp, + createCmpXchgInstFun); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, - MemOpOrder, PerformPartwordOp); + PMV.AlignedAddrAlignment, MemOpOrder, + PerformPartwordOp); } Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); @@ -803,7 +853,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -817,8 +867,9 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { else NewOperand = ValOperand_Shifted; - AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, - NewOperand, AI->getOrdering()); + AtomicRMWInst *NewAI = + Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, + PMV.AlignedAddrAlignment, AI->getOrdering()); Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); @@ -871,8 +922,6 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { IRBuilder<> Builder(CI); LLVMContext &Ctx = Builder.getContext(); - const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8; - BasicBlock *EndBB = BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); auto FailureBB = @@ -884,8 +933,9 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - PartwordMaskValues PMV = createMaskInstrs( - Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize); + PartwordMaskValues PMV = + createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); // Shift the incoming values over, into the right location in the word. Value *NewVal_Shifted = @@ -909,8 +959,8 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( - PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSyncScopeID()); + PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment, + CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. But, using strong @@ -953,11 +1003,12 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { } void AtomicExpand::expandAtomicOpToLLSC( - Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder, + Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { IRBuilder<> Builder(I); - Value *Loaded = - insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp); + Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, + MemOpOrder, PerformOp); I->replaceAllUsesWith(Loaded); I->eraseFromParent(); @@ -968,7 +1019,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { PartwordMaskValues PMV = createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); // The value operand must be sign-extended for signed min/max so that the // target's signed comparison instructions can be used. Otherwise, just @@ -994,7 +1045,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { PartwordMaskValues PMV = createMaskInstrs( Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Value *CmpVal_Shifted = Builder.CreateShl( Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, @@ -1004,7 +1055,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { "NewVal_Shifted"); Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, - CI->getSuccessOrdering()); + CI->getMergedOrdering()); Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); @@ -1017,13 +1068,17 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { } Value *AtomicExpand::insertRMWLLSCLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); + assert(AddrAlign >= + F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) && + "Expected at least natural alignment at this point."); + // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: @@ -1048,7 +1103,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); Value *NewVal = PerformOp(Builder, Loaded); @@ -1082,11 +1137,9 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); - - auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, - CI->getSuccessOrdering(), - CI->getFailureOrdering(), - CI->getSyncScopeID()); + auto *NewCI = Builder.CreateAtomicCmpXchg( + NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(), + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); @@ -1117,8 +1170,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); - AtomicOrdering MemOpOrder = - ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder; + AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic + ? AtomicOrdering::Monotonic + : CI->getMergedOrdering(); // In implementations which use a barrier to achieve release semantics, we can // delay emitting this barrier until we know a store is actually going to be @@ -1211,13 +1265,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { PartwordMaskValues PMV = createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, - TLI->getMinCmpXchgSizeInBits() / 8); + CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); Value *UnreleasedLoad = - TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); Value *UnreleasedLoadExtract = extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( @@ -1250,7 +1304,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { - SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + SecondLoad = + TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, CI->getCompareOperand(), "should_store"); @@ -1379,8 +1434,8 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { } Value *AtomicExpand::insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - AtomicOrdering MemOpOrder, + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, + AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, CreateCmpXchgInstFun CreateCmpXchg) { LLVMContext &Ctx = Builder.getContext(); @@ -1411,9 +1466,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8)); + LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1426,11 +1479,11 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Value *NewLoaded = nullptr; Value *Success = nullptr; - CreateCmpXchg(Builder, Addr, Loaded, NewVal, + CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder == AtomicOrdering::Unordered ? AtomicOrdering::Monotonic : MemOpOrder, - Success, NewLoaded); + SSID, Success, NewLoaded); assert(Success && NewLoaded); Loaded->addIncoming(NewLoaded, LoopBB); @@ -1466,7 +1519,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { IRBuilder<> Builder(AI); Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( - Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(), + Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), + AI->getOrdering(), AI->getSyncScopeID(), [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); @@ -1614,20 +1668,20 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { // libcalls (add/sub/etc) and we needed a generic. So, expand to a // CAS libcall, via a CAS loop, instead. if (!Success) { - expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr, - Value *Loaded, Value *NewVal, - AtomicOrdering MemOpOrder, - Value *&Success, Value *&NewLoaded) { - // Create the CAS instruction normally... - AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Success = Builder.CreateExtractValue(Pair, 1, "success"); - NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - - // ...and then expand the CAS into a libcall. - expandAtomicCASToLibcall(Pair); - }); + expandAtomicRMWToCmpXchg( + I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded, + Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { + // Create the CAS instruction normally... + AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, Alignment, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + + // ...and then expand the CAS into a libcall. + expandAtomicCASToLibcall(Pair); + }); } } |