Diffstat (limited to 'contrib/llvm-project/llvm/lib')
40 files changed, 296 insertions, 221 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 72b6dfa181e8..8dcffe45c644 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4322,6 +4322,10 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
   if (match(I, m_Intrinsic<Intrinsic::is_constant>()))
     return nullptr;
 
+  // Don't simplify freeze.
+  if (isa<FreezeInst>(I))
+    return nullptr;
+
   // Replace Op with RepOp in instruction operands.
   SmallVector<Value *, 8> NewOps;
   bool AnyReplaced = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 8ee1f19e083e..1cca56fc19cf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8154,6 +8154,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
     IRBuilder<> Builder(Branch);
     if (UI->getParent() != Branch->getParent())
       UI->moveBefore(Branch);
+    UI->dropPoisonGeneratingFlags();
     Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
                                       ConstantInt::get(UI->getType(), 0));
     LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
@@ -8167,6 +8168,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
     IRBuilder<> Builder(Branch);
     if (UI->getParent() != Branch->getParent())
       UI->moveBefore(Branch);
+    UI->dropPoisonGeneratingFlags();
     Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
                                       ConstantInt::get(UI->getType(), 0));
     LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 772229215e79..61ddc858ba44 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -591,8 +591,8 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
       const auto &MMO = LoadMI->getMMO();
-      // For atomics, only form anyextending loads.
-      if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
+      // Don't do anything for atomics.
+      if (MMO.isAtomic())
         continue;
       // Check for legality.
       if (!isPreLegalize()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c0c22e36004f..47d045ac4817 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4180,6 +4180,10 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
     }
   }
 
+  // Set the insert point after the existing PHIs
+  MachineBasicBlock &MBB = *MI.getParent();
+  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
+
   // Merge small outputs into MI's def.
   if (NumLeftovers) {
     mergeMixedSubvectors(MI.getReg(0), OutputRegs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 246aa88b09ac..ee499c41c558 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                 MachineRegisterInfo &MRI) {
   BaseIndexOffset Info;
   Register PtrAddRHS;
-  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
-    Info.BaseReg = Ptr;
-    Info.IndexReg = Register();
-    Info.IsIndexSignExt = false;
+  Register BaseReg;
+  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
+    Info.setBase(Ptr);
+    Info.setOffset(0);
     return Info;
   }
-
+  Info.setBase(BaseReg);
   auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
   if (RHSCst)
-    Info.Offset = RHSCst->Value.getSExtValue();
+    Info.setOffset(RHSCst->Value.getSExtValue());
 
   // Just recognize a simple case for now. In future we'll need to match
   // indexing patterns for base + index + constant.
-  Info.IndexReg = PtrAddRHS;
-  Info.IsIndexSignExt = false;
+  Info.setIndex(PtrAddRHS);
   return Info;
 }
 
@@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
   BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
 
-  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+  if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
     return false;
 
   int64_t Size1 = LdSt1->getMemSize();
   int64_t Size2 = LdSt2->getMemSize();
 
   int64_t PtrDiff;
-  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
-    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+  if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
+      BasePtr1.hasValidOffset()) {
+    PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
     // If the size of memory access is unknown, do not use it to do analysis.
     // One example of unknown size memory access is to load/store scalable
     // vector objects on the stack.
@@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
   // able to calculate their relative offset if at least one arises
   // from an alloca. However, these allocas cannot overlap and we
   // can infer there is no alias.
-  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
-  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+  auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
+  auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
   if (!Base0Def || !Base1Def)
     return false; // Couldn't tell anything.
 
@@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
 
   Register StoreAddr = StoreMI.getPointerReg();
   auto BIO = getPointerInfo(StoreAddr, *MRI);
-  Register StoreBase = BIO.BaseReg;
-  uint64_t StoreOffCst = BIO.Offset;
+  Register StoreBase = BIO.getBase();
   if (C.Stores.empty()) {
+    C.BasePtr = StoreBase;
+    if (!BIO.hasValidOffset()) {
+      C.CurrentLowestOffset = 0;
+    } else {
+      C.CurrentLowestOffset = BIO.getOffset();
+    }
     // This is the first store of the candidate.
     // If the offset can't possibly allow for a lower addressed store with the
     // same base, don't bother adding it.
-    if (StoreOffCst < ValueTy.getSizeInBytes())
+    if (BIO.hasValidOffset() &&
+        BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
       return false;
-    C.BasePtr = StoreBase;
-    C.CurrentLowestOffset = StoreOffCst;
     C.Stores.emplace_back(&StoreMI);
     LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                       << StoreMI);
@@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
   // writes to the next lowest adjacent address.
   if (C.BasePtr != StoreBase)
     return false;
-  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
-      static_cast<uint64_t>(StoreOffCst))
+  // If we don't have a valid offset, we can't guarantee to be an adjacent
+  // offset.
+  if (!BIO.hasValidOffset())
+    return false;
+  if ((C.CurrentLowestOffset -
+       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
     return false;
 
   // This writes to an adjacent address. Allow it.
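The LoadStoreOpt change above swaps direct BaseIndexOffset field access for accessors so an offset can be marked invalid instead of silently defaulting to 0. A minimal standalone sketch of the adjacency test (types hypothetical, not the pass's real data structures) shows why both the optional offset and the move to signed arithmetic matter: candidates grow toward lower addresses, offsets may be negative, and an unknown offset can never prove adjacency.

    #include <cstdint>
    #include <optional>

    struct CandidateSketch {
      int64_t CurrentLowestOffset = 0; // offset of the lowest store so far
    };

    // A store may only join the candidate when its offset is known and sits
    // exactly one value-size below the current lowest store.
    bool extendsDownward(const CandidateSketch &C,
                         std::optional<int64_t> StoreOff,
                         uint64_t ValueSizeInBytes) {
      if (!StoreOff) // unknown offset: adjacency cannot be proven
        return false;
      return C.CurrentLowestOffset - static_cast<int64_t>(ValueSizeInBytes) ==
             *StoreOff;
    }

The old code performed this subtraction in uint64_t, where a candidate at offset 0 minus a 4-byte value wraps around instead of producing -4.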
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index f2d5c3c867c2..bbb0b654dc67 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -877,6 +877,9 @@ public:
     if (LI->isAtomic())
       return false;
 
+    if (!DL.typeSizeEqualsStoreSize(Result.VTy->getElementType()))
+      return false;
+
     // Get the base polynomial
     computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr,
                                  DL);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e806e0f0731f..5038f8a1fc15 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9636,8 +9636,15 @@ static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
       return false;
 
+    // The fold is not valid if the sum of the shift values doesn't fit in the
+    // given shift amount type.
+    bool Overflow = false;
+    APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
+    if (Overflow)
+      return false;
+
     // The fold is not valid if the sum of the shift values exceeds bitwidth.
-    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+    if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
       return false;
 
     return true;
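The DAGCombiner guard above exists because the two shift constants are added in the width of the shift-amount type, which can be narrower than the shifted value's type; the sum can wrap and then pass the bitwidth check. A standalone illustration with APInt (constants chosen only to show the wrap):

    #include <llvm/ADT/APInt.h>
    #include <cassert>

    int main() {
      llvm::APInt C1(8, 200), C2(8, 100); // 8-bit shift-amount constants
      bool Overflow = false;
      llvm::APInt Sum = C1.uadd_ov(C2, Overflow);
      assert(Overflow && Sum == 44); // 300 wraps to 44 in 8 bits
      // A naive (C1 + C2).uge(BitWidth) test would accept the wrapped 44,
      // so the combine now rejects the fold whenever uadd_ov overflows.
    }

uadd_ov computes the modular sum and reports the carry, which is exactly the property the old `(*ShiftAmtVal + C1Val)` expression lost.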
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ce1013f30fd..7406a8ac1611 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -10888,7 +10888,7 @@ static void tryToElideArgumentCopy(
   }
 
   // Perform the elision. Delete the old stack object and replace its only use
-  // in the variable info map. Mark the stack object as mutable.
+  // in the variable info map. Mark the stack object as mutable and aliased.
   LLVM_DEBUG({
     dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
            << "  Replacing frame index " << OldIndex << " with " << FixedIndex
@@ -10896,6 +10896,7 @@ static void tryToElideArgumentCopy(
   });
   MFI.RemoveStackObject(OldIndex);
   MFI.setIsImmutableObjectIndex(FixedIndex, false);
+  MFI.setIsAliasedObjectIndex(FixedIndex, true);
   AllocaIndex = FixedIndex;
   ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
   for (SDValue ArgVal : ArgVals)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 55c5bbc66a3f..862aefe46193 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -181,13 +181,14 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
   }
 
   for (unsigned E = FT->getNumParams(); I != E; ++I) {
-    Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne();
 #if 0
     // FIXME: Need more information about argument size; see
     // https://reviews.llvm.org/D132926
     uint64_t ArgSizeBytes = AttrList.getParamArm64ECArgSizeBytes(I);
+    Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne();
 #else
     uint64_t ArgSizeBytes = 0;
+    Align ParamAlign = Align();
 #endif
     Type *Arm64Ty, *X64Ty;
     canonicalizeThunkType(FT->getParamType(I), ParamAlign,
@@ -297,7 +298,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType(
     uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
     if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
       Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
-      if (Alignment.value() >= 8 && !T->isPointerTy())
+      if (Alignment.value() >= 16 && !Ret)
         Out << "a" << Alignment.value();
       Arm64Ty = T;
       if (TotalSizeBytes <= 8) {
@@ -328,7 +329,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType(
   Out << "m";
   if (TypeSize != 4)
     Out << TypeSize;
-  if (Alignment.value() >= 8 && !T->isPointerTy())
+  if (Alignment.value() >= 16 && !Ret)
     Out << "a" << Alignment.value();
   // FIXME: Try to canonicalize Arm64Ty more thoroughly?
   Arm64Ty = T;
@@ -513,7 +514,14 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
   // Call the function passed to the thunk.
   Value *Callee = Thunk->getArg(0);
   Callee = IRB.CreateBitCast(Callee, PtrTy);
-  Value *Call = IRB.CreateCall(Arm64Ty, Callee, Args);
+  CallInst *Call = IRB.CreateCall(Arm64Ty, Callee, Args);
+
+  auto SRetAttr = F->getAttributes().getParamAttr(0, Attribute::StructRet);
+  auto InRegAttr = F->getAttributes().getParamAttr(0, Attribute::InReg);
+  if (SRetAttr.isValid() && !InRegAttr.isValid()) {
+    Thunk->addParamAttr(1, SRetAttr);
+    Call->addParamAttr(0, SRetAttr);
+  }
 
   Value *RetVal = Call;
   if (TransformDirectToSRet) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 95d8ab95b2c0..bcfd0253e73c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22122,7 +22122,8 @@ SDValue performCONDCombine(SDNode *N,
   SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
   unsigned CondOpcode = SubsNode->getOpcode();
 
-  if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0))
+  if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) ||
+      !SubsNode->hasOneUse())
     return SDValue();
 
   // There is a SUBS feeding this condition. Is it fed by a mask we can
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 03baa7497615..ac61dd8745d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4885,19 +4885,9 @@ defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
 def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                            (zext (v8i8 V64:$opB))))),
           (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
-def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
-               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
-                                (zext (v8i8 V64:$opB))),
-                           (AArch64vashr v8i16:$src, (i32 15))))),
-          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
 def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                            (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
           (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
-def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
-               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
-                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
-                           (AArch64vashr v8i16:$src, (i32 15))))),
-          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
 def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                            (zext (v4i16 V64:$opB))))),
           (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915..80fe4bcb8b58 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
   case CmpInst::FCMP_UNE:
     CondCode = AArch64CC::NE;
     break;
+  case CmpInst::FCMP_TRUE:
+    CondCode = AArch64CC::AL;
+    break;
+  case CmpInst::FCMP_FALSE:
+    CondCode = AArch64CC::NV;
+    break;
   }
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b9d549e7911..de3c89e925a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -877,6 +877,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
       .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
+      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64);
 
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index b8e5e7bbdaba..06cdd7e4ef48 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -623,8 +623,11 @@ bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
       EltTy = GV->getValueType();
       // Look at the first element of the struct to determine the type we are
       // loading
-      while (StructType *StructEltTy = dyn_cast<StructType>(EltTy))
+      while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
+        if (StructEltTy->getNumElements() == 0)
+          break;
         EltTy = StructEltTy->getTypeAtIndex(0U);
+      }
       // Look at the first element of the array to determine its type
       if (isa<ArrayType>(EltTy))
         EltTy = EltTy->getArrayElementType();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6ecb1c8bf6e1..7a3198612f86 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1832,7 +1832,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // not, we need to ensure the subtarget is capable of backing off barrier
   // instructions in case there are any outstanding memory operations that may
   // cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
-  if (MI.getOpcode() == AMDGPU::S_BARRIER &&
+  if (TII->isBarrierStart(MI.getOpcode()) &&
       !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
     Wait = Wait.combined(
         AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt()));
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1c9dacc09f81..626d903c0c69 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -908,6 +908,17 @@ public:
     return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
   }
 
+  // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
+  // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
+  // to check for the barrier start (S_BARRIER_SIGNAL*)
+  bool isBarrierStart(unsigned Opcode) const {
+    return Opcode == AMDGPU::S_BARRIER ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 84b9330ef963..50d8bfa87508 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2358,6 +2358,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
 
   bool Changed = false;
 
+  if (IsNonTemporal) {
+    // Set non-temporal hint for all cache levels.
+    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
+  }
+
   if (IsVolatile) {
     Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
 
@@ -2370,11 +2375,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
                                       Position::AFTER);
   }
 
-  if (IsNonTemporal) {
-    // Set non-temporal hint for all cache levels.
-    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
-  }
-
   return Changed;
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
index ae5ef0541929..5762efde73f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1786,7 +1786,7 @@ def : GCNPat<
 let SubtargetPredicate = isNotGFX12Plus in
   def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 0))>;
 let SubtargetPredicate = isGFX12Plus in
-  def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 1))>;
+  def : GCNPat <(int_amdgcn_s_wait_event_export_ready), (S_WAIT_EVENT (i16 2))>;
 
 // The first 10 bits of the mode register are the core FP mode on all
 // subtargets.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
index d9465e86d896..0830b02370cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Pass.h"
@@ -116,9 +117,20 @@ private:
 // sure that they can be replaced.
 static bool hasReplaceableUsers(GlobalVariable &GV) {
   for (User *CurrentUser : GV.users()) {
-    // Instruction users are always valid.
-    if (isa<Instruction>(CurrentUser))
+    if (auto *I = dyn_cast<Instruction>(CurrentUser)) {
+      // Do not merge globals in exception pads.
+      if (I->isEHPad())
+        return false;
+
+      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+        // Some intrinsics require a plain global.
+        if (II->getIntrinsicID() == Intrinsic::eh_typeid_for)
+          return false;
+      }
+
+      // Other instruction users are always valid.
       continue;
+    }
 
     // We cannot replace GlobalValue users because they are not just nodes
     // in IR. To replace a user like this we would need to create a new
@@ -278,13 +290,6 @@ bool PPCMergeStringPool::mergeModuleStringPool(Module &M) {
   return true;
 }
 
-static bool userHasOperand(User *TheUser, GlobalVariable *GVOperand) {
-  for (Value *Op : TheUser->operands())
-    if (Op == GVOperand)
-      return true;
-  return false;
-}
-
 // For pooled strings we need to add the offset into the pool for each string.
 // This is done by adding a Get Element Pointer (GEP) before each user. This
 // function adds the GEP.
@@ -295,62 +300,13 @@ void PPCMergeStringPool::replaceUsesWithGEP(GlobalVariable *GlobalToReplace,
   Indices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), 0));
   Indices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), ElementIndex));
 
-  // Need to save a temporary copy of each user list because we remove uses
-  // as we replace them.
-  SmallVector<User *> Users;
-  for (User *CurrentUser : GlobalToReplace->users())
-    Users.push_back(CurrentUser);
-
-  for (User *CurrentUser : Users) {
-    Instruction *UserInstruction = dyn_cast<Instruction>(CurrentUser);
-    Constant *UserConstant = dyn_cast<Constant>(CurrentUser);
-
-    // At this point we expect that the user is either an instruction or a
-    // constant.
-    assert((UserConstant || UserInstruction) &&
-           "Expected the user to be an instruction or a constant.");
-
-    // The user was not found so it must have been replaced earlier.
-    if (!userHasOperand(CurrentUser, GlobalToReplace))
-      continue;
-
-    // We cannot replace operands in globals so we ignore those.
-    if (isa<GlobalValue>(CurrentUser))
-      continue;
-
-    if (!UserInstruction) {
-      // User is a constant type.
-      Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr(
-          PooledStructType, GPool, Indices);
-      UserConstant->handleOperandChange(GlobalToReplace, ConstGEP);
-      continue;
-    }
-
-    if (PHINode *UserPHI = dyn_cast<PHINode>(UserInstruction)) {
-      // GEP instructions cannot be added before PHI nodes.
-      // With getInBoundsGetElementPtr we create the GEP and then replace it
-      // inline into the PHI.
-      Constant *ConstGEP = ConstantExpr::getInBoundsGetElementPtr(
-          PooledStructType, GPool, Indices);
-      UserPHI->replaceUsesOfWith(GlobalToReplace, ConstGEP);
-      continue;
-    }
-    // The user is a valid instruction that is not a PHINode.
-    GetElementPtrInst *GEPInst =
-        GetElementPtrInst::Create(PooledStructType, GPool, Indices);
-    GEPInst->insertBefore(UserInstruction);
-
-    LLVM_DEBUG(dbgs() << "Inserting GEP before:\n");
-    LLVM_DEBUG(UserInstruction->dump());
-
-    LLVM_DEBUG(dbgs() << "Replacing this global:\n");
-    LLVM_DEBUG(GlobalToReplace->dump());
-    LLVM_DEBUG(dbgs() << "with this:\n");
-    LLVM_DEBUG(GEPInst->dump());
-
-    // After the GEP is inserted the GV can be replaced.
-    CurrentUser->replaceUsesOfWith(GlobalToReplace, GEPInst);
-  }
+  Constant *ConstGEP =
+      ConstantExpr::getInBoundsGetElementPtr(PooledStructType, GPool, Indices);
+  LLVM_DEBUG(dbgs() << "Replacing this global:\n");
+  LLVM_DEBUG(GlobalToReplace->dump());
+  LLVM_DEBUG(dbgs() << "with this:\n");
+  LLVM_DEBUG(ConstGEP->dump());
+  GlobalToReplace->replaceAllUsesWith(ConstGEP);
 }
 
 } // namespace
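The PPCMergeStringPool rewrite above works because the GEP into the pool is a constant expression rather than an instruction: a ConstantExpr is a Value like any other, so a single replaceAllUsesWith can install it in PHI nodes, constant initializers, and ordinary instructions alike, which is what made the removed per-user bookkeeping unnecessary. A rough plain-C++ analogue of what the pooled layout plus constant GEP expresses (names invented for illustration):

    #include <cstdio>

    struct PooledStrings { // stand-in for PooledStructType
      char a[6];
      char b[4];
    };
    static PooledStrings Pool = {"hello", "hi!"};

    int main() {
      const char *A = Pool.a; // "GEP Pool, 0, 0" as a link-time constant
      const char *B = Pool.b; // "GEP Pool, 0, 1"
      std::printf("%s %s\n", A, B);
    }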
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 961b8f0afe22..cdf7c048a4bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -31,12 +31,13 @@ using namespace llvm;
 // This part is for ELF object output.
 RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
                                                const MCSubtargetInfo &STI)
-    : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) {
+    : RISCVTargetStreamer(S), CurrentVendor("riscv") {
   MCAssembler &MCA = getStreamer().getAssembler();
   const FeatureBitset &Features = STI.getFeatureBits();
   auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
   setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features,
                                           MAB.getTargetOptions().getABIName()));
+  setFlagsFromFeatures(STI);
   // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a
   // relocation to ensure the jump target is correct after linking. This is due
   // to a limitation that shouldForceRelocation has to make the decision upfront
@@ -87,14 +88,13 @@ void RISCVTargetELFStreamer::finishAttributeSection() {
 void RISCVTargetELFStreamer::finish() {
   RISCVTargetStreamer::finish();
   MCAssembler &MCA = getStreamer().getAssembler();
-  const FeatureBitset &Features = STI.getFeatureBits();
   RISCVABI::ABI ABI = getTargetABI();
 
   unsigned EFlags = MCA.getELFHeaderEFlags();
 
-  if (Features[RISCV::FeatureStdExtC])
+  if (hasRVC())
     EFlags |= ELF::EF_RISCV_RVC;
-  if (Features[RISCV::FeatureStdExtZtso])
+  if (hasTSO())
     EFlags |= ELF::EF_RISCV_TSO;
 
   switch (ABI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index a6f54bf67b5d..e8f29cd8449b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -46,7 +46,6 @@ private:
   StringRef CurrentVendor;
 
   MCSection *AttributeSection = nullptr;
-  const MCSubtargetInfo &STI;
 
   void emitAttribute(unsigned Attribute, unsigned Value) override;
   void emitTextAttribute(unsigned Attribute, StringRef String) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 254a9a4bc0ef..b8e0f3a867f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -207,8 +207,6 @@ void RISCVMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case VK_RISCV_TLS_GOT_HI:
   case VK_RISCV_TLS_GD_HI:
   case VK_RISCV_TLSDESC_HI:
-  case VK_RISCV_TLSDESC_ADD_LO:
-  case VK_RISCV_TLSDESC_LOAD_LO:
     break;
   }
 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index ac4861bf113e..eee78a8c161f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -48,6 +48,11 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) {
   TargetABI = ABI;
 }
 
+void RISCVTargetStreamer::setFlagsFromFeatures(const MCSubtargetInfo &STI) {
+  HasRVC = STI.hasFeature(RISCV::FeatureStdExtC);
+  HasTSO = STI.hasFeature(RISCV::FeatureStdExtZtso);
+}
+
 void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI,
                                                bool EmitStackAlign) {
   if (EmitStackAlign) {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 070e72fb157a..cb8bc21cb635 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -33,6 +33,8 @@ struct RISCVOptionArchArg {
 
 class RISCVTargetStreamer : public MCTargetStreamer {
   RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
+  bool HasRVC = false;
+  bool HasTSO = false;
 
 public:
   RISCVTargetStreamer(MCStreamer &S);
@@ -58,6 +60,9 @@ public:
   void emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign);
   void setTargetABI(RISCVABI::ABI ABI);
   RISCVABI::ABI getTargetABI() const { return TargetABI; }
+  void setFlagsFromFeatures(const MCSubtargetInfo &STI);
+  bool hasRVC() const { return HasRVC; }
+  bool hasTSO() const { return HasTSO; }
 };
 
 // This part is for ascii assembly output
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index b2e9cd87373b..87bd9b4048cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -100,7 +100,7 @@ public:
   bool emitDirectiveOptionArch();
 
 private:
-  void emitAttributes();
+  void emitAttributes(const MCSubtargetInfo &SubtargetInfo);
 
   void emitNTLHint(const MachineInstr *MI);
 
@@ -385,8 +385,32 @@ void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
   if (const MDString *ModuleTargetABI =
           dyn_cast_or_null<MDString>(M.getModuleFlag("target-abi")))
     RTS.setTargetABI(RISCVABI::getTargetABI(ModuleTargetABI->getString()));
+
+  MCSubtargetInfo SubtargetInfo = *TM.getMCSubtargetInfo();
+
+  // Use module flag to update feature bits.
+  if (auto *MD = dyn_cast_or_null<MDNode>(M.getModuleFlag("riscv-isa"))) {
+    for (auto &ISA : MD->operands()) {
+      if (auto *ISAString = dyn_cast_or_null<MDString>(ISA)) {
+        auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+            ISAString->getString(), /*EnableExperimentalExtension=*/true,
+            /*ExperimentalExtensionVersionCheck=*/true);
+        if (!errorToBool(ParseResult.takeError())) {
+          auto &ISAInfo = *ParseResult;
+          for (const auto &Feature : RISCVFeatureKV) {
+            if (ISAInfo->hasExtension(Feature.Key) &&
+                !SubtargetInfo.hasFeature(Feature.Value))
+              SubtargetInfo.ToggleFeature(Feature.Key);
+          }
+        }
+      }
+    }
+
+    RTS.setFlagsFromFeatures(SubtargetInfo);
+  }
+
   if (TM.getTargetTriple().isOSBinFormatELF())
-    emitAttributes();
+    emitAttributes(SubtargetInfo);
 }
 
 void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -398,13 +422,13 @@ void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
   EmitHwasanMemaccessSymbols(M);
 }
 
-void RISCVAsmPrinter::emitAttributes() {
+void RISCVAsmPrinter::emitAttributes(const MCSubtargetInfo &SubtargetInfo) {
   RISCVTargetStreamer &RTS =
      static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
   // Use MCSubtargetInfo from TargetMachine. Individual functions may have
   // attributes that differ from other functions in the module and we have no
   // way to know which function is correct.
-  RTS.emitTargetAttributes(*TM.getMCSubtargetInfo(), /*EmitStackAlign*/ true);
+  RTS.emitTargetAttributes(SubtargetInfo, /*EmitStackAlign*/ true);
 }
 
 void RISCVAsmPrinter::emitFunctionEntryLabel() {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 0a314fdd41cb..89207640ee54 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -317,8 +317,9 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
       .addReg(MBBI->getOperand(1).getReg())
       .add(MBBI->getOperand(2));
   if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
-    // FIXME: Zdinx RV32 can not work on unaligned memory.
-    assert(!STI->hasFastUnalignedAccess());
+    // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
+    assert(!STI->hasFastUnalignedAccess() &&
+           !STI->enableUnalignedScalarMem());
 
     assert(MBBI->getOperand(2).getOffset() % 8 == 0);
     MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index 26451c80f57b..1bb6b6a561f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1025,6 +1025,11 @@ def FeatureFastUnalignedAccess
                       "true", "Has reasonably performant unaligned "
                       "loads and stores (both scalar and vector)">;
 
+def FeatureUnalignedScalarMem
+   : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
+                      "true", "Has reasonably performant unaligned scalar "
+                      "loads and stores">;
+
 def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
     "UsePostRAScheduler", "true", "Schedule again after register allocation">;
 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a0cec426002b..3fe7ddfdd427 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1883,7 +1883,8 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   // replace. If we don't support unaligned scalar mem, prefer the constant
   // pool.
   // TODO: Can the caller pass down the alignment?
-  if (!Subtarget.hasFastUnalignedAccess())
+  if (!Subtarget.hasFastUnalignedAccess() &&
+      !Subtarget.enableUnalignedScalarMem())
     return true;
 
   // Prefer to keep the load if it would require many instructions.
@@ -14559,7 +14560,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
     EVT VT = N->getValueType(0);
     SDLoc DL(N);
     SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
-    EVT OtherOpVT = OtherOp->getValueType(0);
+    EVT OtherOpVT = OtherOp.getValueType();
     SDValue IdentityOperand =
         DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
     if (!Commutative)
@@ -19772,8 +19773,10 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
     unsigned *Fast) const {
   if (!VT.isVector()) {
     if (Fast)
-      *Fast = Subtarget.hasFastUnalignedAccess();
-    return Subtarget.hasFastUnalignedAccess();
+      *Fast = Subtarget.hasFastUnalignedAccess() ||
+              Subtarget.enableUnalignedScalarMem();
+    return Subtarget.hasFastUnalignedAccess() ||
+           Subtarget.enableUnalignedScalarMem();
   }
 
   // All vector implementations must support element alignment
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index bf6547cc87ec..2f2dc6b80792 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -70,49 +70,62 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
   MachineBasicBlock *MBB = MI->getParent();
   MachineFunction &MF = *MBB->getParent();
 
-  // Get two load or store instructions.  Use the original instruction for one
-  // of them (arbitrarily the second here) and create a clone for the other.
-  MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
-  MBB->insert(MI, EarlierMI);
+  // Get two load or store instructions.  Use the original instruction for
+  // one of them and create a clone for the other.
+  MachineInstr *HighPartMI = MF.CloneMachineInstr(&*MI);
+  MachineInstr *LowPartMI = &*MI;
+  MBB->insert(LowPartMI, HighPartMI);
 
   // Set up the two 64-bit registers and remember super reg and its flags.
-  MachineOperand &HighRegOp = EarlierMI->getOperand(0);
-  MachineOperand &LowRegOp = MI->getOperand(0);
+  MachineOperand &HighRegOp = HighPartMI->getOperand(0);
+  MachineOperand &LowRegOp = LowPartMI->getOperand(0);
   Register Reg128 = LowRegOp.getReg();
   unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
   unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
   HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
   LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
 
-  if (MI->mayStore()) {
-    // Add implicit uses of the super register in case one of the subregs is
-    // undefined. We could track liveness and skip storing an undefined
-    // subreg, but this is hopefully rare (discovered with llvm-stress).
-    // If Reg128 was killed, set kill flag on MI.
-    unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
-    MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl);
-    MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
-  }
-
   // The address in the first (high) instruction is already correct.
   // Adjust the offset in the second (low) instruction.
-  MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
-  MachineOperand &LowOffsetOp = MI->getOperand(2);
+  MachineOperand &HighOffsetOp = HighPartMI->getOperand(2);
+  MachineOperand &LowOffsetOp = LowPartMI->getOperand(2);
   LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
 
-  // Clear the kill flags on the registers in the first instruction.
-  if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse())
-    EarlierMI->getOperand(0).setIsKill(false);
-  EarlierMI->getOperand(1).setIsKill(false);
-  EarlierMI->getOperand(3).setIsKill(false);
-
   // Set the opcodes.
   unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
   unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
   assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+  HighPartMI->setDesc(get(HighOpcode));
+  LowPartMI->setDesc(get(LowOpcode));
+
+  MachineInstr *FirstMI = HighPartMI;
+  if (MI->mayStore()) {
+    FirstMI->getOperand(0).setIsKill(false);
+    // Add implicit uses of the super register in case one of the subregs is
+    // undefined. We could track liveness and skip storing an undefined
+    // subreg, but this is hopefully rare (discovered with llvm-stress).
+    // If Reg128 was killed, set kill flag on MI.
+    unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
+    MachineInstrBuilder(MF, HighPartMI).addReg(Reg128, Reg128UndefImpl);
+    MachineInstrBuilder(MF, LowPartMI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
+  } else {
+    // If HighPartMI clobbers any of the address registers, it needs to come
+    // after LowPartMI.
+    auto overlapsAddressReg = [&](Register Reg) -> bool {
+      return RI.regsOverlap(Reg, MI->getOperand(1).getReg()) ||
+             RI.regsOverlap(Reg, MI->getOperand(3).getReg());
+    };
+    if (overlapsAddressReg(HighRegOp.getReg())) {
+      assert(!overlapsAddressReg(LowRegOp.getReg()) &&
+             "Both loads clobber address!");
+      MBB->splice(HighPartMI, MBB, LowPartMI);
+      FirstMI = LowPartMI;
+    }
+  }
 
-  EarlierMI->setDesc(get(HighOpcode));
-  MI->setDesc(get(LowOpcode));
+  // Clear the kill flags on the address registers in the first instruction.
+  FirstMI->getOperand(1).setIsKill(false);
+  FirstMI->getOperand(3).setIsKill(false);
 }
 
 // Split ADJDYNALLOC instruction MI.
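The splitMove fix above addresses a classic decomposition hazard: when a 128-bit load is split into two 64-bit loads, the half written first must not be a register that the second half's address still needs. A standalone toy model of the wrong and right orders (registers modeled as a tiny array; not SystemZ's real register file):

    #include <cassert>
    #include <cstdint>

    uint64_t Mem[4] = {0x1111, 0x2222, 0x3333, 0x4444};
    uint64_t R[2] = {2, 0}; // R[0] doubles as the address base

    int main() {
      // Wrong order (high part first) when the high destination is R[0]:
      //   R[0] = Mem[R[0]];      // clobbers the base...
      //   R[1] = Mem[R[0] + 1];  // ...so the low load reads the wrong slot
      // Correct order, as splitMove now arranges via MBB->splice():
      R[1] = Mem[R[0] + 1]; // low half first, base still intact
      R[0] = Mem[R[0]];     // high half may now clobber the base
      assert(R[0] == 0x3333 && R[1] == 0x4444);
    }

Stores never have this problem, since they only read the data registers, which is why the reordering lives on the load path.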
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index e89ddcc570c9..1aff5f9fad97 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -867,6 +867,7 @@ def ProcessorFeatures {
   // Nehalem
   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
   list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
+                                      TuningSlowDivide64,
                                       TuningInsertVZEROUPPER,
                                       TuningNoDomainDelayMov];
 
@@ -1336,6 +1337,7 @@ def ProcessorFeatures {
                                             FeatureCMOV,
                                             FeatureX86_64];
   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
+                                            TuningSlowDivide64,
                                             TuningSlowSHLD,
                                             TuningSBBDepBreaking,
                                             TuningInsertVZEROUPPER];
@@ -1358,6 +1360,7 @@ def ProcessorFeatures {
   list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
                                          TuningFastScalarShiftMasks,
                                          TuningFastVectorShiftMasks,
+                                         TuningSlowDivide64,
                                          TuningSlowSHLD,
                                          TuningSBBDepBreaking,
                                          TuningInsertVZEROUPPER];
@@ -1380,6 +1383,7 @@ def ProcessorFeatures {
                                                  TuningFastVectorShiftMasks,
                                                  TuningFastMOVBE,
                                                  TuningSBBDepBreaking,
+                                                 TuningSlowDivide64,
                                                  TuningSlowSHLD];
   list<SubtargetFeature> BtVer2Features =
     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1404,6 +1408,7 @@ def ProcessorFeatures {
                                           FeatureLWP,
                                           FeatureLAHFSAHF64];
   list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
+                                         TuningSlowDivide64,
                                          TuningFast11ByteNOP,
                                          TuningFastScalarShiftMasks,
                                          TuningBranchFusion,
@@ -1483,6 +1488,7 @@ def ProcessorFeatures {
                                           TuningFastScalarShiftMasks,
                                           TuningFastVariablePerLaneShuffle,
                                           TuningFastMOVBE,
+                                          TuningSlowDivide64,
                                           TuningSlowSHLD,
                                           TuningSBBDepBreaking,
                                           TuningInsertVZEROUPPER,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 833f058253d8..553d338b7790 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2923,11 +2923,10 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
 }
 
 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
-  // Cannot use 32 bit constants to reference objects in kernel code model.
-  // Cannot use 32 bit constants to reference objects in large PIC mode since
-  // GOTOFF is 64 bits.
+  // Cannot use 32 bit constants to reference objects in kernel/large code
+  // model.
   if (TM.getCodeModel() == CodeModel::Kernel ||
-      (TM.getCodeModel() == CodeModel::Large && TM.isPositionIndependent()))
+      TM.getCodeModel() == CodeModel::Large)
     return false;
 
   // In static codegen with small code model, we can get the address of a label
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96bbd981ff24..911d9986a879 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7295,7 +7295,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
   // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
   if (ScalarSize == 32 ||
       (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
-      CVT == MVT::f16 ||
+      (CVT == MVT::f16 && Subtarget.hasAVX2()) ||
      (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
     const Constant *C = nullptr;
     if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
@@ -29844,7 +29844,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
     return R;
 
   // AVX512 implicitly uses modulo rotation amounts.
-  if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
+  if ((Subtarget.hasVLX() ||
+       (Subtarget.hasAVX512() && Subtarget.hasEVEX512())) &&
+      32 <= EltSizeInBits) {
     // Attempt to rotate by immediate.
     if (IsCstSplat) {
       unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
@@ -47038,10 +47040,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
     return V;
 
-  // fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
-  // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
-  // into (lshr, (sext (a), SarConst - [56,48,32,24,16]))
-  // depending on sign of (SarConst - [56,48,32,24,16])
+  // fold (SRA (SHL X, ShlConst), SraConst)
+  // into (SHL (sext_in_reg X), ShlConst - SraConst)
+  //   or (sext_in_reg X)
+  //   or (SRA (sext_in_reg X), SraConst - ShlConst)
+  // depending on relation between SraConst and ShlConst.
+  // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows
+  // us to do the sext_in_reg from corresponding bit.
 
   // sexts in X86 are MOVs. The MOVs have the same code size
   // as above SHIFTs (only SHIFT on 1 has lower code size).
@@ -47057,29 +47062,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
   SDValue N00 = N0.getOperand(0);
   SDValue N01 = N0.getOperand(1);
   APInt ShlConst = N01->getAsAPIntVal();
-  APInt SarConst = N1->getAsAPIntVal();
+  APInt SraConst = N1->getAsAPIntVal();
   EVT CVT = N1.getValueType();
 
-  if (SarConst.isNegative())
+  if (CVT != N01.getValueType())
+    return SDValue();
+
+  if (SraConst.isNegative())
     return SDValue();
 
   for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
     unsigned ShiftSize = SVT.getSizeInBits();
-    // skipping types without corresponding sext/zext and
-    // ShlConst that is not one of [56,48,32,24,16]
+    // Only deal with (Size - ShlConst) being equal to 8, 16 or 32.
     if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
       continue;
     SDLoc DL(N);
     SDValue NN =
         DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
-    SarConst = SarConst - (Size - ShiftSize);
-    if (SarConst == 0)
+    if (SraConst.eq(ShlConst))
       return NN;
-    if (SarConst.isNegative())
+    if (SraConst.ult(ShlConst))
       return DAG.getNode(ISD::SHL, DL, VT, NN,
-                         DAG.getConstant(-SarConst, DL, CVT));
+                         DAG.getConstant(ShlConst - SraConst, DL, CVT));
     return DAG.getNode(ISD::SRA, DL, VT, NN,
-                       DAG.getConstant(SarConst, DL, CVT));
+                       DAG.getConstant(SraConst - ShlConst, DL, CVT));
   }
   return SDValue();
 }
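A worked instance of the rewritten fold, checkable in plain C++ (assuming the usual two's-complement, arithmetic-right-shift behavior of mainstream targets): with Size = 32, ShlConst = 24 and SraConst = 26, Size - ShlConst is 8, so the shift pair is a sign extension of the low 8 bits followed by a right shift of SraConst - ShlConst = 2.

    #include <cassert>
    #include <cstdint>

    int32_t viaShifts(int32_t X) {
      return static_cast<int32_t>(static_cast<uint32_t>(X) << 24) >> 26;
    }
    int32_t viaSext(int32_t X) {
      // sext_in_reg from bit 8, then SRA by 2
      return static_cast<int32_t>(static_cast<int8_t>(X)) >> 2;
    }

    int main() {
      for (int32_t X : {0, 1, 0x7F, 0x80, 0xFF, 0x1234, -1})
        assert(viaShifts(X) == viaSext(X));
    }

The new CVT != N01.getValueType() bail-out and the unsigned eq/ult comparisons replace the old in-place mutation of SarConst, which misbehaved when the two shift-amount constants did not share a type.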
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index fdca58141f0f..12d3b889c211 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -826,7 +826,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
 
 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
 // smaller extract to enable EVEX->VEX.
-let Predicates = [NoVLX] in {
+let Predicates = [NoVLX, HasEVEX512] in {
 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
           (v2i64 (VEXTRACTI128rr
                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
@@ -3080,7 +3080,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                      addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
 }
 
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
@@ -3111,7 +3111,7 @@ let Predicates = [HasAVX512, NoVLX] in {
   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
 }
 
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
@@ -3505,7 +3505,7 @@ multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
 
 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
 // available. Use a 512-bit operation and extract.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
@@ -3517,7 +3517,7 @@ let Predicates = [HasAVX512, NoVLX] in {
   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
 }
 
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
 
@@ -5010,8 +5010,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                     SchedWriteVecALU, HasAVX512, 1>, T8;
 
-// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasDQI, NoVLX] in {
+// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
+let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
             (EXTRACT_SUBREG
                 (VPMULLQZrr
@@ -5067,7 +5067,7 @@ multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
                 sub_xmm)>;
 }
 
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
@@ -6044,7 +6044,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                 SchedWriteVecShift>;
 
 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
             (EXTRACT_SUBREG (v8i64
               (VPSRAQZrr
@@ -6173,14 +6173,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>;
 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
 
-defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
 
 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
             (EXTRACT_SUBREG (v8i64
               (VPROLVQZrr
@@ -6231,7 +6231,7 @@ let Predicates = [HasAVX512, NoVLX] in {
 }
 
 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
             (EXTRACT_SUBREG (v8i64
               (VPRORVQZrr
@@ -9828,7 +9828,7 @@ defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;
 
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
          (v8i16 (EXTRACT_SUBREG
                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -9839,7 +9839,7 @@ def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
                  VR256X:$src, sub_ymm)))), sub_xmm))>;
 }
 
-let Predicates = [HasBWI, NoVLX] in {
+let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                  VR256X:$src, sub_ymm))), sub_xmm))>;
@@ -10382,7 +10382,7 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                 EVEX_V128;
   }
-  let Predicates = [prd, NoVLX] in {
+  let Predicates = [prd, NoVLX, HasEVEX512] in {
     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
   }
@@ -11169,7 +11169,7 @@ defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                     SchedWriteVecALU>;
 
 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   def : Pat<(v4i64 (abs VR256X:$src)),
             (EXTRACT_SUBREG
                 (VPABSQZrr
@@ -11185,7 +11185,7 @@ let Predicates = [HasAVX512, NoVLX] in {
 
 // Use 512bit version to implement 128/256 bit.
 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                  AVX512VLVectorVTInfo _, Predicate prd> {
-  let Predicates = [prd, NoVLX] in {
+  let Predicates = [prd, NoVLX, HasEVEX512] in {
     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
               (EXTRACT_SUBREG
                 (!cast<Instruction>(InstrStr # "Zrr")
@@ -11804,7 +11804,7 @@ let Predicates = [HasAVX512] in {
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
 }
 
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
   def : Pat<(v16i8 (vnot VR128X:$src)),
             (EXTRACT_SUBREG
                 (VPTERNLOGQZrri
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8f..4d55a084b730 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
@@ -244,7 +244,8 @@ public:
   // TODO: Currently we're always allowing widening on CPUs without VLX,
   // because for many cases we don't have a better option.
   bool canExtendTo512DQ() const {
-    return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
+    return hasAVX512() && hasEVEX512() &&
+           (!hasVLX() || getPreferVectorWidth() >= 512);
   }
   bool canExtendTo512BW() const {
     return hasBWI() && canExtendTo512DQ();
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
index 4466d50458e1..848b531dd8dd 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
@@ -1266,8 +1266,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(X86::FEATURE_AVX2);
   if (HasLeaf7 && ((EBX >> 8) & 1))
     setFeature(X86::FEATURE_BMI2);
-  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
     setFeature(X86::FEATURE_AVX512F);
+    setFeature(X86::FEATURE_EVEX512);
+  }
   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
     setFeature(X86::FEATURE_AVX512DQ);
   if (HasLeaf7 && ((EBX >> 19) & 1))
@@ -1772,6 +1774,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
   // AVX512 is only supported if the OS supports the context save for it.
   Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
+  if (Features["avx512f"])
+    Features["evex512"] = true;
   Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
   Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
   Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
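The Host.cpp hunks encode a feature implication: anything that reports plain AVX512F also gets EVEX512, the flag that distinguishes classic 512-bit-capable AVX-512 implementations from the AVX10-era 256-bit-only EVEX subset. A minimal sketch of the implication as generic feature-map code (std::map standing in for LLVM's StringMap):

    #include <map>
    #include <string>

    int main() {
      std::map<std::string, bool> Features;
      bool HasLeaf7 = true, HasAVX512Save = true;
      unsigned EBX = 1u << 16; // pretend CPUID leaf 7 reports AVX512F
      Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
      if (Features["avx512f"]) // the implication added above
        Features["evex512"] = true;
      return Features["evex512"] ? 0 : 1;
    }

Without the implication, host feature detection would describe existing AVX-512 hardware as 256-bit-only, and the HasEVEX512-qualified patterns in the X86InstrAVX512.td hunks above would stop matching.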
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7ebf265e17ba..27c411250d53 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1186,10 +1186,15 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
     switch (RVI->getOpcode()) {
     // Extend the analysis by looking upwards.
     case Instruction::BitCast:
-    case Instruction::GetElementPtr:
     case Instruction::AddrSpaceCast:
       FlowsToReturn.insert(RVI->getOperand(0));
       continue;
+    case Instruction::GetElementPtr:
+      if (cast<GEPOperator>(RVI)->isInBounds()) {
+        FlowsToReturn.insert(RVI->getOperand(0));
+        continue;
+      }
+      return false;
     case Instruction::Select: {
       SelectInst *SI = cast<SelectInst>(RVI);
       FlowsToReturn.insert(SI->getTrueValue());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 951372adcfa9..619b3f612f25 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2212,6 +2212,9 @@ static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) {
 
 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
                              bool &RenameTarget) {
+  if (GA.isWeakForLinker())
+    return false;
+
   RenameTarget = false;
   bool Ret = false;
   if (hasUseOtherThanLLVMUsed(GA, U))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9f220ec003ec..86a39cf2ee93 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2606,7 +2606,7 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
   // %cnd = icmp slt i32 %rem, 0
   // %add = add i32 %rem, %n
   // %sel = select i1 %cnd, i32 %add, i32 %rem
-  if (match(TrueVal, m_Add(m_Value(RemRes), m_Value(Remainder))) &&
+  if (match(TrueVal, m_Add(m_Specific(RemRes), m_Value(Remainder))) &&
      match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&
      IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&
      FalseVal == RemRes)
@@ -3201,7 +3201,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
 // pattern.
 static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                         const APInt *Cond1, Value *CtlzOp,
-                                        unsigned BitWidth) {
+                                        unsigned BitWidth,
+                                        bool &ShouldDropNUW) {
   // The challenge in recognizing std::bit_ceil(X) is that the operand is used
   // for the CTLZ proper and select condition, each possibly with some
   // operation like add and sub.
@@ -3224,6 +3225,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
   ConstantRange CR = ConstantRange::makeExactICmpRegion(
       CmpInst::getInversePredicate(Pred), *Cond1);
 
+  ShouldDropNUW = false;
+
   // Match the operation that's used to compute CtlzOp from CommonAncestor. If
   // CtlzOp == CommonAncestor, return true as no operation is needed. If a
   // match is found, execute the operation on CR, update CR, and return true.
@@ -3237,6 +3240,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
     return true;
   }
   if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+    ShouldDropNUW = true;
     CR = ConstantRange(*C).sub(CR);
     return true;
   }
@@ -3306,14 +3310,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
     Pred = CmpInst::getInversePredicate(Pred);
   }
 
+  bool ShouldDropNUW;
+
   if (!match(FalseVal, m_One()) ||
       !match(TrueVal, m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
                                                              m_Value(Ctlz)))))) ||
       !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
-      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
+                                   ShouldDropNUW))
     return nullptr;
 
+  if (ShouldDropNUW)
+    cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+
   // Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
   // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
   // is an integer constant. Masking with BitWidth-1 comes free on some
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 9df28747570c..104e8ceb7967 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -279,6 +279,9 @@ bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
   Value *LHS = ICI->getOperand(0);
   Value *RHS = ICI->getOperand(1);
 
+  if (!LHS->getType()->isIntegerTy())
+    return false;
+
   // Canonicalize to the `Index Pred Invariant` comparison
   if (IsLoopInvariant(LHS)) {
     std::swap(LHS, RHS);
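The ShouldDropNUW plumbing above matters because removing the select widens the range of values reaching the `sub C, X` that feeds the ctlz; a `nuw` flag proven under the old guard could become poison, so it is dropped. The surrounding combine produces the branchless bit_ceil shape, which can be checked standalone (C++20):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    uint32_t branchlessCeil(uint32_t X) {
      unsigned Ctlz = std::countl_zero(X - 1); // X >= 2 keeps X - 1 nonzero
      return 1u << (-Ctlz & 31u); // 1 << (-ctlz & (BitWidth - 1))
    }

    int main() {
      for (uint32_t X : {2u, 3u, 4u, 5u, 17u, 1000u, (1u << 31) - 1})
        assert(branchlessCeil(X) == std::bit_ceil(X));
    }

The -Ctlz & 31 form is preferred over 32 - Ctlz because, as the comment in the hunk notes, the negation typically maps to one instruction and the mask comes free on common shift hardware.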
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1fbd69e38eae..0a9e2c7f49f5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
         TysForDecl.push_back(
             FixedVectorType::get(CI->getType(), E->Scalars.size()));
-      auto *CEI = cast<CallInst>(VL0);
       for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
         ValueList OpVL;
         // Some intrinsics have scalar arguments. This argument should not be
         // vectorized.
         if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
+          CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(I);
           OpVecs.push_back(CEI->getArgOperand(I));
           if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
@@ -11671,25 +11671,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
           return E->VectorizedValue;
         }
-        auto GetOperandSignedness = [&](unsigned Idx) {
-          const TreeEntry *OpE = getOperandEntry(E, Idx);
-          bool IsSigned = false;
-          auto It = MinBWs.find(OpE);
-          if (It != MinBWs.end())
-            IsSigned = It->second.second;
-          else
-            IsSigned = any_of(OpE->Scalars, [&](Value *R) {
-              return !isKnownNonNegative(R, SimplifyQuery(*DL));
-            });
-          return IsSigned;
-        };
-        ScalarArg = CEI->getArgOperand(I);
-        if (cast<VectorType>(OpVec->getType())->getElementType() !=
-            ScalarArg->getType()) {
-          auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
-                                              VecTy->getNumElements());
-          OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
-        }
         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
         OpVecs.push_back(OpVec);
         if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))