author     Dimitry Andric <dim@FreeBSD.org>        2022-02-20 14:39:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>        2022-02-20 14:39:23 +0000
commit     c4bd2b43b293827b7ec880a10a6e491f0cc94211 (patch)
tree       acde8cf5ca883ea6e4fa6c9026bb8c6e3c14377b /llvm/lib
parent     3f25e997d96a3150a192777c3c389c258c5cf7ee (diff)
Vendor import of llvm-project branch release/13.x llvmorg-13.0.1-0-g75e33f71c2da.
Refs: vendor/llvm-project/llvmorg-13.0.1-0-g75e33f71c2da, vendor/llvm-project/release-13.x
Diffstat (limited to 'llvm/lib')
37 files changed, 292 insertions, 163 deletions
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index e7445e225d52..1da712eb9d26 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -697,14 +697,16 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I,
   case Instruction::AtomicRMW:
     return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
   case Instruction::Call:
-    return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+  case Instruction::CallBr:
   case Instruction::Invoke:
-    return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+    return getModRefInfo((const CallBase *)I, Loc, AAQIP);
   case Instruction::CatchPad:
     return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
   case Instruction::CatchRet:
     return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
   default:
+    assert(!I->mayReadOrWriteMemory() &&
+           "Unhandled memory access instruction!");
     return ModRefInfo::NoModRef;
   }
 }
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4c2413e14435..e8f79a28a8e8 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -354,6 +354,7 @@ protected:
   bool simplifyCallSite(Function *F, CallBase &Call);
   template <typename Callable>
   bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
   /// Return true if the given argument to the function being considered for
@@ -1471,6 +1472,27 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
   return true;
 }
 
+/// Try to simplify a call to llvm.is.constant.
+///
+/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
+/// we expect calls of this specific intrinsic to be infrequent.
+///
+/// FIXME: Given that we know CB's parent (F) caller
+/// (CandidateCall->getParent()->getParent()), we might be able to determine
+/// whether inlining F into F's caller would change how the call to
+/// llvm.is.constant would evaluate.
+bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
+  Value *Arg = CB.getArgOperand(0);
+  auto *C = dyn_cast<Constant>(Arg);
+
+  if (!C)
+    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
+
+  Type *RT = CB.getFunctionType()->getReturnType();
+  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
+  return true;
+}
+
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
@@ -2091,6 +2113,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
       if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
         SROAArgValues[II] = SROAArg;
       return true;
+    case Intrinsic::is_constant:
+      return simplifyIntrinsicCallIsConstant(Call);
     }
   }
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 5ca1e91cc5f4..fde7b942665d 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -318,6 +318,11 @@ public:
     return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
   }
 
+  bool doFinalization(Module &M) override {
+    RewindFunction = nullptr;
+    return false;
+  }
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetPassConfig>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 707161d5a8b0..68920e2e50df 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
@@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
   // every definition of it, meaning we can switch all the DBG_VALUEs over
   // to just reference the stack slot.
   SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
-  SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+  SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
       SpilledOperandsMap;
   for (MachineOperand *MO : LRIDbgOperands)
     SpilledOperandsMap[MO->getParent()].push_back(MO);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2e4a656ea0c8..4bbb5beb21f3 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1417,3 +1417,16 @@ std::string TargetInstrInfo::createMIROperandComment(
 }
 
 TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                             unsigned &Flags) const {
+  // Some instrumentations create special TargetOpcode at the start which
+  // expands to special code sequences which must be present.
+  auto First = MBB.getFirstNonDebugInstr();
+  if (First != MBB.end() &&
+      (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+       First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+    return false;
+
+  return true;
+}
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 7b0dab799e1a..2180eedb58f7 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1592,6 +1592,12 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
   assert((!New || isa<ValueAsMetadata>(New)) &&
          "DIArgList must be passed a ValueAsMetadata");
   untrack();
+  bool Uniq = isUniqued();
+  if (Uniq) {
+    // We need to update the uniqueness once the Args are updated since they
+    // form the key to the DIArgLists store.
+    eraseFromStore();
+  }
   ValueAsMetadata *NewVM = cast_or_null<ValueAsMetadata>(New);
   for (ValueAsMetadata *&VM : Args) {
     if (&VM == OldVMPtr) {
@@ -1601,6 +1607,10 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
       VM = ValueAsMetadata::get(UndefValue::get(VM->getValue()->getType()));
     }
   }
+  if (Uniq) {
+    if (uniquify() != this)
+      storeDistinctInContext();
+  }
   track();
 }
 
 void DIArgList::track() {
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 99819602c545..85ac63eaa1aa 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -55,8 +55,15 @@ LLVMContextImpl::~LLVMContextImpl() {
 
   // Drop references for MDNodes. Do this before Values get deleted to avoid
   // unnecessary RAUW when nodes are still unresolved.
-  for (auto *I : DistinctMDNodes)
+  for (auto *I : DistinctMDNodes) {
+    // We may have DIArgList that were uniqued, and as it has a custom
+    // implementation of dropAllReferences, it needs to be explicitly invoked.
+    if (auto *AL = dyn_cast<DIArgList>(I)) {
+      AL->dropAllReferences();
+      continue;
+    }
     I->dropAllReferences();
+  }
 #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS)                                    \
   for (auto *I : CLASS##s)                                                     \
     I->dropAllReferences();
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 2ae23fdc95a8..655319eb1c99 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -391,8 +391,9 @@ template <> struct MDNodeKeyImpl<DIEnumerator> {
         IsUnsigned(N->isUnsigned()) {}
 
   bool isKeyOf(const DIEnumerator *RHS) const {
-    return APInt::isSameValue(Value, RHS->getValue()) &&
-           IsUnsigned == RHS->isUnsigned() && Name == RHS->getRawName();
+    return Value.getBitWidth() == RHS->getValue().getBitWidth() &&
+           Value == RHS->getValue() && IsUnsigned == RHS->isUnsigned() &&
+           Name == RHS->getRawName();
   }
 
   unsigned getHashValue() const { return hash_combine(Value, Name); }
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 70d69fc8dd32..6a9a174a1b6a 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -502,6 +502,23 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
   return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
 }
 
+static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName,
+                                     unsigned Type) {
+  if (TT.getArch() == Triple::x86_64) {
+    // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
+    // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't
+    // error for SHT_PROGBITS .eh_frame
+    return SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS;
+  }
+  if (TT.isMIPS()) {
+    // MIPS .debug_* sections should have SHT_MIPS_DWARF section type to
+    // distinguish among sections contain DWARF and ECOFF debug formats,
+    // but in assembly files these sections have SHT_PROGBITS type.
+    return hasPrefix(SectionName, ".debug_") && Type == ELF::SHT_PROGBITS;
+  }
+  return false;
+}
+
 bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
   StringRef SectionName;
@@ -659,16 +676,14 @@ EndStmt:
       getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
                                  IsComdat, UniqueID, LinkedToSym);
   getStreamer().SwitchSection(Section, Subsection);
-  // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
-  // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error
-  // for SHT_PROGBITS .eh_frame
-  if (Section->getType() != Type &&
-      !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
-    Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
-               utohexstr(Section->getType()));
   // Check that flags are used consistently. However, the GNU assembler permits
   // to leave out in subsequent uses of the same sections; for compatibility,
   // do likewise.
+  if (!TypeName.empty() && Section->getType() != Type &&
+      !allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
+                                Type))
+    Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
+               utohexstr(Section->getType()));
   if ((extraFlags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
     Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
               utohexstr(Section->getFlags()));
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9a2e1003da5a..71e3a1362f7e 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -151,7 +151,12 @@ static std::atomic<int> TaskGroupInstances;
 // lock, only allow the first TaskGroup to run tasks parallelly. In the scenario
 // of nested parallel_for_each(), only the outermost one runs parallelly.
 TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
-TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+TaskGroup::~TaskGroup() {
+  // We must ensure that all the workloads have finished before decrementing the
+  // instances count.
+  L.sync();
+  --TaskGroupInstances;
+}
 
 void TaskGroup::spawn(std::function<void()> F) {
   if (Parallel) {
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index be88e7db1400..b83477e0e4cc 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -29,14 +29,6 @@
 #include <zircon/syscalls.h>
 #endif
 
-#if defined(__mips__)
-#  if defined(__OpenBSD__)
-#    include <mips64/sysarch.h>
-#  elif !defined(__FreeBSD__)
-#    include <sys/cachectl.h>
-#  endif
-#endif
-
 #if defined(__APPLE__)
 extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
 #else
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2eee2845ba9..5bbf4f97c54a 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1049,6 +1049,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case AArch64::MOVaddrEXT: {
     // Expand into ADRP + ADD.
     Register DstReg = MI.getOperand(0).getReg();
+    assert(DstReg != AArch64::XZR);
     MachineInstrBuilder MIB1 =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
             .add(MI.getOperand(1));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 091a62aa4ada..f29bb83c2d2e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6923,6 +6923,8 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
 
 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                               unsigned &Flags) const {
+  if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
+    return false;
   // Check if LR is available through all of the MBB. If it's not, then set
   // a flag.
   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 12744e4de09b..f3da6bf057c2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -673,40 +673,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
 // removed, along with the AArch64Wrapper node.
 
 let AddedComplexity = 10 in
-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
-                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
+              [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
               Sched<[WriteLDAdr]>;
 
 // The MOVaddr instruction should match only when the add is not folded
 // into a load or store address.
 def MOVaddr
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                             tglobaladdr:$low))]>,
       Sched<[WriteAdrAdr]>;
 def MOVaddrJT
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                              tjumptable:$low))]>,
       Sched<[WriteAdrAdr]>;
 def MOVaddrCP
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                              tconstpool:$low))]>,
       Sched<[WriteAdrAdr]>;
 def MOVaddrBA
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                              tblockaddress:$low))]>,
       Sched<[WriteAdrAdr]>;
 def MOVaddrTLS
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                             tglobaltlsaddr:$low))]>,
       Sched<[WriteAdrAdr]>;
 def MOVaddrEXT
-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                             texternalsym:$low))]>,
       Sched<[WriteAdrAdr]>;
 
 // Normally AArch64addlow either gets folded into a following ldr/str,
@@ -714,8 +714,8 @@ def MOVaddrEXT
 // might appear without either of them, so allow lowering it into a plain
 // add.
 def ADDlowTLS
-    : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
-             [(set GPR64:$dst, (AArch64addlow GPR64:$src,
+    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
+             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                               tglobaltlsaddr:$low))]>,
       Sched<[WriteAdr]>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 8c34027f7bb3..94a0ce09afed 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -13,6 +13,8 @@
 #include "AArch64RegisterBankInfo.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
@@ -271,6 +273,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::WSeqPairsClassRegClassID:
   case AArch64::XSeqPairsClassRegClassID:
   case AArch64::MatrixIndexGPR32_12_15RegClassID:
+  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
     return getRegBank(AArch64::GPRRegBankID);
   case AArch64::CCRRegClassID:
     return getRegBank(AArch64::CCRegBankID);
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 493c1ad87f93..d6dd807afbce 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2048,7 +2048,7 @@ SILoadStoreOptimizer::collectMergeableInsts(
     // adjacent to each other in the list, which will make it easier to find
     // matches.
     MergeList.sort(
-        [] (const CombineInfo &A, CombineInfo &B) {
+        [] (const CombineInfo &A, const CombineInfo &B) {
           return A.Offset < B.Offset;
         });
     ++I;
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index aff7ec8d2ed6..256a95b94f6c 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -525,7 +525,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
 
   MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
       .addImm(ArgAssigner.StackOffset)
-      .addImm(0)
+      .addImm(-1ULL)
       .add(predOps(ARMCC::AL));
 
   return true;
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 28a076edd6dc..9224c2221f4d 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2022,7 +2022,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(AdjStackUp))
-                  .addImm(NumBytes).addImm(0));
+                  .addImm(NumBytes).addImm(-1ULL));
 
   // Now the return value.
   if (RetVT != MVT::isVoid) {
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9c7055deaaf8..7c238a1099d8 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -79,6 +79,10 @@ public:
 
   void Select(SDNode *N) override;
 
+  /// Return true as some complex patterns, like those that call
+  /// canExtractShiftFromMul can modify the DAG inplace.
+  bool ComplexPatternFuncMutatesDAG() const override { return true; }
+
   bool hasNoVMLxHazardUse(SDNode *N) const;
   bool isShifterOpProfitable(const SDValue &Shift,
                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index ef07b2839bc9..4c9b8b5fbfa9 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1520,6 +1520,7 @@ def tTBH_JT : tPseudoInst<(outs),
 let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
 def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
                           [(set R0, ARMthread_pointer)]>,
+                          Requires<[IsThumb, IsReadTPSoft]>,
                           Sched<[WriteBr]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index e7eed2a0bbb1..f8b226b84e98 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4670,6 +4670,9 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
 }
 
+// Reading thread pointer from coprocessor register
+def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 3)>,
+      Requires<[IsThumb2, IsReadTPHard]>;
 
 //===----------------------------------------------------------------------===//
 // ARMv8.1 Privilege Access Never extension
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 7be5fc33a0af..04a835f08855 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -1027,12 +1027,13 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
     }
 
     SDNode *Rdhwr =
-        CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0),
+        CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), MVT::Glue,
                                CurDAG->getRegister(Mips::HWR29, MVT::i32),
                                CurDAG->getTargetConstant(0, DL, MVT::i32));
    SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
-                                         SDValue(Rdhwr, 0));
-    SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+                                         SDValue(Rdhwr, 0), SDValue(Rdhwr, 1));
+    SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT,
+                                             Chain.getValue(1));
     ReplaceNode(Node, ResNode.getNode());
     return true;
   }
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 7631bb4bccfb..392de0f251a2 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1576,6 +1576,16 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     std::swap(Operands[2], Operands[1]);
   }
 
+  // Handle base mnemonic for atomic loads where the EH bit is zero.
+  if (Name == "lqarx" || Name == "ldarx" || Name == "lwarx" ||
+      Name == "lharx" || Name == "lbarx") {
+    if (Operands.size() != 5)
+      return false;
+    PPCOperand &EHOp = (PPCOperand &)*Operands[4];
+    if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+      Operands.pop_back();
+  }
+
   return false;
 }
@@ -1745,7 +1755,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
   }
 
   PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
-  if (Op.isImm() && Op.getImm() == ImmVal)
+  if (Op.isU3Imm() && Op.getImm() == ImmVal)
     return Match_Success;
 
   return Match_InvalidOperand;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 207101763ac2..7dab7a52ac53 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -999,7 +999,7 @@ bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                             unsigned &Flags) const {
   // More accurate safety checking is done in getOutliningCandidateInfo.
-  return true;
+  return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
 }
 
 // Enum values indicating how an outlined call should be constructed.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7df7cc93d6eb..53495489cef8 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2173,7 +2173,7 @@ let hasSideEffects = 1 in {
   def EX   : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
   def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
   let hasNoSchedulingInfo = 1 in
-    def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
+    def EXRL_Pseudo : Alias<6, (outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
                                              bdaddr12only:$bdl1, bdaddr12only:$bd2),
                              []>;
 }
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4add8d30e010..65ffe6621545 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -657,35 +657,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
                  EntryBlk->end());
   TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
 
-  int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
-  Register BaseReg;
-  uint64_t FrameOffset =
-      X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
-  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
+  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
+  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();
 
   // TODO: add support for YMM and ZMM here.
   unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
 
   // In the XMM save block, save all the XMM argument registers.
-  for (int64_t OpndIdx = 3, RegIdx = 0;
+  for (int64_t OpndIdx = 7, RegIdx = 0;
        OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
        OpndIdx++, RegIdx++) {
-
-    int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
-
-    MachineMemOperand *MMO = Func->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
-        MachineMemOperand::MOStore,
-        /*Size=*/16, Align(16));
-
-    BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
-        .addReg(BaseReg)
-        .addImm(/*Scale=*/1)
-        .addReg(/*IndexReg=*/0)
-        .addImm(/*Disp=*/Offset)
-        .addReg(/*Segment=*/0)
-        .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
-        .addMemOperand(MMO);
+    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp)
+        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
+      else
+        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
+    }
+    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
     assert(Register::isPhysicalRegister(
         VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 032db2a80a77..4b13b5b540b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3533,13 +3533,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
       SmallVector<SDValue, 12> SaveXMMOps;
       SaveXMMOps.push_back(Chain);
       SaveXMMOps.push_back(ALVal);
-      SaveXMMOps.push_back(
-          DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
+      SaveXMMOps.push_back(RSFIN);
       SaveXMMOps.push_back(
           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
       llvm::append_range(SaveXMMOps, LiveXMMRegs);
-      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
-                                   MVT::Other, SaveXMMOps));
+      MachineMemOperand *StoreMMO =
+          DAG.getMachineFunction().getMachineMemOperand(
+              MachinePointerInfo::getFixedStack(
+                  DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
+                  Offset),
+              MachineMemOperand::MOStore, 128, Align(16));
+      MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
+                                               DL, DAG.getVTList(MVT::Other),
+                                               SaveXMMOps, MVT::i8, StoreMMO));
     }
 
     if (!MemOps.empty())
@@ -44070,32 +44076,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
          "Unexpected horizontal add/sub opcode");
 
   if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
-    // For slow-hop targets, if we have a hop with a single op, see if we already
-    // have another user that we can reuse and shuffle the result.
     MVT VT = N->getSimpleValueType(0);
     SDValue LHS = N->getOperand(0);
     SDValue RHS = N->getOperand(1);
-    if (VT.is128BitVector() && LHS == RHS) {
-      for (SDNode *User : LHS->uses()) {
-        if (User != N && User->getOpcode() == N->getOpcode()) {
-          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
-          if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
-          }
-          if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
-          }
-        }
-      }
-    }
 
     // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
     if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 869857bcc0d6..8b18b5981e86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -627,10 +627,6 @@ namespace llvm {
       // packed single precision.
       DPBF16PS,
 
-      // Save xmm argument registers to the stack, according to %al. An operator
-      // is needed so that this can be expanded with control flow.
-      VASTART_SAVE_XMM_REGS,
-
       // Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,
 
@@ -848,6 +844,10 @@ namespace llvm {
       AESENCWIDE256KL,
       AESDECWIDE256KL,
 
+      // Save xmm argument registers to the stack, according to %al. An operator
+      // is needed so that this can be expanded with control flow.
+      VASTART_SAVE_XMM_REGS,
+
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
       // opcodes will be thought as target memory ops!
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 202d320cd731..aa14c8016a83 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
 let SchedRW = [WriteSystem] in {
 
 // x86-64 va_start lowering magic.
-let hasSideEffects = 1, Defs = [EFLAGS] in {
+let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in {
 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                               (outs),
-                              (ins GR8:$al,
-                                   i32imm:$regsavefi, i32imm:$offset,
-                                   variable_ops),
-                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
-                              [(X86vastart_save_xmm_regs GR8:$al,
-                                                         timm:$regsavefi,
-                                                         timm:$offset),
+                              (ins GR8:$al, i8mem:$regsavefi, variable_ops),
+                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
+                              [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi),
                                (implicit EFLAGS)]>;
 }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 34afedb5bad2..489ea7fb127a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
 def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
-                                                         SDTCisVT<1, iPTR>,
-                                                         SDTCisVT<2, iPTR>]>;
+                                                         SDTCisPtrTy<1>]>;
 
 def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
                                          SDTCisPtrTy<1>,
@@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
 def X86vastart_save_xmm_regs :
                  SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                         SDT_X86VASTART_SAVE_XMM_REGS,
-                        [SDNPHasChain, SDNPVariadic]>;
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
 def X86vaarg64 :
                  SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 48c27051a872..355ddf26e3bb 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -583,7 +583,7 @@ def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
 //===----------------------------------------------------------------------===//
 // VIA PadLock crypto instructions
 let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
-  def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB, REP;
+  def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
 
 def : InstAlias<"xstorerng", (XSTORE)>;
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b6932dbbfc3f..fc83befe3950 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -1174,6 +1175,15 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
 static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   DenseMap<Value *, Value *> ResolvedValues;
   BasicBlock *UnconditionalSucc = nullptr;
+  assert(InitialInst->getModule());
+  const DataLayout &DL = InitialInst->getModule()->getDataLayout();
+
+  auto TryResolveConstant = [&ResolvedValues](Value *V) {
+    auto It = ResolvedValues.find(V);
+    if (It != ResolvedValues.end())
+      V = It->second;
+    return dyn_cast<ConstantInt>(V);
+  };
 
   Instruction *I = InitialInst;
   while (I->isTerminator() ||
@@ -1190,47 +1200,65 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
     }
     if (auto *BR = dyn_cast<BranchInst>(I)) {
       if (BR->isUnconditional()) {
-        BasicBlock *BB = BR->getSuccessor(0);
+        BasicBlock *Succ = BR->getSuccessor(0);
         if (I == InitialInst)
-          UnconditionalSucc = BB;
-        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-        I = BB->getFirstNonPHIOrDbgOrLifetime();
+          UnconditionalSucc = Succ;
+        scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
+        I = Succ->getFirstNonPHIOrDbgOrLifetime();
+        continue;
+      }
+
+      BasicBlock *BB = BR->getParent();
+      // Handle the case the condition of the conditional branch is constant.
+      // e.g.,
+      //
+      //     br i1 false, label %cleanup, label %CoroEnd
+      //
+      // It is possible during the transformation. We could continue the
+      // simplifying in this case.
+      if (ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true)) {
+        // Handle this branch in next iteration.
+        I = BB->getTerminator();
         continue;
       }
     } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+      // If the case number of suspended switch instruction is reduced to
+      // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
       auto *BR = dyn_cast<BranchInst>(I->getNextNode());
-      if (BR && BR->isConditional() && CondCmp == BR->getCondition()) {
-        // If the case number of suspended switch instruction is reduced to
-        // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
-        // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
-        ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
-        if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) {
-          Value *V = CondCmp->getOperand(0);
-          auto it = ResolvedValues.find(V);
-          if (it != ResolvedValues.end())
-            V = it->second;
-
-          if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) {
-            BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue())
-                                 ? BR->getSuccessor(0)
-                                 : BR->getSuccessor(1);
-            scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-            I = BB->getFirstNonPHIOrDbgOrLifetime();
-            continue;
-          }
-        }
-      }
+      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
+        return false;
+
+      // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
+      // So we try to resolve constant for the first operand only since the
+      // second operand should be literal constant by design.
+      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
+      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
+      if (!Cond0 || !Cond1)
+        return false;
+
+      // Both operands of the CmpInst are Constant. So that we could evaluate
+      // it immediately to get the destination.
+      auto *ConstResult =
+          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
+              CondCmp->getPredicate(), Cond0, Cond1, DL));
+      if (!ConstResult)
+        return false;
+
+      CondCmp->replaceAllUsesWith(ConstResult);
+      CondCmp->eraseFromParent();
+
+      // Handle this branch in next iteration.
+      I = BR;
+      continue;
     } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
-      Value *V = SI->getCondition();
-      auto it = ResolvedValues.find(V);
-      if (it != ResolvedValues.end())
-        V = it->second;
-      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
-        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
-        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-        I = BB->getFirstNonPHIOrDbgOrLifetime();
-        continue;
-      }
+      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
+      if (!Cond)
+        return false;
+
+      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
+      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
+      I = BB->getFirstNonPHIOrDbgOrLifetime();
+      continue;
     }
     return false;
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4e3b18e805ee..71b3a411cc18 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2843,6 +2843,26 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
   }
   assert(FreeInstrBB->size() == 1 &&
          "Only the branch instruction should remain");
+
+  // Now that we've moved the call to free before the NULL check, we have to
+  // remove any attributes on its parameter that imply it's non-null, because
+  // those attributes might have only been valid because of the NULL check, and
+  // we can get miscompiles if we keep them. This is conservative if non-null is
+  // also implied by something other than the NULL check, but it's guaranteed to
+  // be correct, and the conservativeness won't matter in practice, since the
+  // attributes are irrelevant for the call to free itself and the pointer
+  // shouldn't be used after the call.
+  AttributeList Attrs = FI.getAttributes();
+  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
+  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
+  if (Dereferenceable.isValid()) {
+    uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
+    Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
+                                       Attribute::Dereferenceable);
+    Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
+  }
+  FI.setAttributes(Attrs);
+
   return &FI;
 }
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..9d8130d1ac02 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1303,17 +1303,10 @@ struct DSEState {
   /// loop. In particular, this guarantees that it only references a single
   /// MemoryLocation during execution of the containing function.
   bool isGuaranteedLoopInvariant(const Value *Ptr) {
-    auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
+    auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) {
       Ptr = Ptr->stripPointerCasts();
-      if (auto *I = dyn_cast<Instruction>(Ptr)) {
-        if (isa<AllocaInst>(Ptr))
-          return true;
-
-        if (isAllocLikeFn(I, &TLI))
-          return true;
-
-        return false;
-      }
+      if (auto *I = dyn_cast<Instruction>(Ptr))
+        return I->getParent()->isEntryBlock();
       return true;
     };
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..a04d4ef3c086 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -154,6 +154,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
     return {};
   }
   Value *const Addr = LoadI->getOperand(0);
+  if (Addr->getType()->getPointerAddressSpace() != 0) {
+    LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
+    return {};
+  }
   auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
   if (!GEP)
     return {};
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 85e5adaeaf5e..3127432dc6c9 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -144,6 +144,10 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
   Value *Offset =
       Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
 
+  // Insert the call to load.relative instrinsic before LOAD.
+  // GEP might not be immediately followed by a LOAD, like it can be hoisted
+  // outside the loop or another instruction might be inserted them in between.
+  Builder.SetInsertPoint(Load);
   Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
       &M, Intrinsic::load_relative, {Index->getType()});
   Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc3f5c7d4b48..1d06bc7d79a7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5430,8 +5430,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The pointer operand uses an in-tree scalar so we add the new BitCast
       // to ExternalUses list to make sure that an extract will be generated
      // in the future.
-      if (getTreeEntry(PO))
-        ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+      if (TreeEntry *Entry = getTreeEntry(PO)) {
+        // Find which lane we need to extract.
+        unsigned FoundLane = Entry->findLaneForValue(PO);
+        ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+      }
 
       NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
     } else {
@@ -5474,8 +5477,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The pointer operand uses an in-tree scalar, so add the new BitCast to
      // ExternalUses to make sure that an extract will be generated in the
       // future.
-      if (getTreeEntry(ScalarPtr))
-        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+      if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
+        // Find which lane we need to extract.
+        unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
+        ExternalUses.push_back(
+            ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+      }
 
       Value *V = propagateMetadata(ST, E->Scalars);
 
@@ -5577,8 +5584,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The scalar argument uses an in-tree scalar so we add the new vectorized
       // call to ExternalUses list to make sure that an extract will be
       // generated in the future.
-      if (ScalarArg && getTreeEntry(ScalarArg))
-        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+      if (ScalarArg) {
+        if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
+          // Find which lane we need to extract.
+          unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
+          ExternalUses.push_back(
+              ExternalUser(ScalarArg, cast<User>(V), FoundLane));
+        }
+      }
 
       propagateIRFlags(V, E->Scalars, VL0);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);