Diffstat (limited to 'llvm/lib')
38 files changed, 517 insertions, 207 deletions
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 608fc0388af0..c3b039b05f30 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -132,19 +132,20 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
   uint64_t end_offset = Offset + full_length;
   while (*offset_ptr < end_offset) {
+    uint64_t EntryOffset = *offset_ptr;
     arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
     arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
 
-    if (arangeDescriptor.Length == 0) {
-      // Each set of tuples is terminated by a 0 for the address and 0
-      // for the length.
-      if (arangeDescriptor.Address == 0 && *offset_ptr == end_offset)
+    // Each set of tuples is terminated by a 0 for the address and 0
+    // for the length.
+    if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) {
+      if (*offset_ptr == end_offset)
         return ErrorSuccess();
       return createStringError(
           errc::invalid_argument,
           "address range table at offset 0x%" PRIx64
-          " has an invalid tuple (length = 0) at offset 0x%" PRIx64,
-          Offset, *offset_ptr - tuple_size);
+          " has a premature terminator entry at offset 0x%" PRIx64,
+          Offset, EntryOffset);
     }
 
     ArangeDescriptors.push_back(arangeDescriptor);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 3d4cecce27db..d27fd08db14e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -286,10 +286,14 @@ const DWARFUnitIndex::Entry *DWARFUnitIndex::getFromHash(uint64_t S) const {
   auto H = S & Mask;
   auto HP = ((S >> 32) & Mask) | 1;
-  while (Rows[H].getSignature() != S && Rows[H].getSignature() != 0)
+  // The spec says "while 0 is a valid hash value, the row index in a used slot
+  // will always be non-zero". Loop until we find a match or an empty slot.
+  while (Rows[H].getSignature() != S && Rows[H].Index != nullptr)
     H = (H + HP) & Mask;
 
-  if (Rows[H].getSignature() != S)
+  // If the slot is empty, we don't care whether the signature matches (it could
+  // be zero and still match the zeros in the empty slot).
+  if (Rows[H].Index == nullptr)
     return nullptr;
 
   return &Rows[H];
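The DWARFUnitIndex change is worth pausing on: in the unit-index hash table, a signature of 0 is a legal key, so slot emptiness must be tested via the row-index field rather than the signature. A minimal standalone sketch of the same double-hashing probe, with a hypothetical Slot type standing in for DWARFUnitIndex::Entry:

    #include <cstdint>

    struct Slot {
      uint64_t Signature = 0;
      const int *Index = nullptr; // non-null if and only if the slot is used
    };

    // Double-hashing lookup over a power-of-two table; Mask == Buckets - 1.
    const Slot *lookup(const Slot *Rows, uint64_t Mask, uint64_t S) {
      uint64_t H = S & Mask;
      uint64_t HP = ((S >> 32) & Mask) | 1; // odd step, so every slot is visited
      // Stop on a matching signature or on an empty slot. Testing Index rather
      // than the signature means a stored signature of 0 is still found.
      while (Rows[H].Signature != S && Rows[H].Index != nullptr)
        H = (H + HP) & Mask;
      return Rows[H].Index ? &Rows[H] : nullptr;
    }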
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 7e9b0690ccea..04f541b59557 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -308,7 +308,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
                         << " SID: " << SectionID
                         << " Offset: " << format("%p", (uintptr_t)Addr)
                         << " flags: " << *FlagsOrErr << "\n");
-      GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, *JITSymFlags);
+      if (!Name.empty()) // Skip absolute symbol relocations.
+        GlobalSymbolTable[Name] =
+            SymbolTableEntry(SectionID, Addr, *JITSymFlags);
     } else if (SymType == object::SymbolRef::ST_Function ||
                SymType == object::SymbolRef::ST_Data ||
                SymType == object::SymbolRef::ST_Unknown ||
@@ -340,8 +342,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
                           << " SID: " << SectionID
                           << " Offset: " << format("%p", (uintptr_t)SectOffset)
                           << " flags: " << *FlagsOrErr << "\n");
-        GlobalSymbolTable[Name] =
-            SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
+        if (!Name.empty()) // Skip absolute symbol relocations
+          GlobalSymbolTable[Name] =
+              SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
       }
     }
 
@@ -769,8 +772,9 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
 
     LLVM_DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
                       << format("%p", Addr) << "\n");
-    GlobalSymbolTable[Name] =
-        SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
+    if (!Name.empty()) // Skip absolute symbol relocations.
+      GlobalSymbolTable[Name] =
+          SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
     Offset += Size;
     Addr += Size;
   }
@@ -930,6 +934,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
   if (Loc == GlobalSymbolTable.end()) {
     ExternalSymbolRelocations[SymbolName].push_back(RE);
   } else {
+    assert(!SymbolName.empty() &&
+           "Empty symbol should not be in GlobalSymbolTable");
     // Copy the RE since we want to modify its addend.
     RelocationEntry RECopy = RE;
     const auto &SymInfo = Loc->second;
@@ -1234,7 +1240,8 @@ void RuntimeDyldImpl::finalizeAsync(
 
   for (auto &RelocKV : SharedThis->ExternalSymbolRelocations) {
     StringRef Name = RelocKV.first();
-    assert(!Name.empty() && "Symbol has no name?");
+    if (Name.empty()) // Skip absolute symbol relocations.
+      continue;
     assert(!SharedThis->GlobalSymbolTable.count(Name) &&
            "Name already processed. RuntimeDyld instances can not be re-used "
            "when finalizing with finalizeAsync.");
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index f3c3e9ad9f69..c20d0955f3d8 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1589,7 +1589,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
 static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
                                                       const GlobalValue *GV2) {
   auto isGlobalUnsafeForEquality = [](const GlobalValue *GV) {
-    if (GV->hasExternalWeakLinkage() || GV->hasWeakAnyLinkage())
+    if (GV->isInterposable() || GV->hasGlobalUnnamedAddr())
       return true;
     if (const auto *GVar = dyn_cast<GlobalVariable>(GV)) {
       Type *Ty = GVar->getValueType();
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index b77a9635f64c..b9b4416fde21 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -317,6 +317,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
     break;
   case Triple::ppc64:
   case Triple::ppc64le:
+  case Triple::aarch64:
+  case Triple::aarch64_be:
   case Triple::x86_64:
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
                      (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e5ab13bc719d..fb8215ef2281 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -644,10 +644,13 @@ EndStmt:
         !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
       Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
                      utohexstr(Section->getType()));
-    if (Section->getFlags() != Flags)
+    // Check that flags are used consistently. However, the GNU assembler
+    // permits them to be left out in subsequent uses of the same section;
+    // for compatibility, do likewise.
+    if ((Flags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
       Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
                      utohexstr(Section->getFlags()));
-    if (Section->getEntrySize() != Size)
+    if ((Flags || Size || !TypeName.empty()) && Section->getEntrySize() != Size)
       Error(loc, "changed section entsize for " + SectionName +
                      ", expected: " + Twine(Section->getEntrySize()));
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
index e945e8cecce9..4594368fc0e9 100644
--- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -243,6 +243,8 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
       CurrentStoreGroupID = 0;
     if (GroupID == CurrentLoadBarrierGroupID)
       CurrentLoadBarrierGroupID = 0;
+    if (GroupID == CurrentStoreBarrierGroupID)
+      CurrentStoreBarrierGroupID = 0;
   }
 }
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 658c1ee74cfe..36cecf9b2a16 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -760,14 +760,15 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     *Type = X86::INTEL_GOLDMONT_PLUS;
     break;
   case 0x86:
+    CPU = "tremont";
     *Type = X86::INTEL_TREMONT;
     break;
 
+  // Xeon Phi (Knights Landing + Knights Mill):
   case 0x57:
-    CPU = "tremont";
+    CPU = "knl";
     *Type = X86::INTEL_KNL;
     break;
-
   case 0x85:
     CPU = "knm";
     *Type = X86::INTEL_KNM;
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index e352beb77616..a4ffc0ec4313 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -19,7 +19,6 @@
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/WindowsError.h"
 #include <fcntl.h>
-#include <io.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -352,13 +351,13 @@ std::error_code is_local(const Twine &path, bool &result) {
 static std::error_code realPathFromHandle(HANDLE H,
                                           SmallVectorImpl<wchar_t> &Buffer) {
   DWORD CountChars = ::GetFinalPathNameByHandleW(
-      H, Buffer.begin(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
-  if (CountChars > Buffer.capacity()) {
+      H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED);
+  if (CountChars && CountChars >= Buffer.capacity()) {
     // The buffer wasn't big enough, try again. In this case the return value
     // *does* indicate the size of the null terminator.
     Buffer.reserve(CountChars);
     CountChars = ::GetFinalPathNameByHandleW(
-        H, Buffer.data(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
+        H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED);
   }
   if (CountChars == 0)
     return mapWindowsError(GetLastError());
@@ -403,6 +402,20 @@ std::error_code is_local(int FD, bool &Result) {
 }
 
 static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
+  // First, check if the file is on a network (non-local) drive. If so, don't
+  // set DeleteFile to true, since it prevents opening the file for writes.
+  SmallVector<wchar_t, 128> FinalPath;
+  if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
+    return EC;
+
+  bool IsLocal;
+  if (std::error_code EC = is_local_internal(FinalPath, IsLocal))
+    return EC;
+
+  if (!IsLocal)
+    return std::error_code();
+
+  // The file is on a local drive, set the DeleteFile to true.
   FILE_DISPOSITION_INFO Disposition;
   Disposition.DeleteFile = Delete;
   if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
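The Path.inc fix follows the documented contract of GetFinalPathNameByHandleW: on success the return value is the number of characters written, excluding the terminating null; when the buffer is too small it is the required size, including the null. A sketch of that call-twice pattern in plain C++ (illustration only, not the LLVM helper itself):

    #include <string>
    #include <windows.h>

    // Returns the final path for H, growing the buffer once if needed.
    std::wstring finalPath(HANDLE H) {
      std::wstring Buf(128, L'\0');
      DWORD N = ::GetFinalPathNameByHandleW(
          H, &Buf[0], static_cast<DWORD>(Buf.size()), FILE_NAME_NORMALIZED);
      if (N >= Buf.size()) { // too small: N is the required size incl. null
        Buf.resize(N);
        N = ::GetFinalPathNameByHandleW(
            H, &Buf[0], static_cast<DWORD>(Buf.size()), FILE_NAME_NORMALIZED);
      }
      if (N == 0)
        return {}; // consult GetLastError() for the failure reason
      Buf.resize(N); // success: N excludes the terminating null
      return Buf;
    }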
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 3a94820dac8d..7ec7ffe309f7 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -89,6 +89,8 @@ public:
   void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                           const MachineBasicBlock *MBB, unsigned JTI);
 
+  void emitFunctionEntryLabel() override;
+
   void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
 
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -822,6 +824,19 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   OutStreamer->emitValue(Value, Size);
 }
 
+void AArch64AsmPrinter::emitFunctionEntryLabel() {
+  if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall ||
+      MF->getFunction().getCallingConv() ==
+          CallingConv::AArch64_SVE_VectorCall ||
+      STI->getRegisterInfo()->hasSVEArgsOrReturn(MF)) {
+    auto *TS =
+        static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
+    TS->emitDirectiveVariantPCS(CurrentFnSym);
+  }
+
+  return AsmPrinter::emitFunctionEntryLabel();
+}
+
 /// Small jump tables contain an unsigned byte or half, representing the offset
 /// from the lowest-addressed possible destination to the desired basic
 /// block. Since all instructions are 4-byte aligned, this is further compressed
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index 84ec5afcc9c1..9ae2b465e247 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -35,6 +35,9 @@ static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
 static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
                                      AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                      AArch64::Q6, AArch64::Q7};
+static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
+                                     AArch64::Z3, AArch64::Z4, AArch64::Z5,
+                                     AArch64::Z6, AArch64::Z7};
 
 static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                              MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
@@ -97,6 +100,8 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
     RegList = DRegList;
   else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
     RegList = QRegList;
+  else if (LocVT.isScalableVector())
+    RegList = ZRegList;
   else {
     // Not an array we want to split up after all.
     return false;
@@ -141,6 +146,10 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
     return true;
   }
 
+  if (LocVT.isScalableVector())
+    report_fatal_error(
+        "Passing consecutive scalable vector registers unsupported");
+
   // Mark all regs in the class as unavailable
   for (auto Reg : RegList)
     State.AllocateReg(Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 45bfa85bdc07..48ca9039b1bd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4366,6 +4366,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     for (unsigned i = 0; i != NumArgs; ++i) {
       MVT ArgVT = Outs[i].VT;
+      if (!Outs[i].IsFixed && ArgVT.isScalableVector())
+        report_fatal_error("Passing SVE types to variadic functions is "
+                           "currently not supported");
+
       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
       CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
                                                /*IsVarArg=*/ !Outs[i].IsFixed);
@@ -6168,6 +6172,10 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
   Chain = VAList.getValue(1);
   VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
 
+  if (VT.isScalableVector())
+    report_fatal_error("Passing SVE types to variadic functions is "
+                       "currently not supported");
+
   if (Align && *Align > MinSlotSize) {
     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                          DAG.getConstant(Align->value() - 1, DL, PtrVT));
@@ -14702,7 +14710,14 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
 
 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
     Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
-  return Ty->isArrayTy();
+  if (Ty->isArrayTy())
+    return true;
+
+  const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
+  if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
+    return true;
+
+  return false;
 }
 
 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 0ac09c4f96f0..e72ae0e62cb7 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -179,6 +179,8 @@ private:
   bool parseDirectiveCFINegateRAState();
   bool parseDirectiveCFIBKeyFrame();
 
+  bool parseDirectiveVariantPCS(SMLoc L);
+
   bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
                            SmallVectorImpl<SMLoc> &Loc);
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -5077,6 +5079,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
     parseDirectiveCFIBKeyFrame();
   else if (IDVal == ".arch_extension")
     parseDirectiveArchExtension(Loc);
+  else if (IDVal == ".variant_pcs")
+    parseDirectiveVariantPCS(Loc);
   else if (IsMachO) {
     if (IDVal == MCLOHDirectiveName())
       parseDirectiveLOH(IDVal, Loc);
@@ -5507,6 +5511,32 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
   return false;
 }
 
+/// parseDirectiveVariantPCS
+/// ::= .variant_pcs symbolname
+bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
+  MCAsmParser &Parser = getParser();
+
+  const AsmToken &Tok = Parser.getTok();
+  if (Tok.isNot(AsmToken::Identifier))
+    return TokError("expected symbol name");
+
+  StringRef SymbolName = Tok.getIdentifier();
+
+  MCSymbol *Sym = getContext().lookupSymbol(SymbolName);
+  if (!Sym)
+    return TokError("unknown symbol in '.variant_pcs' directive");
+
+  Parser.Lex(); // Eat the symbol
+
+  // Shouldn't be any more tokens
+  if (parseToken(AsmToken::EndOfStatement))
+    return addErrorSuffix(" in '.variant_pcs' directive");
+
+  getTargetStreamer().emitDirectiveVariantPCS(Sym);
+
+  return false;
+}
+
 bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
                                          AArch64MCExpr::VariantKind &ELFRefKind,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 408f0cb77e73..7733fe7f7b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -289,14 +289,15 @@ private:
   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                        bool IsLoadStore = false) const;
 
-  /// Instructions that accept extend modifiers like UXTW expect the register
-  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
-  /// subregister copy if necessary. Return either ExtReg, or the result of the
-  /// new copy.
-  Register narrowExtendRegIfNeeded(Register ExtReg,
-                                   MachineIRBuilder &MIB) const;
-  Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
-                                   MachineIRBuilder &MIB) const;
+  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
+  ///
+  /// \returns Either \p Reg if no change was necessary, or the new register
+  /// created by moving \p Reg.
+  ///
+  /// Note: This uses emitCopy right now.
+  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
+                              MachineIRBuilder &MIB) const;
+
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 
   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1195,10 +1196,10 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
   // TBNZW work.
   bool UseWReg = Bit < 32;
   unsigned NecessarySize = UseWReg ? 32 : 64;
-  if (Size < NecessarySize)
-    TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
-  else if (Size > NecessarySize)
-    TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+  if (Size != NecessarySize)
+    TestReg = moveScalarRegClass(
+        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
+        MIB);
 
   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                           {AArch64::TBZW, AArch64::TBNZW}};
@@ -4904,9 +4905,19 @@ AArch64InstructionSelector::selectExtendedSHL(
     return None;
 
   unsigned OffsetOpc = OffsetInst->getOpcode();
-  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
-    return None;
+  bool LookedThroughZExt = false;
+  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
+    // Try to look through a ZEXT.
+    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
+      return None;
+
+    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
+    OffsetOpc = OffsetInst->getOpcode();
+    LookedThroughZExt = true;
+    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
+      return None;
+  }
 
   // Make sure that the memory op is a valid size.
   int64_t LegalShiftVal = Log2_32(SizeInBytes);
   if (LegalShiftVal == 0)
@@ -4957,21 +4968,24 @@ AArch64InstructionSelector::selectExtendedSHL(
   unsigned SignExtend = 0;
   if (WantsExt) {
-    // Check if the offset is defined by an extend.
-    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
-    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
-    if (Ext == AArch64_AM::InvalidShiftExtend)
-      return None;
+    // Check if the offset is defined by an extend, unless we looked through a
+    // G_ZEXT earlier.
+    if (!LookedThroughZExt) {
+      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+      if (Ext == AArch64_AM::InvalidShiftExtend)
+        return None;
 
-    SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
-    // We only support SXTW for signed extension here.
-    if (SignExtend && Ext != AArch64_AM::SXTW)
-      return None;
+      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
+      // We only support SXTW for signed extension here.
+      if (SignExtend && Ext != AArch64_AM::SXTW)
+        return None;
+      OffsetReg = ExtInst->getOperand(1).getReg();
+    }
 
     // Need a 32-bit wide register here.
     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
-    OffsetReg = ExtInst->getOperand(1).getReg();
-    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
   }
 
   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5143,8 +5157,8 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
 
   // Need a 32-bit wide register.
   MachineIRBuilder MIB(*PtrAdd);
-  Register ExtReg =
-      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
+                                       AArch64::GPR32RegClass, MIB);
   unsigned SignExtend = Ext == AArch64_AM::SXTW;
 
   // Base is LHS, offset is ExtReg.
@@ -5418,67 +5432,21 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
   }
 }
 
-Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
-    Register ExtReg, MachineIRBuilder &MIB) const {
+Register AArch64InstructionSelector::moveScalarRegClass(
+    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
   MachineRegisterInfo &MRI = *MIB.getMRI();
-  if (MRI.getType(ExtReg).getSizeInBits() == 32)
-    return ExtReg;
-
-  // Insert a copy to move ExtReg to GPR32.
-  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+  auto Ty = MRI.getType(Reg);
+  assert(!Ty.isVector() && "Expected scalars only!");
+  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
+    return Reg;
 
-  // Select the copy into a subregister copy.
+  // Create a copy and immediately select it.
+  // FIXME: We should have an emitCopy function?
+  auto Copy = MIB.buildCopy({&RC}, {Reg});
   selectCopy(*Copy, TII, MRI, TRI, RBI);
   return Copy.getReg(0);
 }
 
-Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
-    Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
-  assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
-  MachineRegisterInfo &MRI = *MIB.getMRI();
-  unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
-  assert(WideSize >= NarrowSize &&
-         "WideSize cannot be smaller than NarrowSize!");
-
-  // If the sizes match, just return the register.
-  //
-  // If NarrowSize is an s1, then we can select it to any size, so we'll treat
-  // it as a don't care.
-  if (NarrowSize == WideSize || NarrowSize == 1)
-    return Reg;
-
-  // Now check the register classes.
-  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
-  const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
-  const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
-  assert(OrigRC && "Could not determine narrow RC?");
-  assert(WideRC && "Could not determine wide RC?");
-
-  // If the sizes differ, but the register classes are the same, there is no
-  // need to insert a SUBREG_TO_REG.
-  //
-  // For example, an s8 that's supposed to be a GPR will be selected to either
-  // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
-  // always end up on a GPR32.
-  if (OrigRC == WideRC)
-    return Reg;
-
-  // We have two different register classes. Insert a SUBREG_TO_REG.
-  unsigned SubReg = 0;
-  getSubRegForClass(OrigRC, TRI, SubReg);
-  assert(SubReg && "Couldn't determine subregister?");
-
-  // Build the SUBREG_TO_REG and return the new, widened register.
-  auto SubRegToReg =
-      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
-          .addImm(0)
-          .addUse(Reg)
-          .addImm(SubReg);
-  constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
-  return SubRegToReg.getReg(0);
-}
-
 /// Select an "extended register" operand. This operand folds in an extend
 /// followed by an optional left shift.
 InstructionSelector::ComplexRendererFns
@@ -5539,7 +5507,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
   // copy.
   MachineIRBuilder MIB(*RootDef);
-  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
 
   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
            [=](MachineInstrBuilder &MIB) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2eaec0b970fa..4ffde2a7e3c4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -97,15 +97,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .moreElementsToNextPow2(0);
 
   getActionDefinitionsBuilder(G_SHL)
-      .legalFor({{s32, s32}, {s64, s64},
-                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
-      .clampScalar(1, s32, s64)
-      .clampScalar(0, s32, s64)
-      .widenScalarToNextPow2(0)
-      .clampNumElements(0, v2s32, v4s32)
-      .clampNumElements(0, v2s64, v2s64)
-      .moreElementsToNextPow2(0)
-      .minScalarSameAs(1, 0);
+      .customIf([=](const LegalityQuery &Query) {
+        const auto &SrcTy = Query.Types[0];
+        const auto &AmtTy = Query.Types[1];
+        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+               AmtTy.getSizeInBits() == 32;
+      })
+      .legalFor({{s32, s32},
+                 {s64, s64},
+                 {s32, s64},
+                 {v2s32, v2s32},
+                 {v4s32, v4s32},
+                 {v2s64, v2s64}})
+      .clampScalar(1, s32, s64)
+      .clampScalar(0, s32, s64)
+      .widenScalarToNextPow2(0)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
+      .moreElementsToNextPow2(0)
+      .minScalarSameAs(1, 0);
 
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalFor({{p0, s64}, {v2p0, v2s64}})
@@ -710,16 +720,14 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
   // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
   // imported patterns can select it later. Either way, it will be legal.
   Register AmtReg = MI.getOperand(2).getReg();
-  auto *CstMI = MRI.getVRegDef(AmtReg);
-  assert(CstMI && "expected to find a vreg def");
-  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
+  auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
+  if (!VRegAndVal)
     return true;
+
   // Check the shift amount is in range for an immediate form.
-  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
+  int64_t Amount = VRegAndVal->Value;
   if (Amount > 31)
     return true; // This will have to remain a register variant.
-  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
-  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
+  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
   MI.getOperand(2).setReg(ExtCst.getReg(0));
   return true;
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 7e3ff1948dad..93213f5977e5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -261,6 +261,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::GPR64common_and_GPR64noipRegClassID:
   case AArch64::GPR64noip_and_tcGPR64RegClassID:
   case AArch64::tcGPR64RegClassID:
+  case AArch64::rtcGPR64RegClassID:
   case AArch64::WSeqPairsClassRegClassID:
   case AArch64::XSeqPairsClassRegClassID:
     return getRegBank(AArch64::GPRRegBankID);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index fe4c34be1519..6dfda8217628 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -47,6 +47,10 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
 
   void emitInst(uint32_t Inst) override;
 
+  void emitDirectiveVariantPCS(MCSymbol *Symbol) override {
+    OS << "\t.variant_pcs " << Symbol->getName() << "\n";
+  }
+
 public:
   AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
 };
@@ -194,6 +198,10 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
   getStreamer().emitInst(Inst);
 }
 
+void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
+  cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
+}
+
 MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
                                                  formatted_raw_ostream &OS,
                                                  MCInstPrinter *InstPrint,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 3a0c5d8318dd..1af978a806d1 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -36,6 +36,9 @@ public:
   /// Callback used to implement the .inst directive.
   virtual void emitInst(uint32_t Inst);
 
+  /// Callback used to implement the .variant_pcs directive.
+  virtual void emitDirectiveVariantPCS(MCSymbol *Symbol) {};
+
   virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
   virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
   virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
@@ -63,6 +66,7 @@ private:
   AArch64ELFStreamer &getStreamer();
 
   void emitInst(uint32_t Inst) override;
+  void emitDirectiveVariantPCS(MCSymbol *Symbol) override;
 
 public:
   AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index f31c722db1b2..442be886a8ac 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -254,16 +254,24 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
-    if (MBBE != MBB.end()) {
-      MachineInstr &MI = *MBBE;
+    MachineBasicBlock::iterator TermI = MBBE;
+    // Check first terminator for VCC branches to optimize
+    if (TermI != MBB.end()) {
+      MachineInstr &MI = *TermI;
       switch (MI.getOpcode()) {
       case AMDGPU::S_CBRANCH_VCCZ:
      case AMDGPU::S_CBRANCH_VCCNZ:
         Changed |= optimizeVccBranch(MI);
         continue;
-      case AMDGPU::SI_RETURN_TO_EPILOG:
-        // FIXME: This is not an optimization and should be
-        // moved somewhere else.
+      default:
+        break;
+      }
+    }
+
+    // Check all terminators for SI_RETURN_TO_EPILOG
+    // FIXME: This is not an optimization and should be moved somewhere else.
+    while (TermI != MBB.end()) {
+      MachineInstr &MI = *TermI;
+      if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
         assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
         // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
@@ -281,11 +289,11 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
               .addMBB(EmptyMBBAtEnd);
           MI.eraseFromParent();
           MBBE = MBB.getFirstTerminator();
+          TermI = MBBE;
+          continue;
         }
-        break;
-      default:
-        break;
       }
+      TermI++;
     }
 
     if (!ST.hasVGPRIndexMode())
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index d407edfbd966..77f565fb5957 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -254,7 +254,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
   const LoadSDNode *LD = cast<LoadSDNode>(Node);
   uint64_t size = LD->getMemOperand()->getSize();
 
-  if (!size || size > 8 || (size & (size - 1)))
+  if (!size || size > 8 || (size & (size - 1)) || !LD->isSimple())
     return;
 
   SDNode *LDAddrNode = LD->getOperand(1).getNode();
@@ -342,7 +342,7 @@ bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node,
                                             unsigned char *ByteSeq) {
   const GlobalVariable *V = dyn_cast<GlobalVariable>(Node->getGlobal());
 
-  if (!V || !V->hasInitializer())
+  if (!V || !V->hasInitializer() || !V->isConstant())
     return false;
 
   const Constant *Init = V->getInitializer();
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 3932def87854..3932def87854 100755..100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
diff --git a/llvm/lib/Target/Sparc/LeonFeatures.td b/llvm/lib/Target/Sparc/LeonFeatures.td
index 75273eff1868..75273eff1868 100755..100644
--- a/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/llvm/lib/Target/Sparc/LeonFeatures.td
diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp
index e9d3aaeb9cfe..e9d3aaeb9cfe 100755..100644
--- a/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/llvm/lib/Target/Sparc/LeonPasses.cpp
diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h
index b165bc93780f..b165bc93780f 100755..100644
--- a/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/llvm/lib/Target/Sparc/LeonPasses.h
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 069e43c6f544..7845a18b14c1 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -351,7 +351,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     break;
 
   case MachineOperand::MO_Immediate:
-    O << (int)MO.getImm();
+    O << MO.getImm();
     break;
   case MachineOperand::MO_MachineBasicBlock:
     MO.getMBB()->getSymbol()->print(O, MAI);
diff --git a/llvm/lib/Target/Sparc/SparcSchedule.td b/llvm/lib/Target/Sparc/SparcSchedule.td
index 0f05372b7050..0f05372b7050 100755..100644
--- a/llvm/lib/Target/Sparc/SparcSchedule.td
+++ b/llvm/lib/Target/Sparc/SparcSchedule.td
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 8a0092a3f298..c2a0d3e01740 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -58,6 +58,9 @@ class WebAssemblyFastISel final : public FastISel {
       int FI;
     } Base;
 
+    // Whether the base has been determined yet
+    bool IsBaseSet = false;
+
     int64_t Offset = 0;
 
     const GlobalValue *GV = nullptr;
@@ -74,8 +77,9 @@ class WebAssemblyFastISel final : public FastISel {
     bool isFIBase() const { return Kind == FrameIndexBase; }
     void setReg(unsigned Reg) {
       assert(isRegBase() && "Invalid base register access!");
-      assert(Base.Reg == 0 && "Overwriting non-zero register");
+      assert(!IsBaseSet && "Base cannot be reset");
       Base.Reg = Reg;
+      IsBaseSet = true;
     }
     unsigned getReg() const {
       assert(isRegBase() && "Invalid base register access!");
@@ -83,8 +87,9 @@ class WebAssemblyFastISel final : public FastISel {
     }
     void setFI(unsigned FI) {
       assert(isFIBase() && "Invalid base frame index access!");
-      assert(Base.FI == 0 && "Overwriting non-zero frame index");
+      assert(!IsBaseSet && "Base cannot be reset");
       Base.FI = FI;
+      IsBaseSet = true;
     }
     unsigned getFI() const {
       assert(isFIBase() && "Invalid base frame index access!");
@@ -98,13 +103,7 @@ class WebAssemblyFastISel final : public FastISel {
     int64_t getOffset() const { return Offset; }
     void setGlobalValue(const GlobalValue *G) { GV = G; }
     const GlobalValue *getGlobalValue() const { return GV; }
-    bool isSet() const {
-      if (isRegBase()) {
-        return Base.Reg != 0;
-      } else {
-        return Base.FI != 0;
-      }
-    }
+    bool isSet() const { return IsBaseSet; }
   };
 
   /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
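The FastISel change replaces a sentinel test with an explicit flag: register 0 and frame index 0 are both legitimate values in this context, so Base.Reg != 0 could not distinguish "never set" from "set to 0". A generic illustration of the pitfall and the fix, using a hypothetical Address type rather than the real class:

    #include <cassert>

    struct Address {
      int FI = 0;            // frame index; 0 is a valid index
      bool IsBaseSet = false;

      void setFI(int NewFI) {
        assert(!IsBaseSet && "Base cannot be reset");
        FI = NewFI;
        IsBaseSet = true;
      }
      // Wrong: 'return FI != 0;' would report an address set to frame
      // index 0 as unset. Tracking the state in a separate flag avoids
      // reserving a sentinel value.
      bool isSet() const { return IsBaseSet; }
    };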
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 5ff0d73534a6..085910f01ee6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -328,7 +328,9 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
 } // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
 
 def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
-          (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>;
+          (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+          (CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
 
 def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
           (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 130589c9df8c..6b6394a58339 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -101,10 +101,12 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
             WebAssemblyFrameLowering::getOpcConst(MF) &&
         MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
       MachineOperand &ImmMO = Def->getOperand(1);
-      ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
-      MI.getOperand(FIOperandNum)
-          .ChangeToRegister(FrameRegister, /*isDef=*/false);
-      return;
+      if (ImmMO.isImm()) {
+        ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
+        MI.getOperand(FIOperandNum)
+            .ChangeToRegister(FrameRegister, /*isDef=*/false);
+        return;
+      }
     }
   }
 
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 540ad98b6d54..540ad98b6d54 100755..100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index c7ca6fb2a4fc..db6b68659493 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
   uint64_t ProbeChunk = StackProbeSize * 8;
 
+  uint64_t MaxAlign =
+      TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
+
+  // Synthesize a loop or unroll it, depending on the number of iterations.
+  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
+  // left between the unaligned rsp and the current rsp.
   if (Offset > ProbeChunk) {
-    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
+    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
+                                    MaxAlign % StackProbeSize);
   } else {
-    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
+    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
+                                     MaxAlign % StackProbeSize);
   }
 }
 
 void X86FrameLowering::emitStackProbeInlineGenericBlock(
     MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
-    uint64_t Offset) const {
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+    uint64_t AlignOffset) const {
 
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
   const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
   uint64_t CurrentOffset = 0;
-  // 0 Thanks to return address being saved on the stack
-  uint64_t CurrentProbeOffset = 0;
 
-  // For the first N - 1 pages, just probe. I tried to take advantage of
+  assert(AlignOffset < StackProbeSize);
+
+  // If the offset is so small it fits within a page, there's nothing to do.
+  if (StackProbeSize < Offset + AlignOffset) {
+
+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+                           .addReg(StackPtr)
+                           .addImm(StackProbeSize - AlignOffset)
+                           .setMIFlag(MachineInstr::FrameSetup);
+    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+                     .setMIFlag(MachineInstr::FrameSetup),
+                 StackPtr, false, 0)
+        .addImm(0)
+        .setMIFlag(MachineInstr::FrameSetup);
+    NumFrameExtraProbe++;
+    CurrentOffset = StackProbeSize - AlignOffset;
+  }
+
+  // For the next N - 1 pages, just probe. I tried to take advantage of
   // natural probes but it implies much more logic and there was very few
   // interesting natural probes to interleave.
   while (CurrentOffset + StackProbeSize < Offset) {
@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
         .setMIFlag(MachineInstr::FrameSetup);
     NumFrameExtraProbe++;
     CurrentOffset += StackProbeSize;
-    CurrentProbeOffset += StackProbeSize;
   }
 
+  // No need to probe the tail, it is smaller than a Page.
   uint64_t ChunkSize = Offset - CurrentOffset;
   MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                          .addReg(StackPtr)
@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
 
 void X86FrameLowering::emitStackProbeInlineGenericLoop(
     MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
-    uint64_t Offset) const {
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+    uint64_t AlignOffset) const {
   assert(Offset && "null offset");
 
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
 
+  if (AlignOffset) {
+    if (AlignOffset < StackProbeSize) {
+      // Perform a first smaller allocation followed by a probe.
+      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
+      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+                             .addReg(StackPtr)
+                             .addImm(AlignOffset)
+                             .setMIFlag(MachineInstr::FrameSetup);
+      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+                       .setMIFlag(MachineInstr::FrameSetup),
+                   StackPtr, false, 0)
+          .addImm(0)
+          .setMIFlag(MachineInstr::FrameSetup);
+      NumFrameExtraProbe++;
+      Offset -= AlignOffset;
+    }
+  }
+
   // Synthesize a loop
   NumFrameLoopProbe++;
   const BasicBlock *LLVM_BB = MBB.getBasicBlock();
@@ -666,8 +712,8 @@
 
   // save loop bound
   {
-    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
         .addReg(FinalStackProbed)
         .addImm(Offset / StackProbeSize * StackProbeSize)
        .setMIFlag(MachineInstr::FrameSetup);
   }
 
   // allocate a page
   {
-    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-    BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
+    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+    BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
         .addReg(StackPtr)
         .addImm(StackProbeSize)
         .setMIFlag(MachineInstr::FrameSetup);
@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                           uint64_t MaxAlign) const {
   uint64_t Val = -MaxAlign;
   unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
-  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
-                         .addReg(Reg)
-                         .addImm(Val)
-                         .setMIFlag(MachineInstr::FrameSetup);
 
-  // The EFLAGS implicit def is dead.
-  MI->getOperand(3).setIsDead();
+  MachineFunction &MF = *MBB.getParent();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86TargetLowering &TLI = *STI.getTargetLowering();
+  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+
+  // We want to make sure that (in worst case) less than StackProbeSize bytes
+  // are not probed after the AND. This assumption is used in
+  // emitStackProbeInlineGeneric.
+  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
+    {
+      NumFrameLoopProbe++;
+      MachineBasicBlock *entryMBB =
+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+      MachineBasicBlock *headMBB =
+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+      MachineBasicBlock *bodyMBB =
+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+      MachineBasicBlock *footMBB =
+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+
+      MachineFunction::iterator MBBIter = MBB.getIterator();
+      MF.insert(MBBIter, entryMBB);
+      MF.insert(MBBIter, headMBB);
+      MF.insert(MBBIter, bodyMBB);
+      MF.insert(MBBIter, footMBB);
+      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+
+      // Setup entry block
+      {
+
+        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
+        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+        MachineInstr *MI =
+            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
+                .addReg(FinalStackProbed)
+                .addImm(Val)
+                .setMIFlag(MachineInstr::FrameSetup);
+
+        // The EFLAGS implicit def is dead.
+        MI->getOperand(3).setIsDead();
+
+        BuildMI(entryMBB, DL,
+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+            .addReg(FinalStackProbed)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
+            .addMBB(&MBB)
+            .addImm(X86::COND_E)
+            .setMIFlag(MachineInstr::FrameSetup);
+        entryMBB->addSuccessor(headMBB);
+        entryMBB->addSuccessor(&MBB);
+      }
+
+      // Loop entry block
+
+      {
+        const unsigned SUBOpc =
+            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
+            .addReg(StackPtr)
+            .addImm(StackProbeSize)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        BuildMI(headMBB, DL,
+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+            .addReg(FinalStackProbed)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        // jump
+        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
+            .addMBB(footMBB)
+            .addImm(X86::COND_B)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        headMBB->addSuccessor(bodyMBB);
+        headMBB->addSuccessor(footMBB);
+      }
+
+      // setup loop body
+      {
+        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
+                         .setMIFlag(MachineInstr::FrameSetup),
+                     StackPtr, false, 0)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        const unsigned SUBOpc =
+            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
+            .addReg(StackPtr)
+            .addImm(StackProbeSize)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        // cmp with stack pointer bound
+        BuildMI(bodyMBB, DL,
+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+            .addReg(FinalStackProbed)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+        // jump
+        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
+            .addMBB(bodyMBB)
+            .addImm(X86::COND_B)
+            .setMIFlag(MachineInstr::FrameSetup);
+        bodyMBB->addSuccessor(bodyMBB);
+        bodyMBB->addSuccessor(footMBB);
+      }
+
+      // setup loop footer
+      {
+        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
+            .addReg(FinalStackProbed)
+            .setMIFlag(MachineInstr::FrameSetup);
+        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
+                         .setMIFlag(MachineInstr::FrameSetup),
+                     StackPtr, false, 0)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+        footMBB->addSuccessor(&MBB);
+      }
+
+      recomputeLiveIns(*headMBB);
+      recomputeLiveIns(*bodyMBB);
+      recomputeLiveIns(*footMBB);
+      recomputeLiveIns(MBB);
+    }
+  } else {
+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+                           .addReg(Reg)
+                           .addImm(Val)
+                           .setMIFlag(MachineInstr::FrameSetup);
+
+    // The EFLAGS implicit def is dead.
+    MI->getOperand(3).setIsDead();
+  }
 }
 
 bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
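For readers unfamiliar with inline stack probing, the emitted code simply touches each guard page while the stack grows, so that no two probes are ever more than StackProbeSize bytes apart; AlignOffset accounts for slack that BuildStackAlignAND may already have created. A rough C++-level model of what the unrolled block emits (names invented here; the real code builds SUB/MOV machine instructions):

    #include <cstddef>

    // Illustration only: grow the stack by Offset bytes, probing each page.
    void probePages(volatile char *SP, size_t Offset, size_t Page,
                    size_t AlignOffset) {
      size_t Done = 0;
      if (Page < Offset + AlignOffset) { // first chunk completes a page
        SP -= Page - AlignOffset;
        *SP = 0; // probe
        Done = Page - AlignOffset;
      }
      while (Done + Page < Offset) { // one probe per whole page
        SP -= Page;
        *SP = 0;
        Done += Page;
      }
      SP -= Offset - Done; // tail is smaller than a page: no probe needed
    }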
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index c0b4be95f88d..bb2e83205e71 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -213,14 +213,14 @@ private:
   void emitStackProbeInlineGenericBlock(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
-                                        const DebugLoc &DL,
-                                        uint64_t Offset) const;
+                                        const DebugLoc &DL, uint64_t Offset,
+                                        uint64_t Align) const;
 
   void emitStackProbeInlineGenericLoop(MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
-                                       const DebugLoc &DL,
-                                       uint64_t Offset) const;
+                                       const DebugLoc &DL, uint64_t Offset,
+                                       uint64_t Align) const;
 
   /// Emit a stub to later inline the target stack probe.
   MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1671917157f4..56690c3c555b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30285,6 +30285,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(V);
     return;
   }
+  case ISD::BITREVERSE:
+    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    assert(Subtarget.hasXOP() && "Expected XOP");
+    // We can use VPPERM by copying to a vector register and back. We'll need
+    // to move the scalar in two i32 pieces.
+    Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
+    return;
   }
 }
 
@@ -31876,7 +31883,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
 
   BuildMI(testMBB, DL, TII->get(X86::JCC_1))
       .addMBB(tailMBB)
-      .addImm(X86::COND_L);
+      .addImm(X86::COND_GE);
   testMBB->addSuccessor(blockMBB);
   testMBB->addSuccessor(tailMBB);
 
@@ -31892,9 +31899,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
   //
   // The property we want to enforce is to never have more than [page alloc]
   // between two probes.
 
-  const unsigned MovMIOpc =
-      TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
-  addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+  const unsigned XORMIOpc =
+      TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
+  addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
       .addImm(0);
 
   BuildMI(blockMBB, DL,
@@ -36018,8 +36025,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
 
     // Share broadcast with the longest vector and extract low subvector (free).
+    // Ensure the same SDValue from the SDNode use is being used.
     for (SDNode *User : Src->uses())
       if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+          Src == User->getOperand(0) &&
          User->getValueSizeInBits(0) > VT.getSizeInBits()) {
         return extractSubVector(SDValue(User, 0), 0, DAG, DL,
                                 VT.getSizeInBits());
@@ -39588,10 +39597,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
 
   // vselect Cond, 000..., X -> andn Cond, X
   if (TValIsAllZeros) {
-    MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
-    SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
-    SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
-    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
+    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+    SDValue AndN;
+    // The canonical form differs for i1 vectors - x86andnp is not used
+    if (CondVT.getScalarType() == MVT::i1)
+      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
+                         CastRHS);
+    else
+      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
     return DAG.getBitcast(VT, AndN);
   }
 
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 4aea7bc253bb..4aea7bc253bb 100755..100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 7fc96d1eda89..7fc96d1eda89 100755..100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 54c51b6e7161..f2588938d964 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -357,7 +357,7 @@ DeadArgumentEliminationPass::Liveness
 DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
                                            UseVector &MaybeLiveUses) {
   // We're live if our use or its Function is already marked as live.
-  if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+  if (IsLive(Use))
     return Live;
 
   // We're maybe live otherwise, but remember that we must become live if
@@ -657,10 +657,18 @@ void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
     MarkLive(RA);
     break;
   case MaybeLive:
-    // Note any uses of this value, so this return value can be
-    // marked live whenever one of the uses becomes live.
-    for (const auto &MaybeLiveUse : MaybeLiveUses)
-      Uses.insert(std::make_pair(MaybeLiveUse, RA));
+    assert(!IsLive(RA) && "Use is already live!");
+    for (const auto &MaybeLiveUse : MaybeLiveUses) {
+      if (IsLive(MaybeLiveUse)) {
+        // A use is live, so this value is live.
+        MarkLive(RA);
+        break;
+      } else {
+        // Note any uses of this value, so this value can be
+        // marked live whenever one of the uses becomes live.
+        Uses.insert(std::make_pair(MaybeLiveUse, RA));
+      }
+    }
     break;
   }
 }
@@ -686,17 +694,20 @@ void DeadArgumentEliminationPass::MarkLive(const Function &F) {
 /// mark any values that are used by this value (according to Uses) live as
 /// well.
 void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) {
-  if (LiveFunctions.count(RA.F))
-    return; // Function was already marked Live.
+  if (IsLive(RA))
+    return; // Already marked Live.
 
-  if (!LiveValues.insert(RA).second)
-    return; // We were already marked Live.
+  LiveValues.insert(RA);
 
   LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking "
                    << RA.getDescription() << " live\n");
   PropagateLiveness(RA);
 }
 
+bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) {
+  return LiveFunctions.count(RA.F) || LiveValues.count(RA);
+}
+
 /// PropagateLiveness - Given that RA is a live value, propagate it's liveness
 /// to any other values it uses (according to Uses).
 void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
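The DeadArgumentElimination change closes a propagation gap: when a value is recorded as maybe-live, a use that is already live must promote the value immediately, otherwise liveness discovered earlier never flows back to it. A toy recursive version of the same invariant, with simplified types standing in for the pass's real data structures:

    #include <map>
    #include <set>
    #include <vector>

    using Value = int;

    std::set<Value> Live;
    std::multimap<Value, Value> Uses; // use -> values that become live with it

    void markLive(Value V) {
      if (!Live.insert(V).second)
        return; // already live
      // Propagate: everything recorded as depending on V is live too.
      auto Range = Uses.equal_range(V);
      for (auto It = Range.first; It != Range.second; ++It)
        markLive(It->second);
    }

    void markMaybeLive(Value V, const std::vector<Value> &MaybeLiveUses) {
      for (Value Use : MaybeLiveUses) {
        if (Live.count(Use)) { // a use is already live: promote now
          markLive(V);
          return;
        }
        Uses.insert({Use, V}); // otherwise defer until Use becomes live
      }
    }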
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fa695c39cd1e..1e43014e7d32 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -782,25 +782,24 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
   // There are 8 commuted variants.
-  // Canonicalize -1 (saturated result) to true value of the select. Just
-  // swapping the compare operands is legal, because the selected value is the
-  // same in case of equality, so we can interchange u< and u<=.
+  // Canonicalize -1 (saturated result) to true value of the select.
   if (match(FVal, m_AllOnes())) {
     std::swap(TVal, FVal);
-    std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getInversePredicate(Pred);
   }
   if (!match(TVal, m_AllOnes()))
     return nullptr;
 
-  // Canonicalize predicate to 'ULT'.
-  if (Pred == ICmpInst::ICMP_UGT) {
-    Pred = ICmpInst::ICMP_ULT;
+  // Canonicalize predicate to less-than or less-or-equal-than.
+  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
     std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getSwappedPredicate(Pred);
   }
-  if (Pred != ICmpInst::ICMP_ULT)
+  if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE)
     return nullptr;
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  // Strictness of the comparison is irrelevant.
   Value *Y;
   if (match(Cmp0, m_Not(m_Value(X))) &&
       match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
@@ -809,6 +808,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
     return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
   }
   // The 'not' op may be included in the sum but not the compare.
+  // Strictness of the comparison is irrelevant.
   X = Cmp0;
   Y = Cmp1;
   if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
@@ -819,7 +819,9 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
         Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
   }
   // The overflow may be detected via the add wrapping round.
-  if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
+  // This is only valid for strict comparison!
+  if (Pred == ICmpInst::ICMP_ULT &&
+      match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
       match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
     // ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
     // ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
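The saturated-add canonicalization is easiest to sanity-check with scalar arithmetic: uadd.sat(x, y) yields the all-ones value exactly when x + y wraps, and the wrap test (x + y) u< x is only sound for a strict comparison, which is precisely the restriction the patch adds for the wrapping-round pattern. A plain C++ model of the matched IR:

    #include <cstdint>
    #include <limits>

    // Unsigned saturated add, written the way the matched IR computes it.
    uint32_t uaddSat(uint32_t X, uint32_t Y) {
      uint32_t Sum = X + Y; // may wrap around
      // (X + Y) u< X ? -1 : (X + Y): the strict compare detects the wrap.
      // With u<= this would be wrong: X + 0 u<= X holds with no overflow.
      return Sum < X ? std::numeric_limits<uint32_t>::max() : Sum;
    }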
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index ee09a4d9db7e..1557fad4d372 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -792,7 +792,7 @@ private:
                                    StringRef InternalSuffix);
   Instruction *CreateAsanModuleDtor(Module &M);
 
-  bool canInstrumentAliasedGlobal(const GlobalAlias &GA) const;
+  const GlobalVariable *getExcludedAliasedGlobal(const GlobalAlias &GA) const;
   bool shouldInstrumentGlobal(GlobalVariable *G) const;
   bool ShouldUseMachOGlobalsSection() const;
   StringRef getGlobalMetadataSection() const;
@@ -1784,20 +1784,22 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls(
   }
 }
 
-bool ModuleAddressSanitizer::canInstrumentAliasedGlobal(
-    const GlobalAlias &GA) const {
+const GlobalVariable *
+ModuleAddressSanitizer::getExcludedAliasedGlobal(const GlobalAlias &GA) const {
   // In case this function should be expanded to include rules that do not just
   // apply when CompileKernel is true, either guard all existing rules with an
   // 'if (CompileKernel) { ... }' or be absolutely sure that all these rules
   // should also apply to user space.
   assert(CompileKernel && "Only expecting to be called when compiling kernel");
 
+  const Constant *C = GA.getAliasee();
+
   // When compiling the kernel, globals that are aliased by symbols prefixed
   // by "__" are special and cannot be padded with a redzone.
   if (GA.getName().startswith("__"))
-    return false;
+    return dyn_cast<GlobalVariable>(C->stripPointerCastsAndAliases());
 
-  return true;
+  return nullptr;
 }
 
 bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
@@ -2256,14 +2258,12 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
   *CtorComdat = false;
 
   // Build set of globals that are aliased by some GA, where
-  // canInstrumentAliasedGlobal(GA) returns false.
+  // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable.
   SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions;
   if (CompileKernel) {
     for (auto &GA : M.aliases()) {
-      if (const auto *GV = dyn_cast<GlobalVariable>(GA.getAliasee())) {
-        if (!canInstrumentAliasedGlobal(GA))
-          AliasedGlobalExclusions.insert(GV);
-      }
+      if (const GlobalVariable *GV = getExcludedAliasedGlobal(GA))
+        AliasedGlobalExclusions.insert(GV);
     }
   }