Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 13
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 8
-rw-r--r--  llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 19
-rw-r--r--  llvm/lib/IR/ConstantFold.cpp | 2
-rw-r--r--  llvm/lib/MC/MCObjectFileInfo.cpp | 2
-rw-r--r--  llvm/lib/MC/MCParser/ELFAsmParser.cpp | 7
-rw-r--r--  llvm/lib/MCA/HardwareUnits/LSUnit.cpp | 2
-rw-r--r--  llvm/lib/Support/Host.cpp | 5
-rw-r--r--  llvm/lib/Support/Windows/Path.inc | 21
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.cpp | 9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 17
-rw-r--r--  llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 30
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 134
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 38
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 8
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 24
-rw-r--r--  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 4
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/Hexagon/HexagonAsmPrinter.h | 0
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/Sparc/LeonFeatures.td | 0
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/Sparc/LeonPasses.cpp | 0
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/Sparc/LeonPasses.h | 0
-rw-r--r--  llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 2
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/Sparc/SparcSchedule.td | 0
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 17
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 4
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp | 10
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/X86/X86EvexToVex.cpp | 0
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp | 222
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.h | 8
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 29
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/X86/X86SchedBroadwell.td | 0
-rw-r--r-- (was -rwxr-xr-x)  llvm/lib/Target/X86/X86SchedSkylakeServer.td | 0
-rw-r--r--  llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 29
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 20
38 files changed, 517 insertions, 207 deletions
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 608fc0388af0..c3b039b05f30 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -132,19 +132,20 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
uint64_t end_offset = Offset + full_length;
while (*offset_ptr < end_offset) {
+ uint64_t EntryOffset = *offset_ptr;
arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
- if (arangeDescriptor.Length == 0) {
- // Each set of tuples is terminated by a 0 for the address and 0
- // for the length.
- if (arangeDescriptor.Address == 0 && *offset_ptr == end_offset)
+ // Each set of tuples is terminated by a 0 for the address and 0
+ // for the length.
+ if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) {
+ if (*offset_ptr == end_offset)
return ErrorSuccess();
return createStringError(
errc::invalid_argument,
"address range table at offset 0x%" PRIx64
- " has an invalid tuple (length = 0) at offset 0x%" PRIx64,
- Offset, *offset_ptr - tuple_size);
+ " has a premature terminator entry at offset 0x%" PRIx64,
+ Offset, EntryOffset);
}
ArangeDescriptors.push_back(arangeDescriptor);
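
The new check treats a tuple as a terminator only when both the address and the length are zero, and accepts it only when it lands exactly at the end of the set. A minimal stand-alone sketch of that logic, using simplified types rather than the LLVM reader (parseArangeTuples is a made-up helper for illustration):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Tuples are (address, length) pairs. A (0, 0) pair terminates the set; a
// zero-length pair with a non-zero address is now kept instead of rejected.
bool parseArangeTuples(const std::vector<std::pair<uint64_t, uint64_t>> &Tuples,
                       std::vector<std::pair<uint64_t, uint64_t>> &Out) {
  for (std::size_t I = 0; I != Tuples.size(); ++I) {
    uint64_t Address = Tuples[I].first, Length = Tuples[I].second;
    if (Address == 0 && Length == 0) {
      if (I + 1 == Tuples.size())
        return true; // proper terminator at the end of the set
      std::fprintf(stderr, "premature terminator entry at tuple %zu\n", I);
      return false;
    }
    Out.push_back(Tuples[I]);
  }
  return false; // ran off the end without seeing a terminator
}
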
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 3d4cecce27db..d27fd08db14e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -286,10 +286,14 @@ const DWARFUnitIndex::Entry *DWARFUnitIndex::getFromHash(uint64_t S) const {
auto H = S & Mask;
auto HP = ((S >> 32) & Mask) | 1;
- while (Rows[H].getSignature() != S && Rows[H].getSignature() != 0)
+ // The spec says "while 0 is a valid hash value, the row index in a used slot
+ // will always be non-zero". Loop until we find a match or an empty slot.
+ while (Rows[H].getSignature() != S && Rows[H].Index != nullptr)
H = (H + HP) & Mask;
- if (Rows[H].getSignature() != S)
+ // If the slot is empty, we don't care whether the signature matches (it could
+ // be zero and still match the zeros in the empty slot).
+ if (Rows[H].Index == nullptr)
return nullptr;
return &Rows[H];
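
The lookup relies on the invariant quoted in the comment: 0 is a legal hash value, so an empty slot is recognized by its null unit pointer rather than by a zero signature. A minimal stand-alone sketch of the probe loop, with Row and lookupBySignature as simplified stand-ins for the LLVM types (the table size is assumed to be a power of two, which is why the secondary step is forced odd):

#include <cstdint>

struct Row {
  uint64_t Signature = 0;
  const void *Index = nullptr; // non-null if and only if the slot is used
};

// Open-addressing lookup: stop on a matching signature or on an empty slot.
// Mask is TableSize - 1 for a power-of-two table, so OR-ing 1 into the step
// keeps it odd and therefore coprime with the table size.
const Row *lookupBySignature(const Row *Rows, uint64_t Mask, uint64_t S) {
  uint64_t H = S & Mask;
  uint64_t HP = ((S >> 32) & Mask) | 1;
  while (Rows[H].Signature != S && Rows[H].Index != nullptr)
    H = (H + HP) & Mask;
  return Rows[H].Index ? &Rows[H] : nullptr;
}
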
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 7e9b0690ccea..04f541b59557 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -308,7 +308,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " SID: " << SectionID
<< " Offset: " << format("%p", (uintptr_t)Addr)
<< " flags: " << *FlagsOrErr << "\n");
- GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, *JITSymFlags);
+ if (!Name.empty()) // Skip absolute symbol relocations.
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, Addr, *JITSymFlags);
} else if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown ||
@@ -340,8 +342,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " SID: " << SectionID
<< " Offset: " << format("%p", (uintptr_t)SectOffset)
<< " flags: " << *FlagsOrErr << "\n");
- GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
+ if (!Name.empty()) // Skip absolute symbol relocations
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
}
}
@@ -769,8 +772,9 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
LLVM_DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
<< format("%p", Addr) << "\n");
- GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
+ if (!Name.empty()) // Skip absolute symbol relocations.
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
Offset += Size;
Addr += Size;
}
@@ -930,6 +934,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
if (Loc == GlobalSymbolTable.end()) {
ExternalSymbolRelocations[SymbolName].push_back(RE);
} else {
+ assert(!SymbolName.empty() &&
+ "Empty symbol should not be in GlobalSymbolTable");
// Copy the RE since we want to modify its addend.
RelocationEntry RECopy = RE;
const auto &SymInfo = Loc->second;
@@ -1234,7 +1240,8 @@ void RuntimeDyldImpl::finalizeAsync(
for (auto &RelocKV : SharedThis->ExternalSymbolRelocations) {
StringRef Name = RelocKV.first();
- assert(!Name.empty() && "Symbol has no name?");
+ if (Name.empty()) // Skip absolute symbol relocations.
+ continue;
assert(!SharedThis->GlobalSymbolTable.count(Name) &&
"Name already processed. RuntimeDyld instances can not be re-used "
"when finalizing with finalizeAsync.");
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index f3c3e9ad9f69..c20d0955f3d8 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1589,7 +1589,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
const GlobalValue *GV2) {
auto isGlobalUnsafeForEquality = [](const GlobalValue *GV) {
- if (GV->hasExternalWeakLinkage() || GV->hasWeakAnyLinkage())
+ if (GV->isInterposable() || GV->hasGlobalUnnamedAddr())
return true;
if (const auto *GVar = dyn_cast<GlobalVariable>(GV)) {
Type *Ty = GVar->getValueType();
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index b77a9635f64c..b9b4416fde21 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -317,6 +317,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
break;
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::aarch64:
+ case Triple::aarch64_be:
case Triple::x86_64:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e5ab13bc719d..fb8215ef2281 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -644,10 +644,13 @@ EndStmt:
!(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
utohexstr(Section->getType()));
- if (Section->getFlags() != Flags)
+ // Check that flags are used consistently. However, the GNU assembler permits
+ // these to be omitted in subsequent uses of the same section; for
+ // compatibility, do likewise.
+ if ((Flags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
utohexstr(Section->getFlags()));
- if (Section->getEntrySize() != Size)
+ if ((Flags || Size || !TypeName.empty()) && Section->getEntrySize() != Size)
Error(loc, "changed section entsize for " + SectionName +
", expected: " + Twine(Section->getEntrySize()));
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
index e945e8cecce9..4594368fc0e9 100644
--- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -243,6 +243,8 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
CurrentStoreGroupID = 0;
if (GroupID == CurrentLoadBarrierGroupID)
CurrentLoadBarrierGroupID = 0;
+ if (GroupID == CurrentStoreBarrierGroupID)
+ CurrentStoreBarrierGroupID = 0;
}
}
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 658c1ee74cfe..36cecf9b2a16 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -760,14 +760,15 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Type = X86::INTEL_GOLDMONT_PLUS;
break;
case 0x86:
+ CPU = "tremont";
*Type = X86::INTEL_TREMONT;
break;
+ // Xeon Phi (Knights Landing + Knights Mill):
case 0x57:
- CPU = "tremont";
+ CPU = "knl";
*Type = X86::INTEL_KNL;
break;
-
case 0x85:
CPU = "knm";
*Type = X86::INTEL_KNM;
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index e352beb77616..a4ffc0ec4313 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -19,7 +19,6 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/WindowsError.h"
#include <fcntl.h>
-#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -352,13 +351,13 @@ std::error_code is_local(const Twine &path, bool &result) {
static std::error_code realPathFromHandle(HANDLE H,
SmallVectorImpl<wchar_t> &Buffer) {
DWORD CountChars = ::GetFinalPathNameByHandleW(
- H, Buffer.begin(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
- if (CountChars > Buffer.capacity()) {
+ H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED);
+ if (CountChars && CountChars >= Buffer.capacity()) {
// The buffer wasn't big enough, try again. In this case the return value
// *does* indicate the size of the null terminator.
Buffer.reserve(CountChars);
CountChars = ::GetFinalPathNameByHandleW(
- H, Buffer.data(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
+ H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED);
}
if (CountChars == 0)
return mapWindowsError(GetLastError());
@@ -403,6 +402,20 @@ std::error_code is_local(int FD, bool &Result) {
}
static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
+ // First, check if the file is on a network (non-local) drive. If so, don't
+ // set DeleteFile to true, since it prevents opening the file for writes.
+ SmallVector<wchar_t, 128> FinalPath;
+ if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
+ return EC;
+
+ bool IsLocal;
+ if (std::error_code EC = is_local_internal(FinalPath, IsLocal))
+ return EC;
+
+ if (!IsLocal)
+ return std::error_code();
+
+ // The file is on a local drive, set the DeleteFile to true.
FILE_DISPOSITION_INFO Disposition;
Disposition.DeleteFile = Delete;
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
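
The retry pattern above leans on the documented behaviour of GetFinalPathNameByHandleW: on success the return value excludes the terminating null, while a too-small buffer makes it return the required size including the null. A minimal stand-alone sketch of that pattern, where finalPathForHandle is a hypothetical helper and not LLVM's realPathFromHandle:

#include <windows.h>

#include <string>
#include <vector>

static std::wstring finalPathForHandle(HANDLE H) {
  std::vector<wchar_t> Buf(128);
  DWORD N = ::GetFinalPathNameByHandleW(H, Buf.data(), (DWORD)Buf.size(),
                                        FILE_NAME_NORMALIZED);
  if (N >= Buf.size()) { // too small: N is the required size incl. the null
    Buf.resize(N);
    N = ::GetFinalPathNameByHandleW(H, Buf.data(), (DWORD)Buf.size(),
                                    FILE_NAME_NORMALIZED);
  }
  if (N == 0 || N >= Buf.size())
    return std::wstring(); // failed; GetLastError() has the reason
  return std::wstring(Buf.data(), N); // N excludes the null on success
}
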
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 3a94820dac8d..7ec7ffe309f7 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -89,6 +89,8 @@ public:
void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned JTI);
+ void emitFunctionEntryLabel() override;
+
void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -822,6 +824,19 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
OutStreamer->emitValue(Value, Size);
}
+void AArch64AsmPrinter::emitFunctionEntryLabel() {
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall ||
+ MF->getFunction().getCallingConv() ==
+ CallingConv::AArch64_SVE_VectorCall ||
+ STI->getRegisterInfo()->hasSVEArgsOrReturn(MF)) {
+ auto *TS =
+ static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->emitDirectiveVariantPCS(CurrentFnSym);
+ }
+
+ return AsmPrinter::emitFunctionEntryLabel();
+}
+
/// Small jump tables contain an unsigned byte or half, representing the offset
/// from the lowest-addressed possible destination to the desired basic
/// block. Since all instructions are 4-byte aligned, this is further compressed
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index 84ec5afcc9c1..9ae2b465e247 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -35,6 +35,9 @@ static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
AArch64::Q3, AArch64::Q4, AArch64::Q5,
AArch64::Q6, AArch64::Q7};
+static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
+ AArch64::Z3, AArch64::Z4, AArch64::Z5,
+ AArch64::Z6, AArch64::Z7};
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
@@ -97,6 +100,8 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
RegList = DRegList;
else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
RegList = QRegList;
+ else if (LocVT.isScalableVector())
+ RegList = ZRegList;
else {
// Not an array we want to split up after all.
return false;
@@ -141,6 +146,10 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
+ if (LocVT.isScalableVector())
+ report_fatal_error(
+ "Passing consecutive scalable vector registers unsupported");
+
// Mark all regs in the class as unavailable
for (auto Reg : RegList)
State.AllocateReg(Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 45bfa85bdc07..48ca9039b1bd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4366,6 +4366,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
+ if (!Outs[i].IsFixed && ArgVT.isScalableVector())
+ report_fatal_error("Passing SVE types to variadic functions is "
+ "currently not supported");
+
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
/*IsVarArg=*/ !Outs[i].IsFixed);
@@ -6168,6 +6172,10 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
+ if (VT.isScalableVector())
+ report_fatal_error("Passing SVE types to variadic functions is "
+ "currently not supported");
+
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
@@ -14702,7 +14710,14 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
- return Ty->isArrayTy();
+ if (Ty->isArrayTy())
+ return true;
+
+ const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
+ if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
+ return true;
+
+ return false;
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 0ac09c4f96f0..e72ae0e62cb7 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -179,6 +179,8 @@ private:
bool parseDirectiveCFINegateRAState();
bool parseDirectiveCFIBKeyFrame();
+ bool parseDirectiveVariantPCS(SMLoc L);
+
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -5077,6 +5079,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveCFIBKeyFrame();
else if (IDVal == ".arch_extension")
parseDirectiveArchExtension(Loc);
+ else if (IDVal == ".variant_pcs")
+ parseDirectiveVariantPCS(Loc);
else if (IsMachO) {
if (IDVal == MCLOHDirectiveName())
parseDirectiveLOH(IDVal, Loc);
@@ -5507,6 +5511,32 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
return false;
}
+/// parseDirectiveVariantPCS
+/// ::= .variant_pcs symbolname
+bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef SymbolName = Tok.getIdentifier();
+
+ MCSymbol *Sym = getContext().lookupSymbol(SymbolName);
+ if (!Sym)
+ return TokError("unknown symbol in '.variant_pcs' directive");
+
+ Parser.Lex(); // Eat the symbol
+
+ // Shouldn't be any more tokens
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.variant_pcs' directive");
+
+ getTargetStreamer().emitDirectiveVariantPCS(Sym);
+
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 408f0cb77e73..7733fe7f7b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -289,14 +289,15 @@ private:
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
- /// Instructions that accept extend modifiers like UXTW expect the register
- /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
- /// subregister copy if necessary. Return either ExtReg, or the result of the
- /// new copy.
- Register narrowExtendRegIfNeeded(Register ExtReg,
- MachineIRBuilder &MIB) const;
- Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
- MachineIRBuilder &MIB) const;
+ /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
+ ///
+ /// \returns Either \p Reg if no change was necessary, or the new register
+ /// created by moving \p Reg.
+ ///
+ /// Note: This uses emitCopy right now.
+ Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
+ MachineIRBuilder &MIB) const;
+
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1195,10 +1196,10 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
// TBNZW work.
bool UseWReg = Bit < 32;
unsigned NecessarySize = UseWReg ? 32 : 64;
- if (Size < NecessarySize)
- TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
- else if (Size > NecessarySize)
- TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+ if (Size != NecessarySize)
+ TestReg = moveScalarRegClass(
+ TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
+ MIB);
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
{AArch64::TBZW, AArch64::TBNZW}};
@@ -4904,9 +4905,19 @@ AArch64InstructionSelector::selectExtendedSHL(
return None;
unsigned OffsetOpc = OffsetInst->getOpcode();
- if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
- return None;
+ bool LookedThroughZExt = false;
+ if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
+ // Try to look through a ZEXT.
+ if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
+ return None;
+
+ OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
+ OffsetOpc = OffsetInst->getOpcode();
+ LookedThroughZExt = true;
+ if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
+ return None;
+ }
// Make sure that the memory op is a valid size.
int64_t LegalShiftVal = Log2_32(SizeInBytes);
if (LegalShiftVal == 0)
@@ -4957,21 +4968,24 @@ AArch64InstructionSelector::selectExtendedSHL(
unsigned SignExtend = 0;
if (WantsExt) {
- // Check if the offset is defined by an extend.
- MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
- auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
- if (Ext == AArch64_AM::InvalidShiftExtend)
- return None;
+ // Check if the offset is defined by an extend, unless we looked through a
+ // G_ZEXT earlier.
+ if (!LookedThroughZExt) {
+ MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+ auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
- SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
- // We only support SXTW for signed extension here.
- if (SignExtend && Ext != AArch64_AM::SXTW)
- return None;
+ SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
+ // We only support SXTW for signed extension here.
+ if (SignExtend && Ext != AArch64_AM::SXTW)
+ return None;
+ OffsetReg = ExtInst->getOperand(1).getReg();
+ }
// Need a 32-bit wide register here.
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
- OffsetReg = ExtInst->getOperand(1).getReg();
- OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+ OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
}
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5143,8 +5157,8 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
// Need a 32-bit wide register.
MachineIRBuilder MIB(*PtrAdd);
- Register ExtReg =
- narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+ Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
+ AArch64::GPR32RegClass, MIB);
unsigned SignExtend = Ext == AArch64_AM::SXTW;
// Base is LHS, offset is ExtReg.
@@ -5418,67 +5432,21 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
}
}
-Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
- Register ExtReg, MachineIRBuilder &MIB) const {
+Register AArch64InstructionSelector::moveScalarRegClass(
+ Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
- if (MRI.getType(ExtReg).getSizeInBits() == 32)
- return ExtReg;
-
- // Insert a copy to move ExtReg to GPR32.
- Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+ auto Ty = MRI.getType(Reg);
+ assert(!Ty.isVector() && "Expected scalars only!");
+ if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
+ return Reg;
- // Select the copy into a subregister copy.
+ // Create a copy and immediately select it.
+ // FIXME: We should have an emitCopy function?
+ auto Copy = MIB.buildCopy({&RC}, {Reg});
selectCopy(*Copy, TII, MRI, TRI, RBI);
return Copy.getReg(0);
}
-Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
- Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
- assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
- assert(WideSize >= NarrowSize &&
- "WideSize cannot be smaller than NarrowSize!");
-
- // If the sizes match, just return the register.
- //
- // If NarrowSize is an s1, then we can select it to any size, so we'll treat
- // it as a don't care.
- if (NarrowSize == WideSize || NarrowSize == 1)
- return Reg;
-
- // Now check the register classes.
- const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
- const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
- const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
- assert(OrigRC && "Could not determine narrow RC?");
- assert(WideRC && "Could not determine wide RC?");
-
- // If the sizes differ, but the register classes are the same, there is no
- // need to insert a SUBREG_TO_REG.
- //
- // For example, an s8 that's supposed to be a GPR will be selected to either
- // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
- // always end up on a GPR32.
- if (OrigRC == WideRC)
- return Reg;
-
- // We have two different register classes. Insert a SUBREG_TO_REG.
- unsigned SubReg = 0;
- getSubRegForClass(OrigRC, TRI, SubReg);
- assert(SubReg && "Couldn't determine subregister?");
-
- // Build the SUBREG_TO_REG and return the new, widened register.
- auto SubRegToReg =
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
- .addImm(0)
- .addUse(Reg)
- .addImm(SubReg);
- constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
- return SubRegToReg.getReg(0);
-}
-
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
@@ -5539,7 +5507,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
// copy.
MachineIRBuilder MIB(*RootDef);
- ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+ ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
[=](MachineInstrBuilder &MIB) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2eaec0b970fa..4ffde2a7e3c4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -97,15 +97,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(0);
getActionDefinitionsBuilder(G_SHL)
- .legalFor({{s32, s32}, {s64, s64},
- {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
- .clampScalar(1, s32, s64)
- .clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0)
- .clampNumElements(0, v2s32, v4s32)
- .clampNumElements(0, v2s64, v2s64)
- .moreElementsToNextPow2(0)
- .minScalarSameAs(1, 0);
+ .customIf([=](const LegalityQuery &Query) {
+ const auto &SrcTy = Query.Types[0];
+ const auto &AmtTy = Query.Types[1];
+ return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+ AmtTy.getSizeInBits() == 32;
+ })
+ .legalFor({{s32, s32},
+ {s64, s64},
+ {s32, s64},
+ {v2s32, v2s32},
+ {v4s32, v4s32},
+ {v2s64, v2s64}})
+ .clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(0)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0)
+ .minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}, {v2p0, v2s64}})
@@ -710,16 +720,14 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
// If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
// imported patterns can select it later. Either way, it will be legal.
Register AmtReg = MI.getOperand(2).getReg();
- auto *CstMI = MRI.getVRegDef(AmtReg);
- assert(CstMI && "expected to find a vreg def");
- if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
+ auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
+ if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
- unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
+ int64_t Amount = VRegAndVal->Value;
if (Amount > 31)
return true; // This will have to remain a register variant.
- assert(MRI.getType(AmtReg).getSizeInBits() == 32);
- auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
+ auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
MI.getOperand(2).setReg(ExtCst.getReg(0));
return true;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 7e3ff1948dad..93213f5977e5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -261,6 +261,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::GPR64common_and_GPR64noipRegClassID:
case AArch64::GPR64noip_and_tcGPR64RegClassID:
case AArch64::tcGPR64RegClassID:
+ case AArch64::rtcGPR64RegClassID:
case AArch64::WSeqPairsClassRegClassID:
case AArch64::XSeqPairsClassRegClassID:
return getRegBank(AArch64::GPRRegBankID);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index fe4c34be1519..6dfda8217628 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -47,6 +47,10 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
void emitInst(uint32_t Inst) override;
+ void emitDirectiveVariantPCS(MCSymbol *Symbol) override {
+ OS << "\t.variant_pcs " << Symbol->getName() << "\n";
+ }
+
public:
AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
};
@@ -194,6 +198,10 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
getStreamer().emitInst(Inst);
}
+void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
+ cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
+}
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 3a0c5d8318dd..1af978a806d1 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -36,6 +36,9 @@ public:
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
+ /// Callback used to implement the .variant_pcs directive.
+ virtual void emitDirectiveVariantPCS(MCSymbol *Symbol) {};
+
virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
@@ -63,6 +66,7 @@ private:
AArch64ELFStreamer &getStreamer();
void emitInst(uint32_t Inst) override;
+ void emitDirectiveVariantPCS(MCSymbol *Symbol) override;
public:
AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index f31c722db1b2..442be886a8ac 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -254,16 +254,24 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
- if (MBBE != MBB.end()) {
- MachineInstr &MI = *MBBE;
+ MachineBasicBlock::iterator TermI = MBBE;
+ // Check first terminator for VCC branches to optimize
+ if (TermI != MBB.end()) {
+ MachineInstr &MI = *TermI;
switch (MI.getOpcode()) {
case AMDGPU::S_CBRANCH_VCCZ:
case AMDGPU::S_CBRANCH_VCCNZ:
Changed |= optimizeVccBranch(MI);
continue;
- case AMDGPU::SI_RETURN_TO_EPILOG:
- // FIXME: This is not an optimization and should be
- // moved somewhere else.
+ default:
+ break;
+ }
+ }
+ // Check all terminators for SI_RETURN_TO_EPILOG
+ // FIXME: This is not an optimization and should be moved somewhere else.
+ while (TermI != MBB.end()) {
+ MachineInstr &MI = *TermI;
+ if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
@@ -281,11 +289,11 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
.addMBB(EmptyMBBAtEnd);
MI.eraseFromParent();
MBBE = MBB.getFirstTerminator();
+ TermI = MBBE;
+ continue;
}
- break;
- default:
- break;
}
+ TermI++;
}
if (!ST.hasVGPRIndexMode())
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index d407edfbd966..77f565fb5957 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -254,7 +254,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
const LoadSDNode *LD = cast<LoadSDNode>(Node);
uint64_t size = LD->getMemOperand()->getSize();
- if (!size || size > 8 || (size & (size - 1)))
+ if (!size || size > 8 || (size & (size - 1)) || !LD->isSimple())
return;
SDNode *LDAddrNode = LD->getOperand(1).getNode();
@@ -342,7 +342,7 @@ bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node,
unsigned char *ByteSeq) {
const GlobalVariable *V = dyn_cast<GlobalVariable>(Node->getGlobal());
- if (!V || !V->hasInitializer())
+ if (!V || !V->hasInitializer() || !V->isConstant())
return false;
const Constant *Init = V->getInitializer();
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 3932def87854..3932def87854 100755..100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
diff --git a/llvm/lib/Target/Sparc/LeonFeatures.td b/llvm/lib/Target/Sparc/LeonFeatures.td
index 75273eff1868..75273eff1868 100755..100644
--- a/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/llvm/lib/Target/Sparc/LeonFeatures.td
diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp
index e9d3aaeb9cfe..e9d3aaeb9cfe 100755..100644
--- a/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/llvm/lib/Target/Sparc/LeonPasses.cpp
diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h
index b165bc93780f..b165bc93780f 100755..100644
--- a/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/llvm/lib/Target/Sparc/LeonPasses.h
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 069e43c6f544..7845a18b14c1 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -351,7 +351,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_Immediate:
- O << (int)MO.getImm();
+ O << MO.getImm();
break;
case MachineOperand::MO_MachineBasicBlock:
MO.getMBB()->getSymbol()->print(O, MAI);
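
Dropping the (int) cast matters because MachineOperand::getImm() returns a 64-bit value; narrowing it to int silently truncates large immediates. A tiny stand-alone illustration with a made-up value (behaviour shown for typical two's-complement targets):

#include <cstdint>
#include <iostream>

int main() {
  int64_t Imm = 0x100000001LL;   // an immediate that needs more than 32 bits
  std::cout << (int)Imm << "\n"; // old behaviour: truncated to 1
  std::cout << Imm << "\n";      // new behaviour: 4294967297
}
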
diff --git a/llvm/lib/Target/Sparc/SparcSchedule.td b/llvm/lib/Target/Sparc/SparcSchedule.td
index 0f05372b7050..0f05372b7050 100755..100644
--- a/llvm/lib/Target/Sparc/SparcSchedule.td
+++ b/llvm/lib/Target/Sparc/SparcSchedule.td
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 8a0092a3f298..c2a0d3e01740 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -58,6 +58,9 @@ class WebAssemblyFastISel final : public FastISel {
int FI;
} Base;
+ // Whether the base has been determined yet
+ bool IsBaseSet = false;
+
int64_t Offset = 0;
const GlobalValue *GV = nullptr;
@@ -74,8 +77,9 @@ class WebAssemblyFastISel final : public FastISel {
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
- assert(Base.Reg == 0 && "Overwriting non-zero register");
+ assert(!IsBaseSet && "Base cannot be reset");
Base.Reg = Reg;
+ IsBaseSet = true;
}
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
@@ -83,8 +87,9 @@ class WebAssemblyFastISel final : public FastISel {
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
- assert(Base.FI == 0 && "Overwriting non-zero frame index");
+ assert(!IsBaseSet && "Base cannot be reset");
Base.FI = FI;
+ IsBaseSet = true;
}
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
@@ -98,13 +103,7 @@ class WebAssemblyFastISel final : public FastISel {
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() const { return GV; }
- bool isSet() const {
- if (isRegBase()) {
- return Base.Reg != 0;
- } else {
- return Base.FI != 0;
- }
- }
+ bool isSet() const { return IsBaseSet; }
};
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
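
The replaced isSet() logic treated a zero register or frame index as "not set", but 0 is a legitimate frame index, so the address class now carries an explicit flag. A minimal sketch of that pattern with a simplified AddressBase type (not the FastISel Address class itself):

#include <cassert>

struct AddressBase {
  unsigned FI = 0;        // frame index 0 is a perfectly valid value
  bool IsBaseSet = false; // so "has it been assigned" is tracked separately

  void setFI(unsigned NewFI) {
    assert(!IsBaseSet && "Base cannot be reset");
    FI = NewFI;
    IsBaseSet = true;
  }
  bool isSet() const { return IsBaseSet; }
};
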
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 5ff0d73534a6..085910f01ee6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -328,7 +328,9 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
- (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>;
+ (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+ (CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
(GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 130589c9df8c..6b6394a58339 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -101,10 +101,12 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
WebAssemblyFrameLowering::getOpcConst(MF) &&
MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
MachineOperand &ImmMO = Def->getOperand(1);
- ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
- MI.getOperand(FIOperandNum)
- .ChangeToRegister(FrameRegister, /*isDef=*/false);
- return;
+ if (ImmMO.isImm()) {
+ ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(FrameRegister, /*isDef=*/false);
+ return;
+ }
}
}
}
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 540ad98b6d54..540ad98b6d54 100755..100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index c7ca6fb2a4fc..db6b68659493 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
uint64_t ProbeChunk = StackProbeSize * 8;
+ uint64_t MaxAlign =
+ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
+
// Synthesize a loop or unroll it, depending on the number of iterations.
+ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
+ // between the unaligned rsp and current rsp.
if (Offset > ProbeChunk) {
- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
+ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
+ MaxAlign % StackProbeSize);
} else {
- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
+ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
+ MaxAlign % StackProbeSize);
}
}
void X86FrameLowering::emitStackProbeInlineGenericBlock(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+ uint64_t AlignOffset) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
uint64_t CurrentOffset = 0;
- // 0 Thanks to return address being saved on the stack
- uint64_t CurrentProbeOffset = 0;
- // For the first N - 1 pages, just probe. I tried to take advantage of
+ assert(AlignOffset < StackProbeSize);
+
+ // If the offset is so small it fits within a page, there's nothing to do.
+ if (StackProbeSize < Offset + AlignOffset) {
+
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize - AlignOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ CurrentOffset = StackProbeSize - AlignOffset;
+ }
+
+ // For the next N - 1 pages, just probe. I tried to take advantage of
// natural probes but it implies much more logic and there was very few
// interesting natural probes to interleave.
while (CurrentOffset + StackProbeSize < Offset) {
@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
.setMIFlag(MachineInstr::FrameSetup);
NumFrameExtraProbe++;
CurrentOffset += StackProbeSize;
- CurrentProbeOffset += StackProbeSize;
}
+ // No need to probe the tail, it is smaller than a Page.
uint64_t ChunkSize = Offset - CurrentOffset;
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
void X86FrameLowering::emitStackProbeInlineGenericLoop(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+ uint64_t AlignOffset) const {
assert(Offset && "null offset");
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ if (AlignOffset) {
+ if (AlignOffset < StackProbeSize) {
+ // Perform a first smaller allocation followed by a probe.
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(AlignOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ Offset -= AlignOffset;
+ }
+ }
+
// Synthesize a loop
NumFrameLoopProbe++;
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// save loop bound
{
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)
.addImm(Offset / StackProbeSize * StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// allocate a page
{
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
uint64_t MaxAlign) const {
uint64_t Val = -MaxAlign;
unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
- .addReg(Reg)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ MachineFunction &MF = *MBB.getParent();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+
+ // We want to make sure that (in worst case) less than StackProbeSize bytes
+ // are not probed after the AND. This assumption is used in
+ // emitStackProbeInlineGeneric.
+ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
+ {
+ NumFrameLoopProbe++;
+ MachineBasicBlock *entryMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *headMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *bodyMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *footMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MachineFunction::iterator MBBIter = MBB.getIterator();
+ MF.insert(MBBIter, entryMBB);
+ MF.insert(MBBIter, headMBB);
+ MF.insert(MBBIter, bodyMBB);
+ MF.insert(MBBIter, footMBB);
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+
+ // Setup entry block
+ {
+
+ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
+ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MachineInstr *MI =
+ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+
+ BuildMI(entryMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(&MBB)
+ .addImm(X86::COND_E)
+ .setMIFlag(MachineInstr::FrameSetup);
+ entryMBB->addSuccessor(headMBB);
+ entryMBB->addSuccessor(&MBB);
+ }
+
+ // Loop entry block
+
+ {
+ const unsigned SUBOpc =
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(headMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // jump
+ BuildMI(headMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(footMBB)
+ .addImm(X86::COND_B)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ headMBB->addSuccessor(bodyMBB);
+ headMBB->addSuccessor(footMBB);
+ }
+
+ // setup loop body
+ {
+ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ const unsigned SUBOpc =
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // cmp with stack pointer bound
+ BuildMI(bodyMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // jump
+ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(bodyMBB)
+ .addImm(X86::COND_B)
+ .setMIFlag(MachineInstr::FrameSetup);
+ bodyMBB->addSuccessor(bodyMBB);
+ bodyMBB->addSuccessor(footMBB);
+ }
+
+ // setup loop footer
+ {
+ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
+ .addReg(FinalStackProbed)
+ .setMIFlag(MachineInstr::FrameSetup);
+ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ footMBB->addSuccessor(&MBB);
+ }
+
+ recomputeLiveIns(*headMBB);
+ recomputeLiveIns(*bodyMBB);
+ recomputeLiveIns(*footMBB);
+ recomputeLiveIns(MBB);
+ }
+ } else {
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+ .addReg(Reg)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
}
bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
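
The amount threaded through as AlignOffset is simply MaxAlign % StackProbeSize: after BuildStackAlignAND, at most that many unprobed bytes can sit between the unaligned and the realigned stack pointer, so the first probe of the block or loop form is shrunk by that amount, and once MaxAlign reaches StackProbeSize the AND itself becomes the probing loop added above. A small worked example with assumed values:

#include <cstdint>

// Assumed values: 4 KiB probe size, 64-byte stack realignment.
constexpr uint64_t StackProbeSize = 4096;
constexpr uint64_t MaxAlign = 64;
constexpr uint64_t AlignOffset = MaxAlign % StackProbeSize; // 64
// The shortened first probe (StackProbeSize - AlignOffset bytes) still stays
// within a single page.
static_assert(AlignOffset < StackProbeSize, "first probe fits in one page");
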
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index c0b4be95f88d..bb2e83205e71 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -213,14 +213,14 @@ private:
void emitStackProbeInlineGenericBlock(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
+ const DebugLoc &DL, uint64_t Offset,
+ uint64_t Align) const;
void emitStackProbeInlineGenericLoop(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
+ const DebugLoc &DL, uint64_t Offset,
+ uint64_t Align) const;
/// Emit a stub to later inline the target stack probe.
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1671917157f4..56690c3c555b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30285,6 +30285,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::BITREVERSE:
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ assert(Subtarget.hasXOP() && "Expected XOP");
+ // We can use VPPERM by copying to a vector register and back. We'll need
+ // to move the scalar in two i32 pieces.
+ Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
+ return;
}
}
@@ -31876,7 +31883,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
- .addImm(X86::COND_L);
+ .addImm(X86::COND_GE);
testMBB->addSuccessor(blockMBB);
testMBB->addSuccessor(tailMBB);
@@ -31892,9 +31899,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
//
// The property we want to enforce is to never have more than [page alloc] between two probes.
- const unsigned MovMIOpc =
- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+ const unsigned XORMIOpc =
+ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
+ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
.addImm(0);
BuildMI(blockMBB, DL,
@@ -36018,8 +36025,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// Share broadcast with the longest vector and extract low subvector (free).
+ // Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+ Src == User->getOperand(0) &&
User->getValueSizeInBits(0) > VT.getSizeInBits()) {
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
VT.getSizeInBits());
@@ -39588,10 +39597,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
// vselect Cond, 000..., X -> andn Cond, X
if (TValIsAllZeros) {
- MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
- SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
- SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
- SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
+ SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+ SDValue AndN;
+ // The canonical form differs for i1 vectors - x86andnp is not used
+ if (CondVT.getScalarType() == MVT::i1)
+ AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
+ CastRHS);
+ else
+ AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, AndN);
}
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 4aea7bc253bb..4aea7bc253bb 100755..100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 7fc96d1eda89..7fc96d1eda89 100755..100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 54c51b6e7161..f2588938d964 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -357,7 +357,7 @@ DeadArgumentEliminationPass::Liveness
DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
UseVector &MaybeLiveUses) {
// We're live if our use or its Function is already marked as live.
- if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ if (IsLive(Use))
return Live;
// We're maybe live otherwise, but remember that we must become live if
@@ -657,10 +657,18 @@ void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
MarkLive(RA);
break;
case MaybeLive:
- // Note any uses of this value, so this return value can be
- // marked live whenever one of the uses becomes live.
- for (const auto &MaybeLiveUse : MaybeLiveUses)
- Uses.insert(std::make_pair(MaybeLiveUse, RA));
+ assert(!IsLive(RA) && "Use is already live!");
+ for (const auto &MaybeLiveUse : MaybeLiveUses) {
+ if (IsLive(MaybeLiveUse)) {
+ // A use is live, so this value is live.
+ MarkLive(RA);
+ break;
+ } else {
+ // Note any uses of this value, so this value can be
+ // marked live whenever one of the uses becomes live.
+ Uses.insert(std::make_pair(MaybeLiveUse, RA));
+ }
+ }
break;
}
}
@@ -686,17 +694,20 @@ void DeadArgumentEliminationPass::MarkLive(const Function &F) {
/// mark any values that are used by this value (according to Uses) live as
/// well.
void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) {
- if (LiveFunctions.count(RA.F))
- return; // Function was already marked Live.
+ if (IsLive(RA))
+ return; // Already marked Live.
- if (!LiveValues.insert(RA).second)
- return; // We were already marked Live.
+ LiveValues.insert(RA);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking "
<< RA.getDescription() << " live\n");
PropagateLiveness(RA);
}
+bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) {
+ return LiveFunctions.count(RA.F) || LiveValues.count(RA);
+}
+
/// PropagateLiveness - Given that RA is a live value, propagate it's liveness
/// to any other values it uses (according to Uses).
void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fa695c39cd1e..1e43014e7d32 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -782,25 +782,24 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
// There are 8 commuted variants.
- // Canonicalize -1 (saturated result) to true value of the select. Just
- // swapping the compare operands is legal, because the selected value is the
- // same in case of equality, so we can interchange u< and u<=.
+ // Canonicalize -1 (saturated result) to true value of the select.
if (match(FVal, m_AllOnes())) {
std::swap(TVal, FVal);
- std::swap(Cmp0, Cmp1);
+ Pred = CmpInst::getInversePredicate(Pred);
}
if (!match(TVal, m_AllOnes()))
return nullptr;
- // Canonicalize predicate to 'ULT'.
- if (Pred == ICmpInst::ICMP_UGT) {
- Pred = ICmpInst::ICMP_ULT;
+ // Canonicalize predicate to less-than or less-or-equal-than.
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
std::swap(Cmp0, Cmp1);
+ Pred = CmpInst::getSwappedPredicate(Pred);
}
- if (Pred != ICmpInst::ICMP_ULT)
+ if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE)
return nullptr;
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ // Strictness of the comparison is irrelevant.
Value *Y;
if (match(Cmp0, m_Not(m_Value(X))) &&
match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
@@ -809,6 +808,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
}
// The 'not' op may be included in the sum but not the compare.
+ // Strictness of the comparison is irrelevant.
X = Cmp0;
Y = Cmp1;
if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
@@ -819,7 +819,9 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
}
// The overflow may be detected via the add wrapping round.
- if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
+ // This is only valid for strict comparison!
+ if (Pred == ICmpInst::ICMP_ULT &&
+ match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
// ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
// ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
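
The "only valid for strict comparison" restriction can be checked with a small worked example: when Y == 0 the addition cannot wrap, yet (X + Y) u<= X still holds, so the non-strict select is not a saturated add. A stand-alone illustration with made-up 8-bit values:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t X = 5, Y = 0;
  uint8_t Sum = uint8_t(X + Y);
  uint8_t NonStrict = (Sum <= X) ? 0xFF : Sum; // 255: not uadd.sat(5, 0)
  uint8_t Strict = (Sum < X) ? 0xFF : Sum;     // 5: matches uadd.sat(5, 0)
  std::printf("%u %u\n", (unsigned)NonStrict, (unsigned)Strict);
}
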
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index ee09a4d9db7e..1557fad4d372 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -792,7 +792,7 @@ private:
StringRef InternalSuffix);
Instruction *CreateAsanModuleDtor(Module &M);
- bool canInstrumentAliasedGlobal(const GlobalAlias &GA) const;
+ const GlobalVariable *getExcludedAliasedGlobal(const GlobalAlias &GA) const;
bool shouldInstrumentGlobal(GlobalVariable *G) const;
bool ShouldUseMachOGlobalsSection() const;
StringRef getGlobalMetadataSection() const;
@@ -1784,20 +1784,22 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls(
}
}
-bool ModuleAddressSanitizer::canInstrumentAliasedGlobal(
- const GlobalAlias &GA) const {
+const GlobalVariable *
+ModuleAddressSanitizer::getExcludedAliasedGlobal(const GlobalAlias &GA) const {
// In case this function should be expanded to include rules that do not just
// apply when CompileKernel is true, either guard all existing rules with an
// 'if (CompileKernel) { ... }' or be absolutely sure that all these rules
// should also apply to user space.
assert(CompileKernel && "Only expecting to be called when compiling kernel");
+ const Constant *C = GA.getAliasee();
+
// When compiling the kernel, globals that are aliased by symbols prefixed
// by "__" are special and cannot be padded with a redzone.
if (GA.getName().startswith("__"))
- return false;
+ return dyn_cast<GlobalVariable>(C->stripPointerCastsAndAliases());
- return true;
+ return nullptr;
}
bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
@@ -2256,14 +2258,12 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
*CtorComdat = false;
// Build set of globals that are aliased by some GA, where
- // canInstrumentAliasedGlobal(GA) returns false.
+ // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable.
SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions;
if (CompileKernel) {
for (auto &GA : M.aliases()) {
- if (const auto *GV = dyn_cast<GlobalVariable>(GA.getAliasee())) {
- if (!canInstrumentAliasedGlobal(GA))
- AliasedGlobalExclusions.insert(GV);
- }
+ if (const GlobalVariable *GV = getExcludedAliasedGlobal(GA))
+ AliasedGlobalExclusions.insert(GV);
}
}