aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86')
-rw-r--r--[-rwxr-xr-x]contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp222
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--[-rwxr-xr-x]contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td0
-rw-r--r--[-rwxr-xr-x]contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td0
6 files changed, 227 insertions, 32 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
index 540ad98b6d54..540ad98b6d54 100755..100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index c7ca6fb2a4fc..db6b68659493 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
uint64_t ProbeChunk = StackProbeSize * 8;
+ uint64_t MaxAlign =
+ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
+
// Synthesize a loop or unroll it, depending on the number of iterations.
+ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
+ // between the unaligned rsp and current rsp.
if (Offset > ProbeChunk) {
- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
+ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
+ MaxAlign % StackProbeSize);
} else {
- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
+ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
+ MaxAlign % StackProbeSize);
}
}
void X86FrameLowering::emitStackProbeInlineGenericBlock(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+ uint64_t AlignOffset) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
uint64_t CurrentOffset = 0;
- // 0 Thanks to return address being saved on the stack
- uint64_t CurrentProbeOffset = 0;
- // For the first N - 1 pages, just probe. I tried to take advantage of
+ assert(AlignOffset < StackProbeSize);
+
+ // If the offset is so small it fits within a page, there's nothing to do.
+ if (StackProbeSize < Offset + AlignOffset) {
+
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize - AlignOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ CurrentOffset = StackProbeSize - AlignOffset;
+ }
+
+ // For the next N - 1 pages, just probe. I tried to take advantage of
// natural probes but it implies much more logic and there was very few
// interesting natural probes to interleave.
while (CurrentOffset + StackProbeSize < Offset) {
@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
.setMIFlag(MachineInstr::FrameSetup);
NumFrameExtraProbe++;
CurrentOffset += StackProbeSize;
- CurrentProbeOffset += StackProbeSize;
}
+ // No need to probe the tail, it is smaller than a Page.
uint64_t ChunkSize = Offset - CurrentOffset;
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
void X86FrameLowering::emitStackProbeInlineGenericLoop(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- uint64_t Offset) const {
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
+ uint64_t AlignOffset) const {
assert(Offset && "null offset");
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ if (AlignOffset) {
+ if (AlignOffset < StackProbeSize) {
+ // Perform a first smaller allocation followed by a probe.
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(AlignOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ Offset -= AlignOffset;
+ }
+ }
+
// Synthesize a loop
NumFrameLoopProbe++;
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// save loop bound
{
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)
.addImm(Offset / StackProbeSize * StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// allocate a page
{
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
uint64_t MaxAlign) const {
uint64_t Val = -MaxAlign;
unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
- .addReg(Reg)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ MachineFunction &MF = *MBB.getParent();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+
+ // We want to make sure that (in worst case) less than StackProbeSize bytes
+ // are not probed after the AND. This assumption is used in
+ // emitStackProbeInlineGeneric.
+ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
+ {
+ NumFrameLoopProbe++;
+ MachineBasicBlock *entryMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *headMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *bodyMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *footMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MachineFunction::iterator MBBIter = MBB.getIterator();
+ MF.insert(MBBIter, entryMBB);
+ MF.insert(MBBIter, headMBB);
+ MF.insert(MBBIter, bodyMBB);
+ MF.insert(MBBIter, footMBB);
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+
+ // Setup entry block
+ {
+
+ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
+ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MachineInstr *MI =
+ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+
+ BuildMI(entryMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(&MBB)
+ .addImm(X86::COND_E)
+ .setMIFlag(MachineInstr::FrameSetup);
+ entryMBB->addSuccessor(headMBB);
+ entryMBB->addSuccessor(&MBB);
+ }
+
+ // Loop entry block
+
+ {
+ const unsigned SUBOpc =
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(headMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // jump
+ BuildMI(headMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(footMBB)
+ .addImm(X86::COND_B)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ headMBB->addSuccessor(bodyMBB);
+ headMBB->addSuccessor(footMBB);
+ }
+
+ // setup loop body
+ {
+ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ const unsigned SUBOpc =
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // cmp with stack pointer bound
+ BuildMI(bodyMBB, DL,
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // jump
+ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(bodyMBB)
+ .addImm(X86::COND_B)
+ .setMIFlag(MachineInstr::FrameSetup);
+ bodyMBB->addSuccessor(bodyMBB);
+ bodyMBB->addSuccessor(footMBB);
+ }
+
+ // setup loop footer
+ {
+ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
+ .addReg(FinalStackProbed)
+ .setMIFlag(MachineInstr::FrameSetup);
+ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ footMBB->addSuccessor(&MBB);
+ }
+
+ recomputeLiveIns(*headMBB);
+ recomputeLiveIns(*bodyMBB);
+ recomputeLiveIns(*footMBB);
+ recomputeLiveIns(MBB);
+ }
+ } else {
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+ .addReg(Reg)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
}
bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
index c0b4be95f88d..bb2e83205e71 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
@@ -213,14 +213,14 @@ private:
void emitStackProbeInlineGenericBlock(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
+ const DebugLoc &DL, uint64_t Offset,
+ uint64_t Align) const;
void emitStackProbeInlineGenericLoop(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- uint64_t Offset) const;
+ const DebugLoc &DL, uint64_t Offset,
+ uint64_t Align) const;
/// Emit a stub to later inline the target stack probe.
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1671917157f4..56690c3c555b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30285,6 +30285,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::BITREVERSE:
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ assert(Subtarget.hasXOP() && "Expected XOP");
+ // We can use VPPERM by copying to a vector register and back. We'll need
+ // to move the scalar in two i32 pieces.
+ Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
+ return;
}
}
@@ -31876,7 +31883,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
.addMBB(tailMBB)
- .addImm(X86::COND_L);
+ .addImm(X86::COND_GE);
testMBB->addSuccessor(blockMBB);
testMBB->addSuccessor(tailMBB);
@@ -31892,9 +31899,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
//
// The property we want to enforce is to never have more than [page alloc] between two probes.
- const unsigned MovMIOpc =
- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+ const unsigned XORMIOpc =
+ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
+ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
.addImm(0);
BuildMI(blockMBB, DL,
@@ -36018,8 +36025,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// Share broadcast with the longest vector and extract low subvector (free).
+ // Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+ Src == User->getOperand(0) &&
User->getValueSizeInBits(0) > VT.getSizeInBits()) {
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
VT.getSizeInBits());
@@ -39588,10 +39597,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
// vselect Cond, 000..., X -> andn Cond, X
if (TValIsAllZeros) {
- MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
- SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
- SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
- SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
+ SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+ SDValue AndN;
+ // The canonical form differs for i1 vectors - x86andnp is not used
+ if (CondVT.getScalarType() == MVT::i1)
+ AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
+ CastRHS);
+ else
+ AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, AndN);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
index 4aea7bc253bb..4aea7bc253bb 100755..100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 7fc96d1eda89..7fc96d1eda89 100755..100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td