Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86.td | 3
-rw-r--r--  llvm/lib/Target/X86/X86EvexToVex.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 193
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 38
-rw-r--r--  llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86InstrFoldTables.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/X86/X86InstrSystem.td | 9
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/X86PartialReduction.cpp | 6
-rw-r--r--  llvm/lib/Target/X86/X86ReturnThunks.cpp | 92
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 2
17 files changed, 270 insertions, 107 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index a903c5f455a2..da90befb2320 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -622,7 +622,7 @@ static bool printFMAComments(const MCInst *MI, raw_ostream &OS,
OS << '-';
OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
- << AccName;
+ << AccName << '\n';
return true;
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
index 901082ce6cf3..640efd468135 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
@@ -13,6 +13,7 @@
#include "X86InstrRelaxTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include <atomic>
using namespace llvm;
@@ -119,7 +120,7 @@ const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) {
namespace {
// This class stores the short form tables. It is instantiated as a
-// ManagedStatic to lazily init the short form table.
+// function scope static variable to lazily init the short form table.
struct X86ShortFormTable {
// Stores relaxation table entries sorted by relaxed form opcode.
SmallVector<X86InstrRelaxTableEntry, 0> Table;
@@ -137,10 +138,9 @@ struct X86ShortFormTable {
};
} // namespace
-static ManagedStatic<X86ShortFormTable> ShortTable;
-
const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) {
- auto &Table = ShortTable->Table;
+ static X86ShortFormTable ShortTable;
+ auto &Table = ShortTable.Table;
auto I = llvm::lower_bound(Table, RelaxOp);
if (I != Table.end() && I->KeyOp == RelaxOp)
return &*I;
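
Note on the change above: a function-scope static is constructed lazily on first use and, since C++11, that initialization is guaranteed to be thread-safe, so the relaxation table is still built exactly once without the ManagedStatic wrapper. A minimal standalone sketch of the pattern, with hypothetical names standing in for X86ShortFormTable and lookupShortTable:

#include <algorithm>
#include <utility>
#include <vector>

struct ExampleTable {
  // Sorted key/value pairs, built once inside the constructor.
  std::vector<std::pair<unsigned, unsigned>> Entries;
  ExampleTable() { /* populate and sort Entries here */ }
};

const unsigned *lookupExample(unsigned Key) {
  static ExampleTable Table; // lazily initialized, thread-safe since C++11
  auto I = std::lower_bound(
      Table.Entries.begin(), Table.Entries.end(), Key,
      [](const std::pair<unsigned, unsigned> &E, unsigned K) { return E.first < K; });
  if (I != Table.Entries.end() && I->first == Key)
    return &I->second;
  return nullptr;
}
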
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 49660883ad83..4c962de16530 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
#define GET_INSTRINFO_MC_DESC
#define GET_INSTRINFO_MC_HELPERS
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 7344900f2e31..0ac916527495 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -132,6 +132,9 @@ FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86IndirectThunksPass();
+/// This pass replaces ret instructions with jmps to __x86_return_thunk.
+FunctionPass *createX86ReturnThunksPass();
+
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
FunctionPass *createX86DiscriminateMemOpsPass();
@@ -185,6 +188,7 @@ void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
void initializeX86PreAMXConfigPassPass(PassRegistry &);
void initializeX86LowerTileCopyPass(PassRegistry &);
void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &);
+void initializeX86ReturnThunksPass(PassRegistry &);
namespace X86AS {
enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a5c6b40c493c..a859176220c7 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -266,6 +266,8 @@ def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
+def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
+ "Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
@@ -1238,6 +1240,7 @@ def ProcessorFeatures {
TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
+ FeatureRDPRU,
FeatureWBNOINVD];
list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index c7a013a0b17a..cff95d17c14c 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 61c1fd25031d..12af6087cb47 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -594,7 +594,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Half type will be promoted by default.
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
@@ -629,6 +629,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
@@ -2817,6 +2845,21 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
AddressSpace = X86AS::FS;
else if (GuardReg == "gs")
AddressSpace = X86AS::GS;
+
+ // Use the stack guard symbol if the user specified one.
+ StringRef GuardSymb = M->getStackProtectorGuardSymbol();
+ if (!GuardSymb.empty()) {
+ GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
+ if (!GV) {
+ Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
+ : Type::getInt32Ty(M->getContext());
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GuardSymb, nullptr,
+ GlobalValue::NotThreadLocal, AddressSpace);
+ }
+ return GV;
+ }
+
return SegmentOffset(IRB, Offset, AddressSpace);
}
}
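
The block added above kicks in when the module names a stack-protector guard symbol: getIRStackGuard then returns (or creates) an external global of that name instead of the usual %fs/%gs segment-relative slot. A hedged sketch of the producer side, assuming Module::setStackProtectorGuardSymbol is the setter paired with the getStackProtectorGuardSymbol() call used above (clang's -mstack-protector-guard-symbol= option is the usual way this gets populated; the symbol name below is made up):

#include "llvm/IR/Module.h"

// Ask the backend to load the stack guard from a named external symbol
// rather than from the TLS segment. Assumed setter; see note above.
void useGuardSymbol(llvm::Module &M) {
  M.setStackProtectorGuardSymbol("__my_stack_chk_guard");
}

With that in place, the lowering above materializes an external i64 global (i32 on 32-bit targets) named __my_stack_chk_guard and returns it as the guard value.
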
@@ -11757,15 +11800,17 @@ static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask,
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
-/// both.
+/// both, or via a known bits test.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask,
+ const SelectionDAG &DAG,
SDValue V1 = SDValue(),
SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
- assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
+ assert(llvm::all_of(ExpectedMask,
+ [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
"Illegal target shuffle mask");
// Check for out-of-range target shuffle mask indices.
@@ -11778,12 +11823,28 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
V2 = SDValue();
+ APInt ZeroV1 = APInt::getNullValue(Size);
+ APInt ZeroV2 = APInt::getNullValue(Size);
+
for (int i = 0; i < Size; ++i) {
int MaskIdx = Mask[i];
int ExpectedIdx = ExpectedMask[i];
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
- if (0 <= MaskIdx && 0 <= ExpectedIdx) {
+ if (MaskIdx == SM_SentinelZero) {
+ // If we need this expected index to be a zero element, then update the
+ // relevant zero mask and perform the known-bits check at the end to
+ // minimize repeated computes.
+ SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
+ if (ExpectedV &&
+ Size == (int)ExpectedV.getValueType().getVectorNumElements()) {
+ int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
+ APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2;
+ ZeroMask.setBit(BitIdx);
+ continue;
+ }
+ }
+ if (MaskIdx >= 0) {
SDValue MaskV = MaskIdx < Size ? V1 : V2;
SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
@@ -11791,15 +11852,16 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
continue;
}
- // TODO - handle SM_Sentinel equivalences.
return false;
}
- return true;
+ return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) &&
+ (ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2));
}
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
-static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
+static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT,
+ const SelectionDAG &DAG) {
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return false;
@@ -11809,12 +11871,13 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
- bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) ||
- isTargetShuffleEquivalent(VT, Mask, Unpckhwd));
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) ||
+ isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG));
return IsUnpackwdMask;
}
-static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
+static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask,
+ const SelectionDAG &DAG) {
// Create 128-bit vector type based on mask size.
MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
MVT VT = MVT::getVectorVT(EltVT, Mask.size());
@@ -11827,8 +11890,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
for (unsigned i = 0; i != 4; ++i) {
SmallVector<int, 16> UnpackMask;
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
- if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) ||
- isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask))
+ if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) ||
+ isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG))
return true;
}
return false;
@@ -12021,7 +12084,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
@@ -12030,7 +12093,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
@@ -12069,14 +12132,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
@@ -12464,14 +12527,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false, NumStages);
- if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
+ if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2))
if (MatchPACK(V1, V2, PackVT))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true, NumStages);
- if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
+ if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1))
if (MatchPACK(V1, V1, PackVT))
return true;
}
@@ -14283,7 +14346,7 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
// and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps
// because that avoids a constant load from memory.
if (NumElts == 4 &&
- (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
+ (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask, DAG)))
return SDValue();
// Extend the shuffle mask with undef elements.
@@ -17230,7 +17293,7 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2()) {
// extract128 + vunpckhps/vshufps, is better than vblend + vpermps.
if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
- !is128BitUnpackShuffleMask(HalfMask) &&
+ !is128BitUnpackShuffleMask(HalfMask, DAG) &&
(!isSingleSHUFPSMask(HalfMask) ||
Subtarget.hasFastVariableCrossLaneShuffle()))
return SDValue();
@@ -17892,7 +17955,7 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// For non-AVX512 if the Mask is of 16bit elements in lane then try to split
// since after split we get a more efficient code using vpunpcklwd and
// vpunpckhwd instrs than vblend.
- if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
+ if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32, DAG))
return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
DAG);
@@ -17930,7 +17993,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// For non-AVX512 if the Mask is of 16bit elements in lane then try to split
// since after split we get a more efficient code than vblend by using
// vpunpcklwd and vpunpckhwd instrs.
- if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
+ if (isUnpackWdShuffleMask(Mask, MVT::v8i32, DAG) && !V2.isUndef() &&
!Subtarget.hasAVX512())
return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
DAG);
@@ -27887,11 +27950,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
// Read Performance Monitoring Counters.
case RDPMC:
+ // Read Processor Register.
+ case RDPRU:
// GetExtended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
// RDPMC uses ECX to select the index of the performance counter to read.
+ // RDPRU uses ECX to select the processor register to read.
// XGETBV uses ECX to select the index of the XCR register to return.
// The result is stored into registers EDX:EAX.
expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
@@ -29902,14 +29968,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt);
SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
{4, 5, 6, 7, -1, -1, -1, -1});
- Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
- {0, 1, 1, 1, -1, -1, -1, -1});
- Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
- {2, 3, 3, 3, -1, -1, -1, -1});
- Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
- {0, 1, 1, 1, -1, -1, -1, -1});
- Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
- {2, 3, 3, 3, -1, -1, -1, -1});
+ SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG);
+ SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG);
+ Amt0 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk02);
+ Amt1 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk13);
+ Amt2 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk02);
+ Amt3 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk13);
}
}
@@ -30797,6 +30861,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
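
x86 has no floating-point read-modify-write instruction, so atomicrmw fmax/fmin (like fadd/fsub before them) get lowered to a compare-exchange loop that recomputes the FP operation until the exchange succeeds. A rough C++ illustration of that shape, not LLVM code; std::max is a simplification and does not match llvm.maxnum's NaN semantics:

#include <algorithm>
#include <atomic>

// Sketch of the cmpxchg-loop expansion for an atomic floating-point max.
float atomic_fmax(std::atomic<float> &A, float Operand) {
  float Old = A.load(std::memory_order_relaxed);
  float Desired;
  do {
    Desired = std::max(Old, Operand); // the non-atomic FP "max" step
  } while (!A.compare_exchange_weak(Old, Desired));
  return Old; // atomicrmw yields the value that was previously stored
}
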
@@ -32894,6 +32960,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
Results);
return;
+ case Intrinsic::x86_rdpru:
+ expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
+ Results);
+ return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
Results);
@@ -36985,8 +37055,9 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
- SDValue V1, const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
+ SDValue V1, const SelectionDAG &DAG,
+ const X86Subtarget &Subtarget, unsigned &Shuffle,
+ MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
@@ -37057,17 +37128,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
@@ -37076,17 +37147,19 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
@@ -37096,21 +37169,22 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
- {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
+ {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
- {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
+ {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
@@ -37126,6 +37200,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain, bool AllowIntDomain,
+ const SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
@@ -37269,33 +37344,36 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) &&
+ AllowFloatDomain) {
V2 = V1;
V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}, DAG) &&
+ AllowFloatDomain) {
V2 = V1;
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}, DAG) &&
Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
SrcVT = DstVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7},
+ DAG) &&
Subtarget.hasFP16()) {
Shuffle = X86ISD::MOVSH;
SrcVT = DstVT = MVT::v8f16;
@@ -37678,7 +37756,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
- if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
+ if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, DAG, V1,
+ V2))
return CanonicalizeShuffleInput(RootVT, V1);
}
}
@@ -37902,7 +37981,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1,
- Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
+ DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
@@ -37913,7 +37992,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
+ AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
@@ -37931,7 +38010,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO: Handle other insertions here as well?
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
Subtarget.hasSSE41() &&
- !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) {
+ !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG)) {
if (MaskEltSizeInBits == 32) {
SDValue SrcV1 = V1, SrcV2 = V2;
if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask,
@@ -37947,12 +38026,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
if (MaskEltSizeInBits == 64 &&
- isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) &&
+ isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}, DAG) &&
V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V2.getScalarValueSizeInBits() <= 32) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
return SDValue(); // Nothing to do!
- PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0);
+ PermuteImm = (/*DstIdx*/ 2 << 4) | (/*SrcIdx*/ 0 << 0);
Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
CanonicalizeShuffleInput(MVT::v4f32, V1),
CanonicalizeShuffleInput(MVT::v4f32, V2),
@@ -51654,9 +51733,13 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs())) {
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs()))) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index a55b95960aa6..6124755ca539 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1532,44 +1532,6 @@ def : Pat<(xor GR32:$src1, -2147483648),
}
//===----------------------------------------------------------------------===//
-// Pattern match SUB as XOR
-//===----------------------------------------------------------------------===//
-
-// An immediate in the LHS of a subtract can't be encoded in the instruction.
-// If there is no possibility of a borrow we can use an XOR instead of a SUB
-// to enable the immediate to be folded.
-// TODO: Move this to a DAG combine?
-
-def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
-
- // If all possible ones in the RHS are set in the LHS then there can't be
- // a borrow and we can use xor.
- return (~Known.Zero).isSubsetOf(CN->getAPIntValue());
- }
-
- return false;
-}]>;
-
-let AddedComplexity = 5 in {
-def : Pat<(sub_is_xor imm:$src2, GR8:$src1),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR16:$src1),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR32:$src1),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1),
- (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-}
-
-//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
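
The sub_is_xor patterns removed above (slated to become a DAG combine per their TODO) rest on a small bit-arithmetic fact: when every bit that could possibly be set in the subtrahend is already set in the immediate, the subtraction cannot borrow, so imm - x == imm ^ x and the foldable XOR encoding can be used. A tiny illustrative self-check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Imm = 0xFF;           // all low 8 bits set
  for (uint32_t X = 0; X <= 0xFF; ++X) // every possible one-bit of X is set in Imm
    assert(Imm - X == (Imm ^ X));      // no borrow => sub and xor agree
  return 0;
}
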
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 52b2a62316cd..c4317be664fd 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -13,8 +13,8 @@
#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 27220a8d4d99..8aeb169929f2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -13,6 +13,7 @@
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include <atomic>
#include <vector>
using namespace llvm;
@@ -6102,7 +6103,7 @@ llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
namespace {
// This class stores the memory unfolding tables. It is instantiated as a
-// ManagedStatic to lazily init the unfolding table.
+// function scope static variable to lazily init the unfolding table.
struct X86MemUnfoldTable {
// Stores memory unfolding tables entries sorted by opcode.
std::vector<X86MemoryFoldTableEntry> Table;
@@ -6159,11 +6160,10 @@ struct X86MemUnfoldTable {
};
}
-static ManagedStatic<X86MemUnfoldTable> MemUnfoldTable;
-
const X86MemoryFoldTableEntry *
llvm::lookupUnfoldTable(unsigned MemOp) {
- auto &Table = MemUnfoldTable->Table;
+ static X86MemUnfoldTable MemUnfoldTable;
+ auto &Table = MemUnfoldTable.Table;
auto I = llvm::lower_bound(Table, MemOp);
if (I != Table.end() && I->KeyOp == MemOp)
return &*I;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 7f6ef3479d40..4a9a281d5b99 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -978,6 +978,7 @@ def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
+def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
def HasCX8 : Predicate<"Subtarget->hasCX8()">;
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 3a653a56e534..b1ca87279007 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -735,6 +735,15 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
} // SchedRW
//===----------------------------------------------------------------------===//
+// RDPRU - Read Processor Register instruction.
+
+let SchedRW = [WriteSystem] in {
+let Uses = [ECX], Defs = [EAX, EDX] in
+ def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS,
+ Requires<[HasRDPRU]>;
+}
+
+//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3c8be95b43e3..6112c0b7d6c3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2,
- ROUNDP, ROUNDS
+ ROUNDP, ROUNDS, RDPRU
};
struct IntrinsicData {
@@ -309,6 +309,7 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0),
+ X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
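
Taken together, the RDPRU pieces above add a subtarget feature, the instruction itself (encoding 0F 01 FD per the X86InstrSystem.td definition: implicit ECX input, EDX:EAX output), and an llvm.x86.rdpru intrinsic routed through expandIntrinsicWChainHelper. A hedged sketch of what the instruction does at the machine level, using GCC/Clang inline asm with the raw encoding so no assembler support is assumed; the selector values (e.g. 0 for MPERF, 1 for APERF) come from AMD's documentation, not from this patch:

#include <cstdint>

// Read a processor register selected by ECX; the result comes back in EDX:EAX.
static inline uint64_t rdpru(uint32_t Selector) {
  uint32_t Lo, Hi;
  asm volatile(".byte 0x0f, 0x01, 0xfd" // rdpru
               : "=a"(Lo), "=d"(Hi)
               : "c"(Selector));
  return (uint64_t(Hi) << 32) | Lo;
}
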
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index b107de692365..3fbdb18a0793 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2413,6 +2413,10 @@ static void addConstantComments(const MachineInstr *MI,
}
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // Subtarget->getFeatureBits());
+
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
MF->getSubtarget<X86Subtarget>().getRegisterInfo();
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index 7761f7323358..c760a32e2579 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -439,8 +439,8 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
- if (!Visited.insert(V).second)
- continue;
+ if (!Visited.insert(V).second)
+ continue;
if (auto *PN = dyn_cast<PHINode>(V)) {
// PHI node should have single use unless it is the root node, then it
@@ -466,7 +466,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
// gets us back to this node.
if (BO->hasNUses(BO == Root ? 3 : 2)) {
PHINode *PN = nullptr;
- for (auto *U : Root->users())
+ for (auto *U : BO->users())
if (auto *P = dyn_cast<PHINode>(U))
if (!Visited.count(P))
PN = P;
diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp
new file mode 100644
index 000000000000..4b203229ba83
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp
@@ -0,0 +1,92 @@
+//==- X86ReturnThunks.cpp - Replace rets with thunks or inline thunks --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that replaces ret instructions with a jmp to __x86_return_thunk.
+///
+/// This corresponds to -mfunction-return=thunk-extern or
+/// __attribute__((function_return("thunk-extern"))).
+///
+/// This pass is a minimal implementation necessary to help mitigate
+/// RetBleed for the Linux kernel.
+///
+/// Should support for thunk or thunk-inline become necessary in the future,
+/// this pass should be combined with x86-retpoline-thunks, which already has
+/// the machinery to emit thunks. Until then, YAGNI.
+///
+/// This pass is very similar to x86-lvi-ret.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-return-thunks"
+#define DEBUG_TYPE PASS_KEY
+
+struct X86ReturnThunks final : public MachineFunctionPass {
+ static char ID;
+ X86ReturnThunks() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override { return "X86 Return Thunks"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+char X86ReturnThunks::ID = 0;
+
+bool X86ReturnThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << "\n");
+
+ bool Modified = false;
+
+ if (!MF.getFunction().hasFnAttribute(llvm::Attribute::FnRetThunkExtern))
+ return Modified;
+
+ StringRef ThunkName = "__x86_return_thunk";
+ if (MF.getFunction().getName() == ThunkName)
+ return Modified;
+
+ const auto &ST = MF.getSubtarget<X86Subtarget>();
+ const bool Is64Bit = ST.getTargetTriple().getArch() == Triple::x86_64;
+ const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;
+ SmallVector<MachineInstr *, 16> Rets;
+
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &Term : MBB.terminators())
+ if (Term.getOpcode() == RetOpc)
+ Rets.push_back(&Term);
+
+ const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd);
+
+ for (MachineInstr *Ret : Rets) {
+ BuildMI(Ret->getParent(), Ret->getDebugLoc(), JMP)
+ .addExternalSymbol(ThunkName.data());
+ Ret->eraseFromParent();
+ Modified = true;
+ }
+
+ return Modified;
+}
+
+INITIALIZE_PASS(X86ReturnThunks, PASS_KEY, "X86 Return Thunks", false, false)
+
+FunctionPass *llvm::createX86ReturnThunksPass() {
+ return new X86ReturnThunks();
+}
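
To see what the new pass does end to end (the example below is illustrative, not part of the patch): a function compiled with -mfunction-return=thunk-extern, or with the attribute the pass's header comment cites, keeps its body unchanged except that every ret is rewritten into a tail jump to __x86_return_thunk, whose definition is expected to come from elsewhere (e.g. the Linux kernel's own thunk).

// Illustrative translation unit; requires a toolchain that understands the
// function_return attribute and a link-time definition of __x86_return_thunk.
__attribute__((function_return("thunk-extern")))
int add(int a, int b) {
  return a + b; // the final "ret" becomes "jmp __x86_return_thunk"
}
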
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 4249788e3540..f4e25e4194db 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -100,6 +100,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86OptimizeLEAPassPass(PR);
initializeX86PartialReductionPass(PR);
initializePseudoProbeInserterPass(PR);
+ initializeX86ReturnThunksPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -575,6 +576,7 @@ void X86PassConfig::addPreEmitPass2() {
// hand inspection of the codegen output.
addPass(createX86SpeculativeExecutionSideEffectSuppression());
addPass(createX86IndirectThunksPass());
+ addPass(createX86ReturnThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.