aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86.td')
-rw-r--r--llvm/lib/Target/X86/X86.td101
1 files changed, 71 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index c492d686c52e..53bbd93798ac 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -323,10 +323,15 @@ def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
// On recent X86 (port bound) processors, it's preferable to combine into a single shuffle
// using a variable mask over multiple fixed shuffles.
-def FeatureFastVariableShuffle
- : SubtargetFeature<"fast-variable-shuffle",
- "HasFastVariableShuffle",
- "true", "Shuffles with variable masks are fast">;
+def FeatureFastVariableCrossLaneShuffle
+ : SubtargetFeature<"fast-variable-crosslane-shuffle",
+ "HasFastVariableCrossLaneShuffle",
+ "true", "Cross-lane shuffles with variable masks are fast">;
+def FeatureFastVariablePerLaneShuffle
+ : SubtargetFeature<"fast-variable-perlane-shuffle",
+ "HasFastVariablePerLaneShuffle",
+ "true", "Per-lane shuffles with variable masks are fast">;
+
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def FeatureInsertVZEROUPPER
@@ -514,6 +519,10 @@ def FeatureFastVectorShiftMasks
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
+def FeatureFastMOVBE
+ : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
+ "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
+
def FeatureUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
@@ -555,6 +564,7 @@ include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
+include "X86ScheduleZnver3.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
@@ -634,7 +644,8 @@ def ProcessorFeatures {
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate,
FeatureFast15ByteNOP,
- FeatureFastVariableShuffle,
+ FeatureFastVariableCrossLaneShuffle,
+ FeatureFastVariablePerLaneShuffle,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps,
FeatureInsertVZEROUPPER];
@@ -653,8 +664,7 @@ def ProcessorFeatures {
list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
FeatureXSAVEC,
FeatureXSAVES,
- FeatureCLFLUSHOPT,
- FeatureSGX];
+ FeatureCLFLUSHOPT];
list<SubtargetFeature> SKLTuning = [FeatureHasFastGather,
FeatureMacroFusion,
FeatureSlow3OpsLEA,
@@ -663,7 +673,8 @@ def ProcessorFeatures {
FeatureFastVectorFSQRT,
FeatureFastSHLDRotate,
FeatureFast15ByteNOP,
- FeatureFastVariableShuffle,
+ FeatureFastVariableCrossLaneShuffle,
+ FeatureFastVariablePerLaneShuffle,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> SKLFeatures =
@@ -689,7 +700,8 @@ def ProcessorFeatures {
FeatureFastVectorFSQRT,
FeatureFastSHLDRotate,
FeatureFast15ByteNOP,
- FeatureFastVariableShuffle,
+ FeatureFastVariableCrossLaneShuffle,
+ FeatureFastVariablePerLaneShuffle,
FeaturePrefer256Bit,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
@@ -726,7 +738,8 @@ def ProcessorFeatures {
FeatureFastVectorFSQRT,
FeatureFastSHLDRotate,
FeatureFast15ByteNOP,
- FeatureFastVariableShuffle,
+ FeatureFastVariableCrossLaneShuffle,
+ FeatureFastVariablePerLaneShuffle,
FeaturePrefer256Bit,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> CNLFeatures =
@@ -740,7 +753,6 @@ def ProcessorFeatures {
FeatureVPCLMULQDQ,
FeatureVPOPCNTDQ,
FeatureGFNI,
- FeatureCLWB,
FeatureRDPID,
FeatureFSRM];
list<SubtargetFeature> ICLTuning = CNLTuning;
@@ -749,13 +761,15 @@ def ProcessorFeatures {
// Icelake Server
list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
+ FeatureCLWB,
FeatureWBNOINVD];
list<SubtargetFeature> ICXTuning = CNLTuning;
list<SubtargetFeature> ICXFeatures =
!listconcat(ICLFeatures, ICXAdditionalFeatures);
- //Tigerlake
+ // Tigerlake
list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
+ FeatureCLWB,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureSHSTK];
@@ -763,7 +777,7 @@ def ProcessorFeatures {
list<SubtargetFeature> TGLFeatures =
!listconcat(ICLFeatures, TGLAdditionalFeatures );
- //Sapphirerapids
+ // Sapphirerapids
list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
FeatureAMXINT8,
FeatureAMXBF16,
@@ -784,17 +798,6 @@ def ProcessorFeatures {
list<SubtargetFeature> SPRFeatures =
!listconcat(ICXFeatures, SPRAdditionalFeatures);
- // Alderlake
- list<SubtargetFeature> ADLAdditionalFeatures = [FeatureAVXVNNI,
- FeatureCLDEMOTE,
- FeatureHRESET,
- FeaturePTWRITE,
- FeatureSERIALIZE,
- FeatureWAITPKG];
- list<SubtargetFeature> ADLTuning = SKLTuning;
- list<SubtargetFeature> ADLFeatures =
- !listconcat(SKLFeatures, ADLAdditionalFeatures);
-
// Atom
list<SubtargetFeature> AtomFeatures = [FeatureX87,
FeatureCMPXCHG8B,
@@ -830,6 +833,7 @@ def ProcessorFeatures {
FeatureSlowDivide64,
FeatureSlowPMULLD,
FeatureFast7ByteNOP,
+ FeatureFastMOVBE,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
@@ -849,6 +853,7 @@ def ProcessorFeatures {
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
+ FeatureFastMOVBE,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
@@ -856,12 +861,12 @@ def ProcessorFeatures {
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
- FeatureRDPID,
- FeatureSGX];
+ FeatureRDPID];
list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
+ FeatureFastMOVBE,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -873,6 +878,31 @@ def ProcessorFeatures {
list<SubtargetFeature> TRMFeatures =
!listconcat(GLPFeatures, TRMAdditionalFeatures);
+ // Alderlake
+ list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
+ FeaturePCONFIG,
+ FeatureSHSTK,
+ FeatureWIDEKL,
+ FeatureINVPCID,
+ FeatureADX,
+ FeatureFMA,
+ FeatureVAES,
+ FeatureVPCLMULQDQ,
+ FeatureF16C,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureLZCNT,
+ FeatureAVXVNNI,
+ FeaturePKU,
+ FeatureHRESET,
+ FeatureCLDEMOTE,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureWAITPKG];
+ list<SubtargetFeature> ADLTuning = SKLTuning;
+ list<SubtargetFeature> ADLFeatures =
+ !listconcat(TRMFeatures, ADLAdditionalFeatures);
+
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
FeatureCMPXCHG8B,
@@ -910,6 +940,7 @@ def ProcessorFeatures {
FeatureSlowTwoMemOps,
FeaturePreferMaskRegisters,
FeatureHasFastGather,
+ FeatureFastMOVBE,
FeatureSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
@@ -969,6 +1000,7 @@ def ProcessorFeatures {
FeatureFast15ByteNOP,
FeatureFastScalarShiftMasks,
FeatureFastVectorShiftMasks,
+ FeatureFastMOVBE,
FeatureSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1003,7 +1035,9 @@ def ProcessorFeatures {
FeatureTBM,
FeatureFMA,
FeatureFastBEXTR];
- list<SubtargetFeature> BdVer2Tuning = BdVer1Tuning;
+ list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastMOVBE];
+ list<SubtargetFeature> BdVer2Tuning =
+ !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
!listconcat(BdVer1Features, BdVer2AdditionalFeatures);
@@ -1063,6 +1097,7 @@ def ProcessorFeatures {
FeatureFast15ByteNOP,
FeatureBranchFusion,
FeatureFastScalarShiftMasks,
+ FeatureFastMOVBE,
FeatureSlowSHLD,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
@@ -1076,7 +1111,11 @@ def ProcessorFeatures {
FeaturePKU,
FeatureVAES,
FeatureVPCLMULQDQ];
- list<SubtargetFeature> ZN3Tuning = ZNTuning;
+ list<SubtargetFeature> ZN3AdditionalTuning =
+ [FeatureMacroFusion,
+ FeatureFastVariablePerLaneShuffle];
+ list<SubtargetFeature> ZN3Tuning =
+ !listconcat(ZNTuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
!listconcat(ZN2Features, ZN3AdditionalFeatures);
}
@@ -1291,6 +1330,8 @@ def : ProcModel<"cannonlake", SkylakeServerModel,
ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
def : ProcModel<"icelake-client", SkylakeServerModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
+def : ProcModel<"rocketlake", SkylakeServerModel,
+ ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
def : ProcModel<"icelake-server", SkylakeServerModel,
ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake", SkylakeServerModel,
@@ -1365,7 +1406,7 @@ def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
ProcessorFeatures.ZN2Tuning>;
-def : ProcModel<"znver3", Znver2Model, ProcessorFeatures.ZN3Features,
+def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
@@ -1407,7 +1448,7 @@ def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Feature
def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
ProcessorFeatures.HSWTuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
-def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features,
+def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
ProcessorFeatures.SKXTuning>;
//===----------------------------------------------------------------------===//