diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86.td')
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 101 |
1 files changed, 71 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index c492d686c52e..53bbd93798ac 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -323,10 +323,15 @@ def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true", "platform configuration instruction">; // On recent X86 (port bound) processors, its preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. -def FeatureFastVariableShuffle - : SubtargetFeature<"fast-variable-shuffle", - "HasFastVariableShuffle", - "true", "Shuffles with variable masks are fast">; +def FeatureFastVariableCrossLaneShuffle + : SubtargetFeature<"fast-variable-crosslane-shuffle", + "HasFastVariableCrossLaneShuffle", + "true", "Cross-lane shuffles with variable masks are fast">; +def FeatureFastVariablePerLaneShuffle + : SubtargetFeature<"fast-variable-perlane-shuffle", + "HasFastVariablePerLaneShuffle", + "true", "Per-lane shuffles with variable masks are fast">; + // On some X86 processors, a vzeroupper instruction should be inserted after // using ymm/zmm registers before executing code that may use SSE instructions. def FeatureInsertVZEROUPPER @@ -514,6 +519,10 @@ def FeatureFastVectorShiftMasks "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true", "Prefer a left/right vector logical shift pair over a shift+and pair">; +def FeatureFastMOVBE + : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true", + "Prefer a movbe over a single-use load + bswap / single-use bswap + store">; + def FeatureUseGLMDivSqrtCosts : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", "Use Goldmont specific floating point div/sqrt costs">; @@ -555,6 +564,7 @@ include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleZnver2.td" +include "X86ScheduleZnver3.td" include "X86ScheduleBdVer2.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" @@ -634,7 +644,8 @@ def ProcessorFeatures { FeatureFastScalarFSQRT, FeatureFastSHLDRotate, FeatureFast15ByteNOP, - FeatureFastVariableShuffle, + FeatureFastVariableCrossLaneShuffle, + FeatureFastVariablePerLaneShuffle, FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps, FeatureInsertVZEROUPPER]; @@ -653,8 +664,7 @@ def ProcessorFeatures { list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, FeatureXSAVEC, FeatureXSAVES, - FeatureCLFLUSHOPT, - FeatureSGX]; + FeatureCLFLUSHOPT]; list<SubtargetFeature> SKLTuning = [FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA, @@ -663,7 +673,8 @@ def ProcessorFeatures { FeatureFastVectorFSQRT, FeatureFastSHLDRotate, FeatureFast15ByteNOP, - FeatureFastVariableShuffle, + FeatureFastVariableCrossLaneShuffle, + FeatureFastVariablePerLaneShuffle, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER]; list<SubtargetFeature> SKLFeatures = @@ -689,7 +700,8 @@ def ProcessorFeatures { FeatureFastVectorFSQRT, FeatureFastSHLDRotate, FeatureFast15ByteNOP, - FeatureFastVariableShuffle, + FeatureFastVariableCrossLaneShuffle, + FeatureFastVariablePerLaneShuffle, FeaturePrefer256Bit, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER]; @@ -726,7 +738,8 @@ def ProcessorFeatures { FeatureFastVectorFSQRT, FeatureFastSHLDRotate, FeatureFast15ByteNOP, - FeatureFastVariableShuffle, + FeatureFastVariableCrossLaneShuffle, + FeatureFastVariablePerLaneShuffle, FeaturePrefer256Bit, FeatureInsertVZEROUPPER]; list<SubtargetFeature> CNLFeatures = @@ -740,7 +753,6 @@ def ProcessorFeatures { FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, FeatureGFNI, - FeatureCLWB, FeatureRDPID, FeatureFSRM]; list<SubtargetFeature> ICLTuning = CNLTuning; @@ -749,13 +761,15 @@ def ProcessorFeatures { // Icelake Server list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG, + FeatureCLWB, FeatureWBNOINVD]; list<SubtargetFeature> ICXTuning = CNLTuning; list<SubtargetFeature> ICXFeatures = !listconcat(ICLFeatures, ICXAdditionalFeatures); - //Tigerlake + // Tigerlake list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, + FeatureCLWB, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureSHSTK]; @@ -763,7 +777,7 @@ def ProcessorFeatures { list<SubtargetFeature> TGLFeatures = !listconcat(ICLFeatures, TGLAdditionalFeatures ); - //Sapphirerapids + // Sapphirerapids list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, @@ -784,17 +798,6 @@ def ProcessorFeatures { list<SubtargetFeature> SPRFeatures = !listconcat(ICXFeatures, SPRAdditionalFeatures); - // Alderlake - list<SubtargetFeature> ADLAdditionalFeatures = [FeatureAVXVNNI, - FeatureCLDEMOTE, - FeatureHRESET, - FeaturePTWRITE, - FeatureSERIALIZE, - FeatureWAITPKG]; - list<SubtargetFeature> ADLTuning = SKLTuning; - list<SubtargetFeature> ADLFeatures = - !listconcat(SKLFeatures, ADLAdditionalFeatures); - // Atom list<SubtargetFeature> AtomFeatures = [FeatureX87, FeatureCMPXCHG8B, @@ -830,6 +833,7 @@ def ProcessorFeatures { FeatureSlowDivide64, FeatureSlowPMULLD, FeatureFast7ByteNOP, + FeatureFastMOVBE, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER]; list<SubtargetFeature> SLMFeatures = @@ -849,6 +853,7 @@ def ProcessorFeatures { FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, + FeatureFastMOVBE, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER]; list<SubtargetFeature> GLMFeatures = @@ -856,12 +861,12 @@ def ProcessorFeatures { // Goldmont Plus list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, - FeatureRDPID, - FeatureSGX]; + FeatureRDPID]; list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts, FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, + FeatureFastMOVBE, FeatureInsertVZEROUPPER]; list<SubtargetFeature> GLPFeatures = !listconcat(GLMFeatures, GLPAdditionalFeatures); @@ -873,6 +878,31 @@ def ProcessorFeatures { list<SubtargetFeature> TRMFeatures = !listconcat(GLPFeatures, TRMAdditionalFeatures); + // Alderlake + list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE, + FeaturePCONFIG, + FeatureSHSTK, + FeatureWIDEKL, + FeatureINVPCID, + FeatureADX, + FeatureFMA, + FeatureVAES, + FeatureVPCLMULQDQ, + FeatureF16C, + FeatureBMI, + FeatureBMI2, + FeatureLZCNT, + FeatureAVXVNNI, + FeaturePKU, + FeatureHRESET, + FeatureCLDEMOTE, + FeatureMOVDIRI, + FeatureMOVDIR64B, + FeatureWAITPKG]; + list<SubtargetFeature> ADLTuning = SKLTuning; + list<SubtargetFeature> ADLFeatures = + !listconcat(TRMFeatures, ADLAdditionalFeatures); + // Knights Landing list<SubtargetFeature> KNLFeatures = [FeatureX87, FeatureCMPXCHG8B, @@ -910,6 +940,7 @@ def ProcessorFeatures { FeatureSlowTwoMemOps, FeaturePreferMaskRegisters, FeatureHasFastGather, + FeatureFastMOVBE, FeatureSlowPMADDWD]; // TODO Add AVX5124FMAPS/AVX5124VNNIW features list<SubtargetFeature> KNMFeatures = @@ -969,6 +1000,7 @@ def ProcessorFeatures { FeatureFast15ByteNOP, FeatureFastScalarShiftMasks, FeatureFastVectorShiftMasks, + FeatureFastMOVBE, FeatureSlowSHLD]; list<SubtargetFeature> BtVer2Features = !listconcat(BtVer1Features, BtVer2AdditionalFeatures); @@ -1003,7 +1035,9 @@ def ProcessorFeatures { FeatureTBM, FeatureFMA, FeatureFastBEXTR]; - list<SubtargetFeature> BdVer2Tuning = BdVer1Tuning; + list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastMOVBE]; + list<SubtargetFeature> BdVer2Tuning = + !listconcat(BdVer1Tuning, BdVer2AdditionalTuning); list<SubtargetFeature> BdVer2Features = !listconcat(BdVer1Features, BdVer2AdditionalFeatures); @@ -1063,6 +1097,7 @@ def ProcessorFeatures { FeatureFast15ByteNOP, FeatureBranchFusion, FeatureFastScalarShiftMasks, + FeatureFastMOVBE, FeatureSlowSHLD, FeatureInsertVZEROUPPER]; list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, @@ -1076,7 +1111,11 @@ def ProcessorFeatures { FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ]; - list<SubtargetFeature> ZN3Tuning = ZNTuning; + list<SubtargetFeature> ZN3AdditionalTuning = + [FeatureMacroFusion, + FeatureFastVariablePerLaneShuffle]; + list<SubtargetFeature> ZN3Tuning = + !listconcat(ZNTuning, ZN3AdditionalTuning); list<SubtargetFeature> ZN3Features = !listconcat(ZN2Features, ZN3AdditionalFeatures); } @@ -1291,6 +1330,8 @@ def : ProcModel<"cannonlake", SkylakeServerModel, ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>; def : ProcModel<"icelake-client", SkylakeServerModel, ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>; +def : ProcModel<"rocketlake", SkylakeServerModel, + ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>; def : ProcModel<"icelake-server", SkylakeServerModel, ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>; def : ProcModel<"tigerlake", SkylakeServerModel, @@ -1365,7 +1406,7 @@ def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures, ProcessorFeatures.ZNTuning>; def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, ProcessorFeatures.ZN2Tuning>; -def : ProcModel<"znver3", Znver2Model, ProcessorFeatures.ZN3Features, +def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], @@ -1407,7 +1448,7 @@ def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Feature def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features, ProcessorFeatures.HSWTuning>; // Close to the AVX-512 level implemented by Xeon Scalable Processors. -def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features, +def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features, ProcessorFeatures.SKXTuning>; //===----------------------------------------------------------------------===// |