diff options
Diffstat (limited to 'lib/Target/X86/X86ScheduleZnver1.td')
-rw-r--r-- | lib/Target/X86/X86ScheduleZnver1.td | 1222 |
1 files changed, 493 insertions, 729 deletions
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index a4e5327213c2..d28d58580752 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -55,7 +55,6 @@ def ZnFPU2 : ProcResource<1>; def ZnFPU3 : ProcResource<1>; // FPU grouping -def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>; def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>; def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>; def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>; @@ -91,6 +90,32 @@ def ZnDivider : ProcResource<1>; // 4 Cycles load-to use Latency is captured def : ReadAdvance<ReadAfterLd, 4>; +// The Integer PRF for Zen is 168 entries, and it holds the architectural and +// speculative version of the 64-bit integer registers. +// Reference: "Software Optimization Guide for AMD Family 17h Processors" +def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>; + +// 36 Entry (9x4 entries) floating-point Scheduler +def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> { +let BufferSize=36; +} + +// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit +// registers. Operations on 256-bit data types are cracked into two COPs. +// Reference: "Software Optimization Guide for AMD Family 17h Processors" +def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; + +// The unit can track up to 192 macro ops in-flight. +// The retire unit handles in-order commit of up to 8 macro ops per cycle. +// Reference: "Software Optimization Guide for AMD Family 17h Processors" +// To be noted, the retire unit is shared between integer and FP ops. +// In SMT mode it is 96 entry per thread. But, we do not use the conservative +// value here because there is currently no way to fully model the SMT mode, +// so there is no point in trying. +def ZnRCU : RetireControlUnit<192, 8>; + +// FIXME: there are 72 read buffers and 44 write buffers. 
+ // (a folded load is an instruction that loads and does some operation) // Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops // Instructions with folded loads are usually micro-fused, so they only appear @@ -99,30 +124,43 @@ def : ReadAdvance<ReadAfterLd, 4>; // b. addpd // This multiclass is for folded loads for integer units. multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW, - ProcResourceKind ExePort, - int Lat> { + list<ProcResourceKind> ExePorts, + int Lat, list<int> Res = [], int UOps = 1, + int LoadLat = 4, int LoadUOps = 1> { // Register variant takes 1-cycle on Execution Port. - def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + def : WriteRes<SchedRW, ExePorts> { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } // Memory variant also uses a cycle on ZnAGU - // adds 4 cycles to the latency. - def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> { - let NumMicroOps = 2; - let Latency = !add(Lat, 4); + // adds LoadLat cycles to the latency (default = 4). + def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> { + let Latency = !add(Lat, LoadLat); + let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); + let NumMicroOps = !add(UOps, LoadUOps); } } // This multiclass is for folded loads for floating point units. multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW, - ProcResourceKind ExePort, - int Lat> { + list<ProcResourceKind> ExePorts, + int Lat, list<int> Res = [], int UOps = 1, + int LoadLat = 7, int LoadUOps = 0> { // Register variant takes 1-cycle on Execution Port. - def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + def : WriteRes<SchedRW, ExePorts> { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } // Memory variant also uses a cycle on ZnAGU - // adds 7 cycles to the latency. - def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> { - let Latency = !add(Lat, 7); + // adds LoadLat cycles to the latency (default = 7). 
+ def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> { + let Latency = !add(Lat, LoadLat); + let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); + let NumMicroOps = !add(UOps, LoadUOps); } } @@ -130,103 +168,310 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW, // operation in codegen def : WriteRes<WriteRMW, [ZnAGU]>; -def : WriteRes<WriteStore, [ZnAGU]>; -def : WriteRes<WriteMove, [ZnALU]>; -def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; } +def : WriteRes<WriteStore, [ZnAGU]>; +def : WriteRes<WriteStoreNT, [ZnAGU]>; +def : WriteRes<WriteMove, [ZnALU]>; +def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; } def : WriteRes<WriteZero, []>; def : WriteRes<WriteLEA, [ZnALU]>; -defm : ZnWriteResPair<WriteALU, ZnALU, 1>; -defm : ZnWriteResPair<WriteShift, ZnALU, 1>; -defm : ZnWriteResPair<WriteJump, ZnALU, 1>; +defm : ZnWriteResPair<WriteALU, [ZnALU], 1>; +defm : ZnWriteResPair<WriteADC, [ZnALU], 1>; +defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>; +defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; + +defm : ZnWriteResPair<WriteBSWAP32,[ZnALU], 1, [4]>; +defm : ZnWriteResPair<WriteBSWAP64,[ZnALU], 1, [4]>; + +defm : ZnWriteResPair<WriteShift, [ZnALU], 1>; +defm : ZnWriteResPair<WriteShiftDouble, [ZnALU], 1>; +defm : ZnWriteResPair<WriteJump, [ZnALU], 1>; +defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>; + +defm : ZnWriteResPair<WriteCMOV, [ZnALU], 1>; +defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>; +def : WriteRes<WriteSETCC, [ZnALU]>; +def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>; +defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>; + +// Bit counts. +defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>; +defm : ZnWriteResPair<WriteBSR, [ZnALU], 3>; +defm : ZnWriteResPair<WriteLZCNT, [ZnALU], 2>; +defm : ZnWriteResPair<WriteTZCNT, [ZnALU], 2>; +defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>; // Treat misc copies as a move. 
def : InstRW<[WriteMove], (instrs COPY)>; -// IDIV -def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> { - let Latency = 41; - let ResourceCycles = [1, 41]; -} +// BMI1 BEXTR, BMI2 BZHI +defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>; +defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>; -def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> { - let Latency = 45; - let ResourceCycles = [1, 4, 41]; -} - -// IMUL +// IDIV +defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>; +defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>; +defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>; +defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>; +defm : ZnWriteResPair<WriteIDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>; +defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>; +defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>; +defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>; + +// IMULH def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{ let Latency = 4; } -def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> { - let Latency = 4; -} - -def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> { - let Latency = 8; -} // Floating point operations -defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>; -defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>; -defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>; -defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>; -defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>; -defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>; -defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>; -defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>; -defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>; -defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>; -defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>; -defm : ZnWriteResFpuPair<WriteFMA, ZnFPU03, 5>; -defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>; -defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>; -defm : 
ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>; +defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteFLoadX, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteFLoadY, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>; +defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; +defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>; + +defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; +defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; +defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; +defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; +defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>; +defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>; +defm : 
X86WriteResPairUnsupported<WriteFBlendZ>; +defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>; +defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; +defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>; +defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; +defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; +defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; +defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; +defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>; +defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; +defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>; +//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>; +defm : X86WriteResPairUnsupported<WriteFDivZ>; +defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>; +//defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>; +defm : X86WriteResPairUnsupported<WriteFDiv64Z>; +defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>; +defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? +defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? 
+defm : X86WriteResPairUnsupported<WriteFRndZ>; +defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteFLogicZ>; +defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; +defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; +defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; +defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>; +defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>; +defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; +defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>; +defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>; +defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; +defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>; +defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>; +defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; +defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>; +defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>; +defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; +//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>; +defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>; +//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; +defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>; +defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>; +defm : 
ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFSqrtZ>; +defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [20]>; +defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [20]>; +defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; +defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>; // Vector integer operations which uses FPU units -defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>; -defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>; -defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>; -defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>; -defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>; -defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>; -defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>; -defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>; +defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>; +defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>; +defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; +defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>; +defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>; +defm : 
X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>; +defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>; + +defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; +defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; +defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; +defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>; +defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; +defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; +defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>; +defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>; +defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; +defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME +defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 1>; // FIXME +defm : X86WriteResPairUnsupported<WritePMULLDZ>; +defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; +defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>; +defm : 
X86WriteResPairUnsupported<WriteVarShuffleZ>; +defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>; +defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; +defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>; +defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>; +defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; +defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>; // Vector Shift Operations -defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>; +defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; + +// Vector insert/extract operations. +defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>; + +def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> { + let Latency = 2; + let ResourceCycles = [1, 2]; +} +def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 2, 3]; +} + +// MOVMSK Instructions. +def : WriteRes<WriteFMOVMSK, [ZnFPU2]>; +def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>; +def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>; + +def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> { + let NumMicroOps = 2; + let Latency = 2; + let ResourceCycles = [2]; +} // AES Instructions. -defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>; -defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>; -defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>; +defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>; +defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>; +defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>; def : WriteRes<WriteFence, [ZnAGU]>; def : WriteRes<WriteNop, []>; // Following instructions with latency=100 are microcoded. // We set long latency so as to block the entire pipeline. 
-defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>; - -//Microcoded Instructions -let Latency = 100 in { - def : WriteRes<WriteMicrocoded, []>; - def : WriteRes<WriteSystem, []>; - def : WriteRes<WriteMPSAD, []>; - def : WriteRes<WriteMPSADLd, []>; - def : WriteRes<WriteCLMul, []>; - def : WriteRes<WriteCLMulLd, []>; - def : WriteRes<WritePCmpIStrM, []>; - def : WriteRes<WritePCmpIStrMLd, []>; - def : WriteRes<WritePCmpEStrI, []>; - def : WriteRes<WritePCmpEStrILd, []>; - def : WriteRes<WritePCmpEStrM, []>; - def : WriteRes<WritePCmpEStrMLd, []>; - def : WriteRes<WritePCmpIStrI, []>; - def : WriteRes<WritePCmpIStrILd, []>; - } - -//=== Regex based itineraries ===// +defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>; +defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>; + +// Microcoded Instructions +def ZnWriteMicrocoded : SchedWriteRes<[]> { + let Latency = 100; +} + +def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>; +def : SchedAlias<WriteFCMOV, ZnWriteMicrocoded>; +def : SchedAlias<WriteSystem, ZnWriteMicrocoded>; +def : SchedAlias<WriteMPSAD, ZnWriteMicrocoded>; +def : SchedAlias<WriteMPSADY, ZnWriteMicrocoded>; +def : SchedAlias<WriteMPSADLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteCLMul, ZnWriteMicrocoded>; +def : SchedAlias<WriteCLMulLd, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpIStrM, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpEStrI, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpEStrM, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpIStrI, ZnWriteMicrocoded>; +def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>; +def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>; +def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>; + +//=== Regex based InstRW ===// // Notation: // - r: register. // - m = memory. 
@@ -247,14 +492,6 @@ def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>; // r,m. def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; -// CMOVcc. -// r,r. -def : InstRW<[WriteALU], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>; -// r,m. -def : InstRW<[WriteALULd, ReadAfterLd], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>; - // XCHG. // r,r. def ZnWriteXCHG : SchedWriteRes<[ZnALU]> { @@ -271,7 +508,7 @@ def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> { } def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>; -def : InstRW<[WriteMicrocoded], (instregex "XLAT")>; +def : InstRW<[WriteMicrocoded], (instrs XLAT)>; // POP16. // r. @@ -302,20 +539,7 @@ def ZnWritePushA : SchedWriteRes<[ZnAGU]> { def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>; //LAHF -def : InstRW<[WriteMicrocoded], (instregex "LAHF")>; - -// SAHF. -def ZnWriteSAHF : SchedWriteRes<[ZnALU]> { - let Latency = 2; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteSAHF], (instregex "SAHF")>; - -// BSWAP. -def ZnWriteBSwap : SchedWriteRes<[ZnALU]> { - let ResourceCycles = [4]; -} -def : InstRW<[ZnWriteBSwap], (instregex "BSWAP")>; +def : InstRW<[WriteMicrocoded], (instrs LAHF)>; // MOVBE. // r,m. @@ -336,16 +560,6 @@ def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)", "(ADD|SUB)64mi32")>; // ADC SBB. -// r,r/i. -def : InstRW<[WriteALU], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)", - "(ADC|SBB)(16|32|64)ri8", - "(ADC|SBB)64ri32", - "(ADC|SBB)(8|16|32|64)rr_REV")>; - -// r,m. -def : InstRW<[WriteALULd, ReadAfterLd], - (instregex "(ADC|SBB)(8|16|32|64)rm")>; - // m,r/i. def : InstRW<[WriteALULd], (instregex "(ADC|SBB)(8|16|32|64)m(r|i)", @@ -355,59 +569,52 @@ def : InstRW<[WriteALULd], // INC DEC NOT NEG. // m. def : InstRW<[WriteALULd], - (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m", - "(INC|DEC)64(16|32)m")>; + (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>; // MUL IMUL. // r16. 
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; } -def : InstRW<[ZnWriteMul16], (instregex "IMUL16r", "MUL16r")>; +def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>; +def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right? +def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. // m16. def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } -def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instregex "IMUL16m", "MUL16m")>; +def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>; // r32. def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; } -def : InstRW<[ZnWriteMul32], (instregex "IMUL32r", "MUL32r")>; +def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>; +def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right? +def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. // m32. def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } -def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instregex "IMUL32m", "MUL32m")>; +def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>; // r64. def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 4; let NumMicroOps = 2; } -def : InstRW<[ZnWriteMul64], (instregex "IMUL64r", "MUL64r")>; +def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>; +def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right? +def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. // m64. 
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 9; let NumMicroOps = 2; } -def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instregex "IMUL64m", "MUL64m")>; - -// r16,r16. -def ZnWriteMul16rri : SchedWriteRes<[ZnALU1, ZnMultiplier]> { - let Latency = 3; -} -def : InstRW<[ZnWriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>; - -// r16,m16. -def ZnWriteMul16rmi : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { - let Latency = 8; -} -def : InstRW<[ZnWriteMul16rmi, ReadAfterLd], (instregex "IMUL16rmi", "IMUL16rmi8")>; +def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>; // MULX. // r32,r32,r32. @@ -415,72 +622,43 @@ def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; let ResourceCycles = [1, 2]; } -def : InstRW<[ZnWriteMulX32], (instregex "MULX32rr")>; +def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>; // r32,r32,m32. def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; let ResourceCycles = [1, 2, 2]; } -def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instregex "MULX32rm")>; +def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>; // r64,r64,r64. def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> { let Latency = 3; } -def : InstRW<[ZnWriteMulX64], (instregex "MULX64rr")>; +def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>; // r64,r64,m64. def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } -def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instregex "MULX64rm")>; - -// DIV, IDIV. -// r8. -def ZnWriteDiv8 : SchedWriteRes<[ZnALU2, ZnDivider]> { - let Latency = 15; -} -def : InstRW<[ZnWriteDiv8], (instregex "DIV8r", "IDIV8r")>; - -// r16. -def ZnWriteDiv16 : SchedWriteRes<[ZnALU2, ZnDivider]> { - let Latency = 17; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteDiv16], (instregex "DIV16r", "IDIV16r")>; - -// r32. 
-def ZnWriteDiv32 : SchedWriteRes<[ZnALU2, ZnDivider]> { - let Latency = 25; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteDiv32], (instregex "DIV32r", "IDIV32r")>; - -// r64. -def ZnWriteDiv64 : SchedWriteRes<[ZnALU2, ZnDivider]> { - let Latency = 41; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteDiv64], (instregex "DIV64r", "IDIV64r")>; +def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>; //-- Control transfer instructions --// // J(E|R)CXZ. def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>; -def : InstRW<[ZnWriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>; +def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>; // INTO -def : InstRW<[WriteMicrocoded], (instregex "INTO")>; +def : InstRW<[WriteMicrocoded], (instrs INTO)>; // LOOP. def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>; -def : InstRW<[ZnWriteLOOP], (instregex "LOOP")>; +def : InstRW<[ZnWriteLOOP], (instrs LOOP)>; // LOOP(N)E, LOOP(N)Z def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>; -def : InstRW<[ZnWriteLOOPE], (instregex "LOOPE", "LOOPNE", - "LOOPZ", "LOOPNZ")>; +def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>; // CALL. // r. @@ -494,7 +672,7 @@ def ZnWriteRET : SchedWriteRes<[ZnALU03]> { let NumMicroOps = 2; } def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)", - "IRET(D|Q)", "RETF")>; + "IRET(16|32|64)")>; //-- Logic instructions --// @@ -504,12 +682,6 @@ def : InstRW<[WriteALULd], (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)", "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>; -// ANDN. -// r,r. -def : InstRW<[WriteALU], (instregex "ANDN(32|64)rr")>; -// r,m. 
-def : InstRW<[WriteALULd, ReadAfterLd], (instregex "ANDN(32|64)rm")>; - // Define ALU latency variants def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> { let Latency = 2; @@ -518,24 +690,8 @@ def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; } -def ZnWriteALULat3 : SchedWriteRes<[ZnALU]> { - let Latency = 3; -} -def ZnWriteALULat3Ld : SchedWriteRes<[ZnAGU, ZnALU]> { - let Latency = 7; -} - -// BSF BSR. -// r,r. -def : InstRW<[ZnWriteALULat3], (instregex "BS(R|F)(16|32|64)rr")>; -// r,m. -def : InstRW<[ZnWriteALULat3Ld, ReadAfterLd], (instregex "BS(R|F)(16|32|64)rm")>; - // BT. -// r,r/i. -def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>; - -def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mr")>; +// m,i. def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>; // BTR BTS BTC. @@ -546,7 +702,6 @@ def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> { } def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>; - // m,r,i. def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; @@ -559,79 +714,35 @@ def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>; // r,r. def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>; // r,m. -def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "BLS(I|MSK|R)(32|64)rm")>; - -// BEXTR. -// r,r,r. -def : InstRW<[WriteALU], (instregex "BEXTR(32|64)rr")>; -// r,m,r. -def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BEXTR(32|64)rm")>; - -// BZHI. -// r,r,r. -def : InstRW<[WriteALU], (instregex "BZHI(32|64)rr")>; -// r,m,r. -def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BZHI(32|64)rm")>; +def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>; // CLD STD. -def : InstRW<[WriteALU], (instregex "STD", "CLD")>; +def : InstRW<[WriteALU], (instrs STD, CLD)>; // PDEP PEXT. // r,r,r. def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>; -// r,m,r. +// r,r,m. 
def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; -// ROR ROL. -def : InstRW<[WriteShift], (instregex "RO(R|L)(8|16|32|64)r1")>; - // RCR RCL. -// r,1. -def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r1")>; - -// m,1. -def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m1")>; - -// i. -def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>; - // m,i. -def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>; +def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>; // SHR SHL SAR. // m,i. def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>; // SHRD SHLD. -// r,r -def : InstRW<[WriteShift], (instregex "SH(R|L)D(16|32|64)rri8")>; - // m,r def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>; // r,r,cl. -def : InstRW<[WriteMicrocoded], (instregex "SHLD(16|32|64)rrCL")>; - -// r,r,cl. -def : InstRW<[WriteMicrocoded], (instregex "SHRD(16|32|64)rrCL")>; +def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>; // m,r,cl. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>; -// SETcc. -// r. -def : InstRW<[WriteShift], - (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>; -// m. -def : InstRW<[WriteShift], - (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>; - -// LZCNT TZCNT. -// r,r. -def : InstRW<[ZnWriteALULat2], (instregex "(LZCNT|TZCNT)(16|32|64)rr")>; -// r,m. -def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "(LZCNT|TZCNT)(16|32|64)rm")>; - //-- Misc instructions --// // CMPXCHG. 
def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> { @@ -644,9 +755,9 @@ def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>; def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> { let NumMicroOps = 18; } -def : InstRW<[ZnWriteCMPXCHG8B], (instregex "CMPXCHG8B")>; +def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>; -def : InstRW<[WriteMicrocoded], (instregex "CMPXCHG16B")>; +def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>; // LEAVE def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> { @@ -656,13 +767,13 @@ def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> { def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>; // PAUSE. -def : InstRW<[WriteMicrocoded], (instregex "PAUSE")>; +def : InstRW<[WriteMicrocoded], (instrs PAUSE)>; // RDTSC. def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>; // RDPMC. -def : InstRW<[WriteMicrocoded], (instregex "RDPMC")>; +def : InstRW<[WriteMicrocoded], (instrs RDPMC)>; // RDRAND. def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>; @@ -732,7 +843,7 @@ def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>; def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>; // FXCHG. -def : InstRW<[ZnWriteFXCH], (instregex "XCH_F")>; +def : InstRW<[ZnWriteFXCH], (instrs XCH_F)>; // FILD. def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -756,31 +867,29 @@ def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> { } // FLDZ. -def : InstRW<[ZnWriteFPU13], (instregex "LD_F0")>; +def : SchedAlias<WriteFLD0, ZnWriteFPU13>; // FLD1. -def : InstRW<[ZnWriteFPU3], (instregex "LD_F1")>; +def : SchedAlias<WriteFLD1, ZnWriteFPU3>; // FLDPI FLDL2E etc. -def : InstRW<[ZnWriteFPU3], (instregex "FLDPI", "FLDL2(T|E)" "FLDL(G|N)2")>; - -def : InstRW<[WriteMicrocoded], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>; +def : SchedAlias<WriteFLDC, ZnWriteFPU3>; // FNSTSW. // AX. -def : InstRW<[WriteMicrocoded], (instregex "FNSTSW16r")>; +def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>; // m16. 
-def : InstRW<[WriteMicrocoded], (instregex "FNSTSWm")>; +def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>; // FLDCW. -def : InstRW<[WriteMicrocoded], (instregex "FLDCW16m")>; +def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>; // FNSTCW. -def : InstRW<[WriteMicrocoded], (instregex "FNSTCW16m")>; +def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>; // FINCSTP FDECSTP. -def : InstRW<[ZnWriteFPU3], (instregex "FINCSTP", "FDECSTP")>; +def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>; // FFREE. def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>; @@ -793,14 +902,6 @@ def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>; //-- Arithmetic instructions --// -def ZnWriteFPU3Lat2 : SchedWriteRes<[ZnFPU3]> { - let Latency = 2; -} - -def ZnWriteFPU3Lat2Ld : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 9; -} - def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ; def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ; @@ -809,22 +910,18 @@ def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { let Latency = 8; } -// FABS. -def : InstRW<[ZnWriteFPU3Lat2], (instregex "ABS_F")>; - // FCHS. def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>; // FCOM(P) FUCOM(P). // r. -def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr", - "UCOM_FPr")>; +def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>; // m. -def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>; +def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>; // FCOMPP FUCOMPP. // r. -def : InstRW<[ZnWriteFPU0Lat1], (instregex "FCOMPP", "UCOM_FPPr")>; +def : InstRW<[ZnWriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>; def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> { @@ -833,8 +930,7 @@ def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> // FCOMI(P) FUCOMI(P). // m. 
-def : InstRW<[ZnWriteFPU02], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr", - "UCOM_FIPr")>; +def : InstRW<[ZnWriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>; def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> { @@ -844,92 +940,42 @@ def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> } // FICOM(P). -def : InstRW<[ZnWriteFPU03], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>; +def : InstRW<[ZnWriteFPU03], (instregex "FICOM(P?)(16|32)m")>; // FTST. def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>; // FXAM. -def : InstRW<[ZnWriteFPU3Lat1], (instregex "FXAM")>; +def : InstRW<[ZnWriteFPU3Lat1], (instrs FXAM)>; // FPREM. -def : InstRW<[WriteMicrocoded], (instregex "FPREM")>; +def : InstRW<[WriteMicrocoded], (instrs FPREM)>; // FPREM1. -def : InstRW<[WriteMicrocoded], (instregex "FPREM1")>; +def : InstRW<[WriteMicrocoded], (instrs FPREM1)>; // FRNDINT. -def : InstRW<[WriteMicrocoded], (instregex "FRNDINT")>; +def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>; // FSCALE. -def : InstRW<[WriteMicrocoded], (instregex "FSCALE")>; +def : InstRW<[WriteMicrocoded], (instrs FSCALE)>; // FXTRACT. -def : InstRW<[WriteMicrocoded], (instregex "FXTRACT")>; +def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>; // FNOP. -def : InstRW<[ZnWriteFPU0Lat1], (instregex "FNOP")>; +def : InstRW<[ZnWriteFPU0Lat1], (instrs FNOP)>; // WAIT. -def : InstRW<[ZnWriteFPU0Lat1], (instregex "WAIT")>; +def : InstRW<[ZnWriteFPU0Lat1], (instrs WAIT)>; // FNCLEX. -def : InstRW<[WriteMicrocoded], (instregex "FNCLEX")>; +def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>; // FNINIT. 
-def : InstRW<[WriteMicrocoded], (instregex "FNINIT")>; +def : InstRW<[WriteMicrocoded], (instrs FNINIT)>; //=== Integer MMX and XMM Instructions ===// -//-- Move instructions --// - -// Moves from GPR to FPR incurs a penalty -def ZnWriteFPU2 : SchedWriteRes<[ZnFPU2]> { - let Latency = 3; -} - -// Move to ALU doesn't incur penalty -def ZnWriteToALU2 : SchedWriteRes<[ZnFPU2]> { - let Latency = 2; -} - -def ZnWriteFPU : SchedWriteRes<[ZnFPU]>; -def ZnWriteFPUY : SchedWriteRes<[ZnFPU]> { - let NumMicroOps = 2; - let Latency=2; -} - -// MOVD. -// r32/64 <- (x)mm. -def : InstRW<[ZnWriteToALU2], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr", - "VMOVPDI2DIrr", "MOVPDI2DIrr")>; - -// (x)mm <- r32/64. -def : InstRW<[ZnWriteFPU2], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr", - "VMOVDI2PDIrr", "MOVDI2PDIrr")>; - -// MOVQ. -// r64 <- (x)mm. -def : InstRW<[ZnWriteToALU2], (instregex "VMOVPQIto64rr")>; - -// (x)mm <- r64. -def : InstRW<[ZnWriteFPU2], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>; - -// (x)mm <- (x)mm. -def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>; - -// (V)MOVDQA/U. -// x <- x. -def : InstRW<[ZnWriteFPU], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr", - "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV")>; - -// y <- y. -def : InstRW<[ZnWriteFPUY], (instregex "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>; - -// MOVDQ2Q. -def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVDQ2Qrr")>; - -// MOVQ2DQ. -def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ2DQrr")>; // PACKSSWB/DW. // mm <- mm. 
@@ -938,15 +984,22 @@ def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> { let NumMicroOps = 2; } def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ; +def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> { + let Latency = 8; + let NumMicroOps = 2; +} -def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr", - "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; -def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm", - "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; +def : InstRW<[ZnWriteFPU12], (instrs MMX_PACKSSDWirr, + MMX_PACKSSWBirr, + MMX_PACKUSWBirr)>; +def : InstRW<[ZnWriteFPU12m], (instrs MMX_PACKSSDWirm, + MMX_PACKSSWBirm, + MMX_PACKUSWBirm)>; -// VPMOVSX/ZX BW BD BQ DW DQ. +// VPMOVSX/ZX BW BD BQ WD WQ DQ. // y <- x. -def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>; def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { @@ -969,12 +1022,12 @@ def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> { // x,x,i / v,v,v,i def : InstRW<[ZnWriteFPU013], (instregex "(V?)PBLENDWrri")>; // ymm -def : InstRW<[ZnWriteFPU013Y], (instregex "(V?)PBLENDWYrri")>; +def : InstRW<[ZnWriteFPU013Y], (instrs VPBLENDWYrri)>; // x,m,i / v,v,m,i def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>; // y,m,i -def : InstRW<[ZnWriteFPU013LdY], (instregex "(V?)PBLENDWYrmi")>; +def : InstRW<[ZnWriteFPU013LdY], (instrs VPBLENDWYrmi)>; def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ; def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { @@ -983,9 +1036,9 @@ def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { // VPBLENDD. // v,v,v,i. 
-def : InstRW<[ZnWriteFPU01], (instregex "VPBLENDDrri")>; +def : InstRW<[ZnWriteFPU01], (instrs VPBLENDDrri)>; // ymm -def : InstRW<[ZnWriteFPU01Y], (instregex "VPBLENDDYrri")>; +def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>; // v,v,m,i def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> { @@ -998,8 +1051,8 @@ def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> { let Latency = 9; let ResourceCycles = [1, 3]; } -def : InstRW<[ZnWriteFPU01Op2], (instregex "VPBLENDDrmi")>; -def : InstRW<[ZnWriteFPU01Op2Y], (instregex "VPBLENDDYrmi")>; +def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>; +def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>; // MASKMOVQ. def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>; @@ -1007,42 +1060,13 @@ def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>; // MASKMOVDQU. def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>; -// VPMASKMOVQ. +// VPMASKMOVD. // ymm -def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>; -def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>; - def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOVD(Y?)rm")>; // m, v,v. def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>; -// PMOVMSKB. -def ZnWritePMOVMSKB : SchedWriteRes<[ZnFPU2]> { - let NumMicroOps = 2; -} -def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> { - let Latency = 2; -} -def : InstRW<[ZnWritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKBrr")>; -def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>; - -// PEXTR B/W/D/Q. -// r32,x,i. -def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> { - let Latency = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>; - -def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 2, 3]; -} -// m8,x,i. -def : InstRW<[ZnWritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>; - // VPBROADCAST B/W. 
// x, m8/16. def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { @@ -1069,13 +1093,12 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>; // HADD, HSUB PS/PD // PHADD|PHSUB (S) W/D. -def : InstRW<[WriteMicrocoded], (instregex "MMX_PHADD(W?)r(r|m)64", - "MMX_PHADDSWr(r|m)64", - "MMX_PHSUB(W|D)r(r|m)64", - "MMX_PHSUBSWrr64", - "(V?)PH(ADD|SUB)(W|D)(Y?)r(r|m)", - "(V?)PH(ADD|SUB)SWr(r|m)(256)?")>; - +def : SchedAlias<WritePHAdd, ZnWriteMicrocoded>; +def : SchedAlias<WritePHAddLd, ZnWriteMicrocoded>; +def : SchedAlias<WritePHAddX, ZnWriteMicrocoded>; +def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>; +def : SchedAlias<WritePHAddY, ZnWriteMicrocoded>; +def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>; // PCMPGTQ. def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>; @@ -1092,69 +1115,16 @@ def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> { let ResourceCycles = [1,2]; } def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>; -def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>; - -// PMULLD. -// x,x. -def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> { - let Latency = 4; -} -// ymm. -def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> { - let Latency = 5; - let ResourceCycles = [2]; -} -def : InstRW<[ZnWritePMULLDr], (instregex "(V?)PMULLDrr")>; -def : InstRW<[ZnWritePMULLDYr], (instregex "(V?)PMULLDYrr")>; - -// x,m. -def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> { - let Latency = 11; - let NumMicroOps = 2; -} -// y,m. -def ZnWritePMULLDYm : SchedWriteRes<[ZnAGU, ZnFPU0]> { - let Latency = 12; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWritePMULLDm], (instregex "(V?)PMULLDrm")>; -def : InstRW<[ZnWritePMULLDYm], (instregex "(V?)PMULLDYrm")>; +def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>; //-- Logic instructions --// -// PTEST. -// v,v. 
-def ZnWritePTESTr : SchedWriteRes<[ZnFPU12]> { - let ResourceCycles = [2]; -} -def : InstRW<[ZnWritePTESTr], (instregex "(V?)PTEST(Y?)rr")>; - -// v,m. -def ZnWritePTESTm : SchedWriteRes<[ZnAGU, ZnFPU12]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWritePTESTm], (instregex "(V?)PTEST(Y?)rm")>; - // PSLL,PSRL,PSRA W/D/Q. // x,x / v,v,x. def ZnWritePShift : SchedWriteRes<[ZnFPU2]> ; def ZnWritePShiftY : SchedWriteRes<[ZnFPU2]> { let Latency = 2; } -def ZnWritePShiftLd : SchedWriteRes<[ZnAGU,ZnFPU2]> { - let Latency = 8; -} -def ZnWritePShiftYLd : SchedWriteRes<[ZnAGU, ZnFPU2]> { - let Latency = 9; -} -def : InstRW<[ZnWritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rr")>; -def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrr")>; - -def : InstRW<[ZnWritePShiftLd], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rm")>; -def : InstRW<[ZnWritePShiftYLd], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrm")>; // PSLL,PSRL DQ. def : InstRW<[ZnWritePShift], (instregex "(V?)PS(R|L)LDQri")>; @@ -1163,33 +1133,16 @@ def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>; //=== Floating Point XMM and YMM Instructions ===// //-- Move instructions --// -// MOVMSKP S/D. -// r32 <- x,y. -def ZnWriteMOVMSKPr : SchedWriteRes<[ZnFPU2]> ; -def : InstRW<[ZnWriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)(Y?)rr")>; - // VPERM2F128. -def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rr")>; -def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rm")>; - -// BLENDVP S/D. 
-def ZnWriteFPU01Lat3 : SchedWriteRes<[ZnFPU013]> { - let Latency = 3; -} -def ZnWriteFPU01Lat3Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> { - let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWriteFPU01Lat3], (instregex "BLENDVP(S|D)rr0")>; -def : InstRW<[ZnWriteFPU01Lat3Ld, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>; +def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>; +def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>; def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> { let NumMicroOps = 2; let Latency = 8; } // VBROADCASTF128. -def : InstRW<[ZnWriteBROADCAST], (instregex "VBROADCASTF128")>; +def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128)>; // EXTRACTPS. // r32,x,i. @@ -1210,10 +1163,10 @@ def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>; // VEXTRACTF128. // x,y,i. -def : InstRW<[ZnWriteFPU013], (instregex "VEXTRACTF128rr")>; +def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr)>; // m128,y,i. -def : InstRW<[ZnWriteFPU013m], (instregex "VEXTRACTF128mr")>; +def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr)>; def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> { let Latency = 2; @@ -1226,69 +1179,27 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> { } // VINSERTF128. // y,y,x,i. -def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>; -def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>; - -// VMASKMOVP S/D. -// x,x,m. -def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 8; -} -// y,y,m. -def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 4; -} -def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>; -def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>; -def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>; - -// m256,y,y. 
-def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>; - -// VGATHERDPS. -// x. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>; -// y. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSYrm")>; - -// VGATHERQPS. -// x. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSrm")>; +def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr)>; +def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm)>; -// y. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSYrm")>; - -// VGATHERDPD. -// x. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDrm")>; - -// y. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDYrm")>; - -// VGATHERQPD. -// x. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDrm")>; - -// y. -def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDYrm")>; +// VGATHER. +def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>; //-- Conversion instructions --// def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> { let Latency = 4; } +def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> { + let Latency = 5; +} + // CVTPD2PS. // x,x. -def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V?)CVTPD2PSrr")>; +def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>; +// y,y. +def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>; +// z,z. +defm : X86WriteResUnsupported<WriteCvtPD2PSZ>; def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> { let Latency = 11; @@ -1296,34 +1207,30 @@ def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> { let ResourceCycles = [1,2]; } // x,m128. -def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V?)CVTPD2PS(X?)rm")>; - -// x,y. -def ZnWriteCVTPD2PSYr : SchedWriteRes<[ZnFPU3]> { - let Latency = 5; -} -def : InstRW<[ZnWriteCVTPD2PSYr], (instregex "(V?)CVTPD2PSYrr")>; +def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>; // x,m256. 
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; } -def : InstRW<[ZnWriteCVTPD2PSYLd], (instregex "(V?)CVTPD2PSYrm")>; +def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>; +// z,m512 +defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>; // CVTSD2SS. // x,x. // Same as WriteCVTPD2PSr -def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(Int_)?(V)?CVTSD2SSrr")>; +def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>; // x,m64. -def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(Int_)?(V)?CVTSD2SSrm")>; +def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>; // CVTPS2PD. // x,x. def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> { let Latency = 3; } -def : InstRW<[ZnWriteCVTPS2PDr], (instregex "(V?)CVTPS2PDrr")>; +def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>; // x,m64. // y,m128. @@ -1331,20 +1238,23 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 10; let NumMicroOps = 2; } -def : InstRW<[ZnWriteCVTPS2PDLd], (instregex "(V?)CVTPS2PD(Y?)rm")>; +def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>; +def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>; +defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>; // y,x. def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { let Latency = 3; } -def : InstRW<[ZnWriteVCVTPS2PDY], (instregex "VCVTPS2PDYrr")>; +def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>; +defm : X86WriteResUnsupported<WriteCvtPS2PDZ>; // CVTSS2SD. // x,x. def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; } -def : InstRW<[ZnWriteCVTSS2SDr], (instregex "(Int_)?(V?)CVTSS2SDrr")>; +def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>; // x,m32. 
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -1352,7 +1262,7 @@ def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let NumMicroOps = 2; let ResourceCycles = [1, 2]; } -def : InstRW<[ZnWriteCVTSS2SDLd], (instregex "(Int_)?(V?)CVTSS2SDrm")>; +def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>; def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> { let Latency = 5; @@ -1363,7 +1273,7 @@ def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>; // Same as xmm // y,x. -def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "VCVTDQ2PDYrr")>; +def : InstRW<[ZnWriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>; def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> { let Latency = 5; @@ -1383,7 +1293,6 @@ def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>; def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>; // x,m256. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>; -def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQ(64)?rm")>; def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> { let Latency = 4; @@ -1394,7 +1303,7 @@ def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>; // CVTPI2PD. // x,mm. -def : InstRW<[ZnWriteCVTPS2PDr], (instregex "MMX_CVT(T?)PI2PDirr")>; +def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>; // CVT(T)PD2PI. // mm,x. @@ -1403,24 +1312,21 @@ def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>; def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> { let Latency = 5; } -// CVSTSI2SS. -// x,r32. -def : InstRW<[ZnWriteCVSTSI2SSr], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>; // same as CVTPD2DQr // CVT(T)SS2SI. // r32,x. -def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>; +def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>; // same as CVTPD2DQm // r32,m32. 
-def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>; +def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>; def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> { let Latency = 5; } // CVTSI2SD. // x,r32/64. -def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(Int_)?(V?)CVTSI2SS(64)?rr")>; +def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>; def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]> { @@ -1431,34 +1337,29 @@ def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> { } // CVTSD2SI. // r32/64 -def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(Int_)?CVT(T?)SD2SI(64)?rr")>; +def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>; // r32,m32. -def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(Int_)?CVT(T?)SD2SI(64)?rm")>; - - -def ZnWriteVCVSTSI2SIr: SchedWriteRes<[ZnFPU3]> { - let Latency = 5; -} -def ZnWriteVCVSTSI2SILd: SchedWriteRes<[ZnFPU3, ZnAGU]> { - let Latency = 12; -} -// VCVTSD2SI. -// r32/64 -def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(Int_)?VCVT(T?)SD2SI(64)?rr")>; -// r32,m32. -def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(Int_)?VCVT(T?)SD2SI(64)?rm")>; +def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>; // VCVTPS2PH. // x,v,i. -def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>; +def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>; +def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // m,v,i. -def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>; +def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>; +def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // VCVTPH2PS. // v,x. -def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>; +def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>; +def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZ>; // v,m. 
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>; +def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>; //-- SSE4A instructions --// // EXTRQ @@ -1473,12 +1374,6 @@ def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> { } def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>; -// MOVNTSS/MOVNTSD -def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> { - let Latency = 8; -} -def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>; - //-- SHA instructions --// // SHA256MSG2 def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>; @@ -1544,41 +1439,19 @@ def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>; //-- Arithmetic instructions --// // HADD, HSUB PS/PD -def : InstRW<[WriteMicrocoded], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)r(r|m)")>; - -// MULL SS/SD PS/PD. -// x,x / v,v,v. -def ZnWriteMULr : SchedWriteRes<[ZnFPU01]> { - let Latency = 3; -} -// ymm. -def ZnWriteMULYr : SchedWriteRes<[ZnFPU01]> { - let Latency = 4; -} -def : InstRW<[ZnWriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>; -def : InstRW<[ZnWriteMULYr], (instregex "(V?)MUL(P|S)(S|D)Yrr")>; - -// x,m / v,v,m. -def ZnWriteMULLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 10; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteMULLd], (instregex "(V?)MUL(P|S)(S|D)rm")>; - -// ymm -def ZnWriteMULYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 11; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteMULYLd], (instregex "(V?)MUL(P|S)(S|D)Yrm")>; +def : SchedAlias<WriteFHAdd, ZnWriteMicrocoded>; +def : SchedAlias<WriteFHAddLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteFHAddY, ZnWriteMicrocoded>; +def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>; // VDIVPS. +// TODO - convert to ZnWriteResFpuPair // y,y,y. 
def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> { let Latency = 12; let ResourceCycles = [12]; } -def : InstRW<[ZnWriteVDIVPSYr], (instregex "VDIVPSYrr")>; +def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>; // y,y,m256. def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -1586,15 +1459,16 @@ def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let NumMicroOps = 2; let ResourceCycles = [1, 19]; } -def : InstRW<[ZnWriteVDIVPSYLd], (instregex "VDIVPSYrm")>; +def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>; // VDIVPD. +// TODO - convert to ZnWriteResFpuPair // y,y,y. def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> { let Latency = 15; let ResourceCycles = [15]; } -def : InstRW<[ZnWriteVDIVPDY], (instregex "VDIVPDYrr")>; +def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>; // y,y,m256. def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -1602,173 +1476,63 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let NumMicroOps = 2; let ResourceCycles = [1,22]; } -def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>; - -// VRCPPS. -// y,y. -def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]> { - let Latency = 5; -} -def : InstRW<[ZnWriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>; - -// y,m256. -def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 12; - let NumMicroOps = 3; -} -def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm(_Int)?")>; - -// ROUND SS/SD PS/PD. -// v,v,i. -def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> { - let Latency = 4; -} -def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>; - -// VFMADD. -// v,v,v. -def ZnWriteFMADDr : SchedWriteRes<[ZnFPU03]> { - let Latency = 5; -} -def : InstRW<[ZnWriteFMADDr], - (instregex - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(213|132|231)(Y)?r", - "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)r", - "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", - "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>; - -// v,v,m. 
-def ZnWriteFMADDm : SchedWriteRes<[ZnAGU, ZnFPU03]> { - let Latency = 12; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteFMADDm], - (instregex - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?m", - "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)m", - "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", - "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>; - -// v,m,i. -def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 11; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>; +def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>; // DPPS. // x,x,i / v,v,v,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>; +def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>; +def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>; // x,m,i / v,v,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>; +def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>; // DPPD. // x,x,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>; +def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>; // x,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>; - -// VSQRTPS. -// y,y. -def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> { - let Latency = 28; - let ResourceCycles = [28]; -} -def : InstRW<[ZnWriteVSQRTPSYr], (instregex "VSQRTPSYr")>; - -// y,m256. -def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 35; - let ResourceCycles = [1,35]; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteVSQRTPSYLd], (instregex "VSQRTPSYm")>; - -// VSQRTPD. -// y,y. -def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> { - let Latency = 40; - let ResourceCycles = [40]; -} -def : InstRW<[ZnWriteVSQRTPDYr], (instregex "VSQRTPDYr")>; - -// y,m256. 
-def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 47; - let NumMicroOps = 2; - let ResourceCycles = [1,47]; -} -def : InstRW<[ZnWriteVSQRTPDYLd], (instregex "VSQRTPDYm")>; +def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>; // RSQRTSS +// TODO - convert to ZnWriteResFpuPair // x,x. def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> { let Latency = 5; } -def : InstRW<[ZnWriteRSQRTSSr], (instregex "(V?)RSQRTSS(Y?)r(_Int)?")>; +def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>; -// RSQRTPS -// x,x. -def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]> { - let Latency = 5; -} -def : InstRW<[ZnWriteRSQRTPSr], (instregex "(V?)RSQRTPS(Y?)r(_Int)?")>; - -// RSQRTSSm // x,m128. def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> { let Latency = 12; let NumMicroOps = 2; - let ResourceCycles = [1,2]; -} -def : InstRW<[ZnWriteRSQRTSSLd], (instregex "(V?)RSQRTSSm(_Int)?")>; - -// RSQRTPSm -def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 12; - let NumMicroOps = 2; + let ResourceCycles = [1,2]; // FIXME: Is this right? } -def : InstRW<[ZnWriteRSQRTPSLd], (instregex "(V?)RSQRTPSm(_Int)?")>; +def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>; -// RSQRTPS 256. +// RSQRTPS +// TODO - convert to ZnWriteResFpuPair // y,y. def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [2]; } -def : InstRW<[ZnWriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>; +def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>; // y,m256. def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { let Latency = 12; let NumMicroOps = 2; } -def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm(_Int)?")>; - -//-- Logic instructions --// - -// AND, ANDN, OR, XOR PS/PD. -// x,x / v,v,v. -def : InstRW<[WriteVecLogic], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>; -// x,m / v,v,m. 
-def : InstRW<[WriteVecLogicLd], - (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>; +def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>; //-- Other instructions --// // VZEROUPPER. -def : InstRW<[WriteMicrocoded], (instregex "VZEROUPPER")>; +def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>; // VZEROALL. -def : InstRW<[WriteMicrocoded], (instregex "VZEROALL")>; - -// LDMXCSR. -def : InstRW<[WriteMicrocoded], (instregex "(V)?LDMXCSR")>; - -// STMXCSR. -def : InstRW<[WriteMicrocoded], (instregex "(V)?STMXCSR")>; +def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>; } // SchedModel |