aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td')
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td336
1 files changed, 162 insertions, 174 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a8c65435ab9b..7fc96d1eda89 100755
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -255,7 +255,8 @@ defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;
-defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
+defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
+defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
@@ -342,8 +343,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
@@ -361,10 +364,10 @@ defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
-defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply.
-defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>;
-defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>;
-defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
+defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
@@ -619,6 +622,8 @@ def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KOR(B|D|Q|W)rr",
"KXNOR(B|D|Q|W)rr",
"KXOR(B|D|Q|W)rr",
+ "KSET0(B|D|Q|W)", // Same as KXOR
+ "KSET1(B|D|Q|W)", // Same as KXNOR
"MMX_PADDS(B|W)irr",
"MMX_PADDUS(B|W)irr",
"MMX_PAVG(B|W)irr",
@@ -814,19 +819,26 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
}
def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "KADD(B|D|Q|W)rr",
+ "VALIGND(Z|Z128|Z256)rri",
+ "VALIGNQ(Z|Z128|Z256)rri",
+ "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
+ "VPBROADCAST(B|W)rr",
+ "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
+
+def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
"KUNPCK(BW|DQ|WD)rr",
- "VALIGND(Z|Z128|Z256)rri",
- "VALIGNQ(Z|Z128|Z256)rri",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
"VCMP(SD|SS)Zrr",
- "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
"VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
"VFPCLASS(SD|SS)Zrr",
- "VPBROADCAST(B|W)rr",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
@@ -834,7 +846,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPQ(Z|Z128|Z256)rri",
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
- "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -1171,7 +1182,7 @@ def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64)>;
+def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
@@ -1331,8 +1342,8 @@ def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
(instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
- "VBROADCASTI32X2Z128m(b?)",
- "VBROADCASTSSZ128m(b?)",
+ "VBROADCASTI32X2Z128rm(b?)",
+ "VBROADCASTSSZ128rm(b?)",
"VINSERT(F|I)128rm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
@@ -1350,8 +1361,8 @@ def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
- "VPBROADCASTDZ128m(b?)",
- "VPBROADCASTQZ128m(b?)",
+ "VPBROADCASTDZ128rm(b?)",
+ "VPBROADCASTQZ128rm(b?)",
"VPSUB(B|D|Q|W)Z128rm(b?)",
"(V?)PSUB(B|D|Q|W)rm",
"VPTERNLOGDZ128rm(b?)i",
@@ -1456,7 +1467,7 @@ def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
-def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64)>;
+def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1516,9 +1527,8 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
- "VFPCLASSSDZrm(b?)",
- "VPBROADCASTB(Z|Z256)m(b?)",
- "VPBROADCASTW(Z|Z256)m(b?)")>;
+ "VPBROADCASTB(Z|Z256)rm(b?)",
+ "VPBROADCASTW(Z|Z256)rm(b?)")>;
def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
VPBROADCASTWYrm,
VPMOVSXBDYrm,
@@ -1535,24 +1545,24 @@ def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
(instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
- "VBROADCASTF32X2Z256m(b?)",
- "VBROADCASTF32X2Zm(b?)",
+ "VBROADCASTF32X2Z256rm(b?)",
+ "VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
"VBROADCASTF32X4rm(b?)",
"VBROADCASTF32X8rm(b?)",
"VBROADCASTF64X2Z128rm(b?)",
"VBROADCASTF64X2rm(b?)",
"VBROADCASTF64X4rm(b?)",
- "VBROADCASTI32X2Z256m(b?)",
- "VBROADCASTI32X2Zm(b?)",
+ "VBROADCASTI32X2Z256rm(b?)",
+ "VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
"VBROADCASTI32X4rm(b?)",
"VBROADCASTI32X8rm(b?)",
"VBROADCASTI64X2Z128rm(b?)",
"VBROADCASTI64X2rm(b?)",
"VBROADCASTI64X4rm(b?)",
- "VBROADCASTSD(Z|Z256)m(b?)",
- "VBROADCASTSS(Z|Z256)m(b?)",
+ "VBROADCASTSD(Z|Z256)rm(b?)",
+ "VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
"VINSERTF32x8Zrm(b?)",
"VINSERTF64x2(Z|Z256)rm(b?)",
@@ -1577,8 +1587,8 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
- "VPBROADCASTD(Z|Z256)m(b?)",
- "VPBROADCASTQ(Z|Z256)m(b?)",
+ "VPBROADCASTD(Z|Z256)rm(b?)",
+ "VPBROADCASTQ(Z|Z256)rm(b?)",
"VPSUB(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
"VPTERNLOGD(Z|Z256)rm(b?)i",
@@ -1667,17 +1677,9 @@ def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
VPMOVSXWDYrm,
VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
- "VCMP(PD|PS)Z128rm(b?)i",
- "VCMP(SD|SS)Zrm",
+ "VFPCLASSSDZrm(b?)",
"VFPCLASSSSZrm(b?)",
- "VPCMPBZ128rmi(b?)",
- "VPCMPDZ128rmi(b?)",
- "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
- "VPCMPGT(B|D|Q|W)Z128rm(b?)",
"(V?)PCMPGTQrm",
- "VPCMPQZ128rmi(b?)",
- "VPCMPU(B|D|Q|W)Z128rmi(b?)",
- "VPCMPWZ128rmi(b?)",
"VPERMI2D128rm(b?)",
"VPERMI2PD128rm(b?)",
"VPERMI2PS128rm(b?)",
@@ -1701,15 +1703,32 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VPMOVZXBWZ128rm(b?)",
"VPMOVZXDQZ128rm(b?)",
"VPMOVZXWDZ128rm(b?)",
- "VPMOVZXWQZ128rm(b?)",
- "VPTESTMBZ128rm(b?)",
- "VPTESTMDZ128rm(b?)",
- "VPTESTMQZ128rm(b?)",
- "VPTESTMWZ128rm(b?)",
- "VPTESTNMBZ128rm(b?)",
- "VPTESTNMDZ128rm(b?)",
- "VPTESTNMQZ128rm(b?)",
- "VPTESTNMWZ128rm(b?)")>;
+ "VPMOVZXWQZ128rm(b?)")>;
+
+def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
+ "VFPCLASSPDZ128rm(b?)",
+ "VFPCLASSPSZ128rm(b?)",
+ "VPCMPBZ128rmi(b?)",
+ "VPCMPDZ128rmi(b?)",
+ "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
+ "VPCMPGT(B|D|Q|W)Z128rm(b?)",
+ "VPCMPQZ128rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z128rmi(b?)",
+ "VPCMPWZ128rmi(b?)",
+ "VPTESTMBZ128rm(b?)",
+ "VPTESTMDZ128rm(b?)",
+ "VPTESTMQZ128rm(b?)",
+ "VPTESTMWZ128rm(b?)",
+ "VPTESTNMBZ128rm(b?)",
+ "VPTESTNMDZ128rm(b?)",
+ "VPTESTNMQZ128rm(b?)",
+ "VPTESTNMWZ128rm(b?)")>;
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 9;
@@ -1745,30 +1764,38 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
"VALIGNQ(Z|Z256)rm(b?)i",
- "VCMPPD(Z|Z256)rm(b?)i",
- "VCMPPS(Z|Z256)rm(b?)i",
- "VPCMPB(Z|Z256)rmi(b?)",
- "VPCMPD(Z|Z256)rmi(b?)",
- "VPCMPEQB(Z|Z256)rm(b?)",
- "VPCMPEQD(Z|Z256)rm(b?)",
- "VPCMPEQQ(Z|Z256)rm(b?)",
- "VPCMPEQW(Z|Z256)rm(b?)",
- "VPCMPGTB(Z|Z256)rm(b?)",
- "VPCMPGTD(Z|Z256)rm(b?)",
- "VPCMPGTQ(Z|Z256)rm(b?)",
- "VPCMPGTW(Z|Z256)rm(b?)",
- "VPCMPQ(Z|Z256)rmi(b?)",
- "VPCMPU(B|D|Q|W)Z256rmi(b?)",
- "VPCMPU(B|D|Q|W)Zrmi(b?)",
- "VPCMPW(Z|Z256)rmi(b?)",
"VPMAXSQ(Z|Z256)rm(b?)",
"VPMAXUQ(Z|Z256)rm(b?)",
"VPMINSQ(Z|Z256)rm(b?)",
- "VPMINUQ(Z|Z256)rm(b?)",
- "VPTESTM(B|D|Q|W)Z256rm(b?)",
- "VPTESTM(B|D|Q|W)Zrm(b?)",
- "VPTESTNM(B|D|Q|W)Z256rm(b?)",
- "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
+ "VPMINUQ(Z|Z256)rm(b?)")>;
+
+def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
+ "VCMPPS(Z|Z256)rm(b?)i",
+ "VFPCLASSPD(Z|Z256)rm(b?)",
+ "VFPCLASSPS(Z|Z256)rm(b?)",
+ "VPCMPB(Z|Z256)rmi(b?)",
+ "VPCMPD(Z|Z256)rmi(b?)",
+ "VPCMPEQB(Z|Z256)rm(b?)",
+ "VPCMPEQD(Z|Z256)rm(b?)",
+ "VPCMPEQQ(Z|Z256)rm(b?)",
+ "VPCMPEQW(Z|Z256)rm(b?)",
+ "VPCMPGTB(Z|Z256)rm(b?)",
+ "VPCMPGTD(Z|Z256)rm(b?)",
+ "VPCMPGTQ(Z|Z256)rm(b?)",
+ "VPCMPGTW(Z|Z256)rm(b?)",
+ "VPCMPQ(Z|Z256)rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z256rmi(b?)",
+ "VPCMPU(B|D|Q|W)Zrmi(b?)",
+ "VPCMPW(Z|Z256)rmi(b?)",
+ "VPTESTM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTM(B|D|Q|W)Zrm(b?)",
+ "VPTESTNM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 10;
@@ -1938,14 +1965,14 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
- let Latency = 12;
+ let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
- let Latency = 12;
+ let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
@@ -2106,8 +2133,8 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
-def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 18;
+def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+ let Latency = 21;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
@@ -2134,21 +2161,19 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
-def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 19;
+def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+ let Latency = 22;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
- "VPMULLQZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
-def SKXWriteResGroup214 : SchedWriteRes<[]> {
- let Latency = 20;
- let NumMicroOps = 0;
+def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm,
- VGATHERQPSZrm,
- VPGATHERDDZ128rm)>;
+def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
let Latency = 20;
@@ -2164,15 +2189,41 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
-def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 20;
- let NumMicroOps = 5;
+def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm,
- VGATHERQPSZ256rm,
- VPGATHERQDZ128rm,
- VPGATHERQDZ256rm)>;
+def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+ VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+ VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+ VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+ VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+ VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 21;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+ VGATHERDPDZrm, VPGATHERDQZrm,
+ VGATHERQPDZrm, VPGATHERQQZrm,
+ VGATHERQPSZrm, VPGATHERQDZrm)>;
+
+def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 20;
@@ -2202,57 +2253,31 @@ def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
-def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 22;
- let NumMicroOps = 5;
+def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm,
- VGATHERQPDZ128rm,
- VPGATHERDQZ128rm,
- VPGATHERQQZ128rm)>;
+def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
-def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 22;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
}
-def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm,
- VGATHERQPSrm,
- VPGATHERDDrm,
- VPGATHERDQrm,
- VPGATHERQDrm,
- VPGATHERQQrm,
- VPGATHERDDrm,
- VPGATHERQDrm,
- VPGATHERDQrm,
- VPGATHERQQrm,
- VGATHERDPSrm,
- VGATHERQPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm)>;
-
-def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
}
-def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm,
- VGATHERQPDYrm,
- VGATHERQPSYrm,
- VPGATHERDDYrm,
- VPGATHERDQYrm,
- VPGATHERQDYrm,
- VPGATHERQQYrm,
- VPGATHERDDYrm,
- VPGATHERQDYrm,
- VPGATHERDQYrm,
- VPGATHERQQYrm,
- VGATHERDPSYrm,
- VGATHERQPSYrm,
- VGATHERDPDYrm)>;
+def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 22;
@@ -2276,27 +2301,6 @@ def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
-def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm,
- VGATHERQPDZ256rm,
- VPGATHERDQZ256rm,
- VPGATHERQDZrm,
- VPGATHERQQZ256rm)>;
-
-def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 26;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm,
- VGATHERQPDZrm,
- VPGATHERDQZrm,
- VPGATHERQQZrm)>;
-
def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
@@ -2304,14 +2308,6 @@ def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
-def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 27;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
- VPGATHERDDZ256rm)>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2326,14 +2322,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
-def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 30;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
- VPGATHERDDZrm)>;
-
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -2461,7 +2449,7 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".