aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp120
1 files changed, 77 insertions, 43 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 098b0e993886..09e2c762abdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -11,35 +11,65 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
-struct FMinFMaxLegacyInfo {
- Register LHS;
- Register RHS;
- Register True;
- Register False;
- CmpInst::Predicate Pred;
+class AMDGPUPostLegalizerCombinerHelper {
+protected:
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CombinerHelper &Helper;
+
+public:
+ AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
+
+ struct FMinFMaxLegacyInfo {
+ Register LHS;
+ Register RHS;
+ Register True;
+ Register False;
+ CmpInst::Predicate Pred;
+ };
+
+ // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
+ bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
+ void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
+ const FMinFMaxLegacyInfo &Info);
+
+ bool matchUCharToFloat(MachineInstr &MI);
+ void applyUCharToFloat(MachineInstr &MI);
+
+ // FIXME: Should be able to have 2 separate matchdatas rather than custom
+ // struct boilerplate.
+ struct CvtF32UByteMatchInfo {
+ Register CvtVal;
+ unsigned ShiftOffset;
+ };
+
+ bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
+ void applyCvtF32UByteN(MachineInstr &MI,
+ const CvtF32UByteMatchInfo &MatchInfo);
};
-// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
-static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
+bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
+ MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
// FIXME: Combines should have subtarget predicates, and we shouldn't need
// this here.
if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
@@ -77,12 +107,11 @@ static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
}
}
-static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
- const FMinFMaxLegacyInfo &Info) {
-
- auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
- MachineIRBuilder MIB(MI);
- MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
+void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
+ MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
+ B.setInstrAndDebugLoc(MI);
+ auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
+ B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
};
switch (Info.Pred) {
@@ -127,8 +156,7 @@ static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
MI.eraseFromParent();
}
-static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF, CombinerHelper &Helper) {
+bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
// TODO: We could try to match extracting the higher bytes, which would be
@@ -147,15 +175,15 @@ static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
-static void applyUCharToFloat(MachineInstr &MI) {
- MachineIRBuilder B(MI);
+void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
+ B.setInstrAndDebugLoc(MI);
const LLT S32 = LLT::scalar(32);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = B.getMRI()->getType(DstReg);
- LLT SrcTy = B.getMRI()->getType(SrcReg);
+ LLT Ty = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy != S32)
SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
@@ -171,16 +199,8 @@ static void applyUCharToFloat(MachineInstr &MI) {
MI.eraseFromParent();
}
-// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
-// boilerplate.
-struct CvtF32UByteMatchInfo {
- Register CvtVal;
- unsigned ShiftOffset;
-};
-
-static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- CvtF32UByteMatchInfo &MatchInfo) {
+bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
+ MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
Register SrcReg = MI.getOperand(1).getReg();
// Look through G_ZEXT.
@@ -207,14 +227,14 @@ static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
-static void applyCvtF32UByteN(MachineInstr &MI,
- const CvtF32UByteMatchInfo &MatchInfo) {
- MachineIRBuilder B(MI);
+void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
+ MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
const LLT S32 = LLT::scalar(32);
Register CvtSrc = MatchInfo.CvtVal;
- LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
+ LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
if (SrcTy != S32) {
assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
@@ -225,6 +245,18 @@ static void applyCvtF32UByteN(MachineInstr &MI,
MI.eraseFromParent();
}
+class AMDGPUPostLegalizerCombinerHelperState {
+protected:
+ CombinerHelper &Helper;
+ AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
+
+public:
+ AMDGPUPostLegalizerCombinerHelperState(
+ CombinerHelper &Helper,
+ AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
+ : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
+};
+
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
@@ -234,7 +266,7 @@ namespace {
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
+class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
@@ -258,10 +290,12 @@ public:
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT);
- AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
+ CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+ AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
+ AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
+ PostLegalizerHelper);
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
switch (MI.getOpcode()) {