aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp71
1 files changed, 62 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2bb59086f391..c1c88d9a7462 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -62,7 +62,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
public:
AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
- : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
+ : Kind(Kind_), AsmParser(AsmParser_) {}
using Ptr = std::unique_ptr<AMDGPUOperand>;
@@ -1548,6 +1548,7 @@ private:
bool validateVccOperand(unsigned Reg) const;
bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+ bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
@@ -3613,6 +3614,40 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
+ const OperandVector &Operands) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
+ return true;
+
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx == -1)
+ return true;
+
+ const MCOperand &Src2 = Inst.getOperand(Src2Idx);
+ if (!Src2.isReg())
+ return true;
+
+ MCRegister Src2Reg = Src2.getReg();
+ MCRegister DstReg = Inst.getOperand(0).getReg();
+ if (Src2Reg == DstReg)
+ return true;
+
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
+ return true;
+
+ if (isRegIntersect(Src2Reg, DstReg, TRI)) {
+ Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
+ "source 2 operand must not partially overlap with dst");
+ return false;
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
switch (Inst.getOpcode()) {
default:
@@ -4297,6 +4332,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMAIAccWrite(Inst, Operands)) {
return false;
}
+ if (!validateMFMA(Inst, Operands)) {
+ return false;
+ }
if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
return false;
}
@@ -4568,7 +4606,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
uint64_t AccumOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
- unsigned UserSGPRCount = 0;
+
+ // Count the number of user SGPRs implied from the enabled feature bits.
+ unsigned ImpliedUserSGPRCount = 0;
+
+ // Track if the asm explicitly contains the directive for the user SGPR
+ // count.
+ Optional<unsigned> ExplicitUserSGPRCount;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
Optional<bool> EnableWavefrontSize32;
@@ -4617,6 +4661,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.kernarg_size = Val;
+ } else if (ID == ".amdhsa_user_sgpr_count") {
+ ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4626,31 +4672,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
if (Val)
- UserSGPRCount += 4;
+ ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4660,13 +4706,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
if (Val)
- UserSGPRCount += 1;
+ ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4850,6 +4896,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
SGPRBlocks);
+ if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
+ return TokError("amdgpu_user_sgpr_count smaller than than implied by "
+ "enabled user SGPRs");
+
+ unsigned UserSGPRCount =
+ ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
+
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,