src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-04-16 16:01:22 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-04-16 16:01:22 +0000
commit	71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree	5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
parent	31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
download	src-71d5a2540a98c81f5bcaeb48805e0e2881f530ef.tar.gz src-71d5a2540a98c81f5bcaeb48805e0e2881f530ef.zip

Vendor import of llvm trunk r300422 (tag: vendor/llvm/llvm-trunk-r300422)

https://llvm.org/svn/llvm-project/llvm/trunk@300422

Notes

Notes:
svn path=/vendor/llvm/dist/; revision=317017
svn path=/vendor/llvm/llvm-trunk-r300422/; revision=317018; tag=vendor/llvm/llvm-trunk-r300422

Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')

-rw-r--r--

lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

212

1 file changed, 114 insertions, 98 deletions

diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 974e79fff3d7..0446655830d1 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

@@ -17,11 +17,11 @@

#include "AMDGPUAsmPrinter.h"

+#include "AMDGPUTargetMachine.h"

#include "MCTargetDesc/AMDGPUTargetStreamer.h"

#include "InstPrinter/AMDGPUInstPrinter.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "AMDGPU.h"

-#include "AMDKernelCodeT.h"

#include "AMDGPUSubtarget.h"

#include "R600Defines.h"

#include "R600MachineFunctionInfo.h"

@@ -93,33 +93,40 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {

AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,

std::unique_ptr<MCStreamer> Streamer)

- : AsmPrinter(TM, std::move(Streamer)) {}

+ : AsmPrinter(TM, std::move(Streamer)) {

+ AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();

+ }

StringRef AMDGPUAsmPrinter::getPassName() const {

return "AMDGPU Assembly Printer";

}

+const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {

+ return TM.getMCSubtargetInfo();

+AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {

+ return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer());

void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {

if (TM.getTargetTriple().getOS() != Triple::AMDHSA)

return;

- // Need to construct an MCSubtargetInfo here in case we have no functions

- // in the module.

- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(

- TM.getTargetTriple().str(), TM.getTargetCPU(),

- TM.getTargetFeatureString()));

- AMDGPUTargetStreamer *TS =

- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());

+ AMDGPU::IsaInfo::IsaVersion ISA =

+ AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());

- TS->EmitDirectiveHSACodeObjectVersion(2, 1);

+ getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);

+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(

+ ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");

+ getTargetStreamer().EmitStartOfCodeObjectMetadata(M);

- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());

- TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,

- "AMD", "AMDGPU");

+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {

+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)

+ return;

- // Emit runtime metadata.

- TS->EmitRuntimeMetadata(M);

+ getTargetStreamer().EmitEndOfCodeObjectMetadata();

}

bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(

@@ -136,25 +143,32 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(

return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);

}

void AMDGPUAsmPrinter::EmitFunctionBodyStart() {

const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();

SIProgramInfo KernelInfo;

+ amd_kernel_code_t KernelCode;

if (STM.isAmdCodeObjectV2(*MF)) {

getSIProgramInfo(KernelInfo, *MF);

- EmitAmdKernelCodeT(*MF, KernelInfo);

+ getAmdKernelCode(KernelCode, KernelInfo, *MF);

+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());

+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);

}

+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)

+ return;

+ getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(),

+ KernelCode);

}

void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {

const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();

- if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {

- AMDGPUTargetStreamer *TS =

- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());

+ if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {

SmallString<128> SymbolName;

getNameWithPrefix(SymbolName, MF->getFunction()),

- TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);

+ getTargetStreamer().EmitAMDGPUSymbolType(

+ SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);

}

AsmPrinter::EmitFunctionEntryLabel();

@@ -163,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {

void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {

// Group segment variables aren't emitted in HSA.

- if (AMDGPU::isGroupSegment(GV))

+ if (AMDGPU::isGroupSegment(GV, AMDGPUASI))

return;

AsmPrinter::EmitGlobalVariable(GV);

@@ -247,6 +261,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {

OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +

Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),

false);

+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +

+ Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),

+ false);

OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +

Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),

false);

@@ -382,6 +399,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

case AMDGPU::EXEC_HI:

case AMDGPU::SCC:

case AMDGPU::M0:

+ case AMDGPU::SRC_SHARED_BASE:

+ case AMDGPU::SRC_SHARED_LIMIT:

+ case AMDGPU::SRC_PRIVATE_BASE:

+ case AMDGPU::SRC_PRIVATE_LIMIT:

continue;

case AMDGPU::VCC:

@@ -478,33 +499,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

ExtraSGPRs = 6;

}

- // Record first reserved register and reserved register count fields, and

- // update max register counts if "amdgpu-debugger-reserve-regs" attribute was

- // requested.

- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;

- ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);

- // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and

- // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"

- // attribute was requested.

- if (STM.debuggerEmitPrologue()) {

- ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =

- RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());

- ProgInfo.DebuggerPrivateSegmentBufferSGPR =

- RI->getHWRegIndex(MFI->getScratchRSrcReg());

- }

+ unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);

// Check the addressable register limit before we add ExtraSGPRs.

if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&

!STM.hasSGPRInitBug()) {

- unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();

+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {

// This can happen due to a compiler bug or when using inline asm.

LLVMContext &Ctx = MF.getFunction()->getContext();

DiagnosticInfoResourceLimit Diag(*MF.getFunction(),

"addressable scalar registers",

MaxSGPR + 1, DS_Error,

- DK_ResourceLimit, MaxAddressableNumSGPRs);

+ DK_ResourceLimit,

+ MaxAddressableNumSGPRs);

Ctx.diagnose(Diag);

MaxSGPR = MaxAddressableNumSGPRs - 1;

}

@@ -512,41 +520,43 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

// Account for extra SGPRs and VGPRs reserved for debugger use.

MaxSGPR += ExtraSGPRs;

- MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);

+ MaxVGPR += ExtraVGPRs;

// We found the maximum register index. They start at 0, so add one to get the

// number of registers.

- ProgInfo.NumVGPR = MaxVGPR + 1;

ProgInfo.NumSGPR = MaxSGPR + 1;

+ ProgInfo.NumVGPR = MaxVGPR + 1;

// Adjust number of registers used to meet default/requested minimum/maximum

// number of waves per execution unit request.

ProgInfo.NumSGPRsForWavesPerEU = std::max(

- ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));

+ ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));

ProgInfo.NumVGPRsForWavesPerEU = std::max(

- ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));

+ ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));

if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||

STM.hasSGPRInitBug()) {

- unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();

- if (ProgInfo.NumSGPR > MaxNumSGPRs) {

- // This can happen due to a compiler bug or when using inline asm to use the

- // registers which are usually reserved for vcc etc.

+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {

+ // This can happen due to a compiler bug or when using inline asm to use

+ // the registers which are usually reserved for vcc etc.

LLVMContext &Ctx = MF.getFunction()->getContext();

DiagnosticInfoResourceLimit Diag(*MF.getFunction(),

"scalar registers",

ProgInfo.NumSGPR, DS_Error,

- DK_ResourceLimit, MaxNumSGPRs);

+ DK_ResourceLimit,

+ MaxAddressableNumSGPRs);

Ctx.diagnose(Diag);

- ProgInfo.NumSGPR = MaxNumSGPRs;

- ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;

+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs;

+ ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;

}

if (STM.hasSGPRInitBug()) {

- ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;

- ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;

+ ProgInfo.NumSGPR =

+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

+ ProgInfo.NumSGPRsForWavesPerEU =

+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

}

if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {

@@ -565,13 +575,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

// SGPRBlocks is actual number of SGPR blocks minus 1.

ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,

- RI->getSGPRAllocGranule());

- ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;

+ STM.getSGPREncodingGranule());

+ ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;

// VGPRBlocks is actual number of VGPR blocks minus 1.

ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,

- RI->getVGPRAllocGranule());

- ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;

+ STM.getVGPREncodingGranule());

+ ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;

+ // Record first reserved VGPR and number of reserved VGPRs.

+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;

+ ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);

+ // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and

+ // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"

+ // attribute was requested.

+ if (STM.debuggerEmitPrologue()) {

+ ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =

+ RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());

+ ProgInfo.DebuggerPrivateSegmentBufferSGPR =

+ RI->getHWRegIndex(MFI->getScratchRSrcReg());

+ }

// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode

// register.

@@ -580,7 +604,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

ProgInfo.IEEEMode = STM.enableIEEEBit(MF);

// Make clamp modifier on NaN input returns 0.

- ProgInfo.DX10Clamp = 1;

+ ProgInfo.DX10Clamp = STM.enableDX10Clamp();

const MachineFrameInfo &FrameInfo = MF.getFrameInfo();

ProgInfo.ScratchSize = FrameInfo.getStackSize();

@@ -635,6 +659,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

ProgInfo.ComputePGMRSrc2 =

S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |

S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |

+ S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |

S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |

S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |

S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |

@@ -688,7 +713,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,

OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);

OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);

OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);

- OutStreamer->EmitIntValue(MFI->PSInputEna, 4);

+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);

OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);

OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);

}

@@ -713,97 +738,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {

}

-void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,

- const SIProgramInfo &KernelInfo) const {

+void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,

+ const SIProgramInfo &KernelInfo,

+ const MachineFunction &MF) const {

const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

const SISubtarget &STM = MF.getSubtarget<SISubtarget>();

- amd_kernel_code_t header;

- AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());

+ AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());

- header.compute_pgm_resource_registers =

+ Out.compute_pgm_resource_registers =

KernelInfo.ComputePGMRSrc1 |

(KernelInfo.ComputePGMRSrc2 << 32);

- header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;

+ Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;

- AMD_HSA_BITS_SET(header.code_properties,

+ AMD_HSA_BITS_SET(Out.code_properties,

AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,

getElementByteSizeValue(STM.getMaxPrivateElementSize()));

if (MFI->hasPrivateSegmentBuffer()) {

- header.code_properties |=

+ Out.code_properties |=

AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;

}

if (MFI->hasDispatchPtr())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

if (MFI->hasQueuePtr())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;

if (MFI->hasKernargSegmentPtr())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;

if (MFI->hasDispatchID())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;

if (MFI->hasFlatScratchInit())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;

- // TODO: Private segment size

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;

if (MFI->hasGridWorkgroupCountX()) {

- header.code_properties |=

+ Out.code_properties |=

AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;

}

if (MFI->hasGridWorkgroupCountY()) {

- header.code_properties |=

+ Out.code_properties |=

AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;

}

if (MFI->hasGridWorkgroupCountZ()) {

- header.code_properties |=

+ Out.code_properties |=

AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;

}

if (MFI->hasDispatchPtr())

- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

if (STM.debuggerSupported())

- header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;

+ Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;

if (STM.isXNACKEnabled())

- header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;

+ Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;

// FIXME: Should use getKernArgSize

- header.kernarg_segment_byte_size =

+ Out.kernarg_segment_byte_size =

STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());

- header.wavefront_sgpr_count = KernelInfo.NumSGPR;

- header.workitem_vgpr_count = KernelInfo.NumVGPR;

- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;

- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;

- header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;

- header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;

+ Out.wavefront_sgpr_count = KernelInfo.NumSGPR;

+ Out.workitem_vgpr_count = KernelInfo.NumVGPR;

+ Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;

+ Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;

+ Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;

+ Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;

// These alignment values are specified in powers of two, so alignment =

// 2^n. The minimum alignment is 2^4 = 16.

- header.kernarg_segment_alignment = std::max((size_t)4,

+ Out.kernarg_segment_alignment = std::max((size_t)4,

countTrailingZeros(MFI->getMaxKernArgAlign()));

if (STM.debuggerEmitPrologue()) {

- header.debug_wavefront_private_segment_offset_sgpr =

+ Out.debug_wavefront_private_segment_offset_sgpr =

KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;

- header.debug_private_segment_buffer_sgpr =

+ Out.debug_private_segment_buffer_sgpr =

KernelInfo.DebuggerPrivateSegmentBufferSGPR;

}

- AMDGPUTargetStreamer *TS =

- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());

- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());

- TS->EmitAMDKernelCodeT(header);

}

bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,