Skip to content

Commit

Permalink
Move state processing into separate class
Browse files Browse the repository at this point in the history
Move state processing into separate class
  • Loading branch information
paigeale authored and igcbot committed Dec 11, 2024
1 parent b050853 commit fd3672e
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 200 deletions.
2 changes: 2 additions & 0 deletions IGC/Compiler/CISACodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/FoldKnownWorkGroupSizes.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenCodeGenModule.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenerateBlockMemOpsPass.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenericShaderState.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenIRLowering.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenSimplification.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/getCacheOpts.cpp"
Expand Down Expand Up @@ -150,6 +151,7 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/FoldKnownWorkGroupSizes.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenCodeGenModule.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenerateBlockMemOpsPass.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenericShaderState.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/GenIRLowering.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenSimplification.h"
"${CMAKE_CURRENT_SOURCE_DIR}/getCacheOpts.h"
Expand Down
137 changes: 11 additions & 126 deletions IGC/Compiler/CISACodeGen/CShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;

CShader::CShader(Function* pFunc, CShaderProgram* pProgram)
: entry(pFunc)
CShader::CShader(Function *pFunc, CShaderProgram *pProgram, GenericShaderState &GState)
: m_State(GState)
, entry(pFunc)
, m_parent(pProgram)
, encoder()
, m_BarrierNumber(0)
Expand All @@ -48,21 +49,6 @@ CShader::CShader(Function* pFunc, CShaderProgram* pProgram)
m_HW_TID = nullptr;

m_shaderStats = nullptr;
m_constantBufferMask = 0;
m_constantBufferLoaded = 0;
m_ConstantBufferLength = 0;
m_uavLoaded = 0;
for (int i = 0; i < 4; i++)
{
m_shaderResourceLoaded[i] = 0;
}
m_renderTargetLoaded = 0;
isInputsPulled = false;
m_cbSlot = -1;
m_statelessCBPushedSize = 0;
isMessageTargetDataCacheDataPort = false;
m_BindingTableEntryCount = 0;
m_BindingTableUsedEntriesBitmap = 0;
// [OCL] preAnalysis()/ParseShaderSpecificOpcode() must
// set this to true if there is any stateless access.
m_HasGlobalStatelessMemoryAccess = false;
Expand Down Expand Up @@ -391,7 +377,6 @@ void CShader::CreateImplicitArgs()
if (IGC::isIntelSymbolTableVoidProgram(entry))
return;

m_numBlocks = entry->size();
m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);

Expand Down Expand Up @@ -682,15 +667,15 @@ void CShader::AllocateConstants3DShader(uint& offset)

// Allocates every pushed constant listed in pushInfo.constants as a shader input,
// packing them back to back starting at `offset`, and then advances `offset` by the
// total size rounded up to getMinPushConstantBufferAlignmentInBytes().
//
// offset: in/out payload offset; on return it has been increased by the aligned
//         constant buffer length.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were lost, so
// each statement using m_ConstantBufferLength appears next to a counterpart using
// m_State.m_ConstantBufferLength. Presumably only one statement of each pair exists
// in the real file — confirm against the repository before relying on this text.
void CShader::AllocateConstants(uint& offset)
{
m_ConstantBufferLength = 0;
m_State.m_ConstantBufferLength = 0;
for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++) {
// I->second indexes m_argListCache; the matching symbol is the variable to place.
CVariable* var = GetSymbol(m_argListCache[I->second]);
AllocateInput(var, offset + m_ConstantBufferLength, 0, encoder.IsCodePatchCandidate());
m_ConstantBufferLength += var->GetSize();
AllocateInput(var, offset + m_State.m_ConstantBufferLength, 0, encoder.IsCodePatchCandidate());
m_State.m_ConstantBufferLength += var->GetSize();
}

// Round the accumulated length up to the push constant buffer alignment before
// advancing the caller's offset.
m_ConstantBufferLength = iSTD::Align(m_ConstantBufferLength, getMinPushConstantBufferAlignmentInBytes());
offset += m_ConstantBufferLength;
m_State.m_ConstantBufferLength = iSTD::Align(m_State.m_ConstantBufferLength, getMinPushConstantBufferAlignmentInBytes());
offset += m_State.m_ConstantBufferLength;
}

void CShader::AllocateSimplePushConstants(uint& offset)
Expand All @@ -717,87 +702,8 @@ void CShader::AllocateNOSConstants(uint& offset)
maxConstantPushed = std::max(maxConstantPushed, I->first + numConstantsPushed);
}
maxConstantPushed = iSTD::Max(maxConstantPushed, static_cast<uint>(m_ModuleMetadata->MinNOSPushConstantSize));
m_NOSBufferSize = iSTD::Align(maxConstantPushed * SIZE_DWORD, getMinPushConstantBufferAlignmentInBytes());
offset += m_NOSBufferSize;
}


void CShader::CreateGatherMap()
{
int index = -1;
gatherMap.reserve(pushInfo.constants.size());
for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++)
{
unsigned int address = (I->first.bufId * 256 * 4) + (I->first.eltId);
unsigned int cstOffset = address / 4;
unsigned int cstChannel = address % 4;
if (cstOffset != index)
{
USC::SConstantGatherEntry entry;
entry.GatherEntry.Fields.constantBufferOffset = cstOffset % 256;
entry.GatherEntry.Fields.channelMask = BIT(cstChannel);
// with 3DSTATE_DX9_CONSTANT if buffer is more than 4Kb,
// the constant after 255 can be accessed in constant buffer 1
int CBIndex = cstOffset / 256;
entry.GatherEntry.Fields.constantBufferIndex = CBIndex;
m_constantBufferMask |= BIT(CBIndex);
gatherMap.push_back(entry);
index = cstOffset;
}
else
{
gatherMap[gatherMap.size() - 1].GatherEntry.Fields.channelMask |= BIT(cstChannel);
}
}

// The size of the gather map must be even
if (gatherMap.size() % 2 != 0)
{
USC::SConstantGatherEntry entry;
entry.GatherEntry.Value = 0;
gatherMap.push_back(entry);
}
}

// Copies the constant-buffer related compile results of this shader into the
// output structure pKernelProgram: the constant buffer mask, a copy of the gather
// map, the pushed constant buffer length, the stateless constant buffer slot, the
// simple-push descriptors and the constant buffer replacement patterns taken from
// the context.
//
// pKernelProgram: output structure that receives the values; its gatherMap field
//                 is set to a buffer allocated here with new char[].
//                 NOTE(review): the matching delete[] is not visible in this
//                 span — presumably done by whoever owns pKernelProgram; confirm.
void CShader::CreateConstantBufferOutput(SKernelProgram* pKernelProgram)
{
pKernelProgram->ConstantBufferMask = m_constantBufferMask;
pKernelProgram->gatherMapSize = gatherMap.size();
// ConstantBufferLength is only written when the gather map is non-empty.
if (pKernelProgram->gatherMapSize > 0)
{
pKernelProgram->gatherMap = new char[pKernelProgram->gatherMapSize * sizeof(USC::SConstantGatherEntry)];
memcpy_s(pKernelProgram->gatherMap, pKernelProgram->gatherMapSize *
sizeof(USC::SConstantGatherEntry),
&gatherMap[0],
gatherMap.size() * sizeof(USC::SConstantGatherEntry));
// Length is reported in units of the minimum push constant buffer alignment.
pKernelProgram->ConstantBufferLength = m_ConstantBufferLength / getMinPushConstantBufferAlignmentInBytes();
}

// -1 is the "no slot" value for m_cbSlot; the slot and pushed size are only
// exported when a slot was assigned.
if (m_cbSlot != -1)
{
pKernelProgram->bufferSlot = m_cbSlot;
pKernelProgram->statelessCBPushedSize = m_statelessCBPushedSize;
}

// for simple push
// Copies one descriptor per simple-push buffer in use, field by field.
for (unsigned int i = 0; i < pushInfo.simplePushBufferUsed; i++)
{
pKernelProgram->simplePushInfoArr[i].m_cbIdx = pushInfo.simplePushInfoArr[i].cbIdx;
pKernelProgram->simplePushInfoArr[i].m_pushableAddressGrfOffset= pushInfo.simplePushInfoArr[i].pushableAddressGrfOffset;
pKernelProgram->simplePushInfoArr[i].m_pushableOffsetGrfOffset = pushInfo.simplePushInfoArr[i].pushableOffsetGrfOffset;
pKernelProgram->simplePushInfoArr[i].m_offset = pushInfo.simplePushInfoArr[i].offset;
pKernelProgram->simplePushInfoArr[i].m_size = pushInfo.simplePushInfoArr[i].size;
pKernelProgram->simplePushInfoArr[i].isStateless = pushInfo.simplePushInfoArr[i].isStateless;
pKernelProgram->simplePushInfoArr[i].isBindless = pushInfo.simplePushInfoArr[i].isBindless;
}

// Forward the replacement-pattern data only when the context provides it.
if (GetContext()->m_ConstantBufferReplaceShaderPatterns)
{
pKernelProgram->m_ConstantBufferReplaceShaderPatterns = GetContext()->m_ConstantBufferReplaceShaderPatterns;
pKernelProgram->m_ConstantBufferReplaceShaderPatternsSize = GetContext()->m_ConstantBufferReplaceShaderPatternsSize;
pKernelProgram->m_ConstantBufferUsageMask = GetContext()->m_ConstantBufferUsageMask;
pKernelProgram->m_ConstantBufferReplaceSize = GetContext()->m_ConstantBufferReplaceSize;
}
// NOTE(review): the two statements below use `offset`, which is not a parameter
// or local of this function. They look like residue of a rendered diff (the tail
// of AllocateNOSConstants) rather than part of this function — confirm against
// the repository.
m_State.m_NOSBufferSize = iSTD::Align(maxConstantPushed * SIZE_DWORD, getMinPushConstantBufferAlignmentInBytes());
offset += m_State.m_NOSBufferSize;
}

CVariable* CShader::CreateFunctionSymbol(llvm::Function* pFunc)
Expand Down Expand Up @@ -4105,28 +4011,7 @@ bool CShader::CompileSIMDSizeInCommon(SIMDMode simdMode)

// Returns the number of GRFs per thread reported by the context divided by
// GRF_TOTAL_NUM, clamped to a minimum of 1.
uint32_t CShader::GetShaderThreadUsageRate()
{
    const uint32_t grfPerThread = GetContext()->getNumGRFPerThread();
    const uint32_t usageRate = grfPerThread / GRF_TOTAL_NUM;

    // Never report 0: callers divide by this value.
    return (usageRate < 1u) ? 1u : usageRate;
}

// Converts a number of samplers into the number of 4-sampler groups in use.
//
// samplerCount: number of samplers used.
// Returns 1 for 1..4, 2 for 5..8, 3 for 9..12, 4 for 13..16, and 0 when
// samplerCount is 0 or greater than 16.
unsigned int CShader::GetSamplerCount(unsigned int samplerCount)
{
if (samplerCount > 0)
{
if (samplerCount <= 4)
return 1; // between 1 and 4 samplers used
else if (samplerCount >= 5 && samplerCount <= 8)
return 2; // between 5 and 8 samplers used
else if (samplerCount >= 9 && samplerCount <= 12)
return 3; // between 9 and 12 samplers used
else if (samplerCount >= 13 && samplerCount <= 16)
return 4; // between 13 and 16 samplers used
else
// Samplers count out of range. Force value 0 to avoid undefined behavior.
return 0;
}
return 0;
// NOTE(review): the statement below can never execute because of the return
// directly above it. It looks like residue of a rendered diff (the new body of
// GetShaderThreadUsageRate) — confirm against the repository.
return m_State.GetShaderThreadUsageRate();
}

CShaderProgram::CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel)
Expand Down
6 changes: 4 additions & 2 deletions IGC/Compiler/CISACodeGen/ComputeShaderBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ using namespace llvm;

namespace IGC
{
CComputeShaderBase::CComputeShaderBase(llvm::Function* pFunc, CShaderProgram* pProgram)
: CShader(pFunc, pProgram) {}
// Constructs a compute shader for pFunc inside pProgram. The shader's generic
// state (m_State) is owned by this object and handed to the CShader base by
// reference.
//
// pFunc:    entry function of the shader; must not be null (it is dereferenced).
// pProgram: owning shader program; must not be null (its context is used to
//           build m_State).
//
// The mem-initializers are written in the order in which C++ actually runs them:
// base classes are always constructed before non-static data members, regardless
// of how the list is spelled. CShader therefore receives a reference to m_State
// *before* m_State has been constructed. That is well-defined only as long as
// CShader's constructor merely binds the reference and does not read or write
// through it. Its visible initializer does `m_State(GState)`; the rest of its
// body is not visible here, so keep it that way.
CComputeShaderBase::CComputeShaderBase(llvm::Function *pFunc,
                                       CShaderProgram *pProgram)
    : CShader(pFunc, pProgram, m_State),
      m_State(*pFunc, *pProgram->GetContext()) {}

// Nothing to release explicitly; members and the CShader base clean up themselves.
CComputeShaderBase::~CComputeShaderBase() = default;

Expand Down
2 changes: 2 additions & 0 deletions IGC/Compiler/CISACodeGen/ComputeShaderBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace IGC
public:
CComputeShaderBase(llvm::Function* pFunc, CShaderProgram* pProgram);
virtual ~CComputeShaderBase();

GenericShaderState m_State;
protected:
// Determines if HW can handle auto generating local IDs with this
// order
Expand Down
24 changes: 12 additions & 12 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10839,7 +10839,7 @@ void EmitPass::emitLoadRawIndexed(

ResourceDescriptor resource = GetResourceVariable(bufPtrv);
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->getUserAddrSpaceMD().Get(inst);
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
if (shouldGenerateLSC(inst))
{
if ((IGC_GET_FLAG_VALUE(RovOpt) & 2) && useRasterizerOrderedByteAddressBuffer(inst))
Expand Down Expand Up @@ -12314,7 +12314,7 @@ void EmitPass::emitStoreRawIndexed(
Value* pBufPtr = inst->getResourceValue();
Value* pValToStore = inst->getStoreValue();

m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;

if (shouldGenerateLSC(inst))
{
Expand Down Expand Up @@ -15970,7 +15970,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *dstAddr,

}
ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
Expand Down Expand Up @@ -16168,7 +16168,7 @@ void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
}
}
ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void setSIMDSizeMask(CEncoder* m_encoder, const CShader* m_currShader, int i)
Expand Down Expand Up @@ -16324,7 +16324,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
JoinSIMD(tempdst, numChannels, instWidth);
}
}
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
Expand Down Expand Up @@ -16524,7 +16524,7 @@ void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
ResourceLoopBackEdge(needLoop, flag, label);
}
ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitThreadGroupNamedBarriersSignal(llvm::Instruction* inst)
Expand Down Expand Up @@ -16970,7 +16970,7 @@ void EmitPass::emitUniformAtomicCounter(llvm::GenIntrinsicInst* pInsn)
}

ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitAtomicCounter(llvm::GenIntrinsicInst* pInsn)
Expand Down Expand Up @@ -17076,7 +17076,7 @@ void EmitPass::emitAtomicCounter(llvm::GenIntrinsicInst* pInsn)

ResourceLoopBackEdge(needLoop, flag, label);
ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::CmpBoolOp(Pattern* cmpPattern,
Expand Down Expand Up @@ -19952,7 +19952,7 @@ void EmitPass::emitLSCTypedRead(llvm::Instruction* pInsn)
JoinSIMD(tempdst, numChannels, instWidth);
}
}
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitLSCTypedWrite(llvm::Instruction* pInsn)
Expand Down Expand Up @@ -20102,7 +20102,7 @@ void EmitPass::emitLSCTypedWrite(llvm::Instruction* pInsn)
ResourceLoopBackEdge(needLoop, flag, label);

ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}


Expand Down Expand Up @@ -20293,7 +20293,7 @@ void EmitPass::emitLSCAtomicTyped(llvm::GenIntrinsicInst* inst)
}
}
ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

void EmitPass::emitLscUniformAtomicCounter(llvm::GenIntrinsicInst* pInst)
Expand Down Expand Up @@ -20367,7 +20367,7 @@ void EmitPass::emitLscUniformAtomicCounter(llvm::GenIntrinsicInst* pInst)
}

ResetVMask();
m_currShader->isMessageTargetDataCacheDataPort = true;
m_currShader->m_State.isMessageTargetDataCacheDataPort = true;
}

// DstSubRegOffset and SrcSubRegOffset are in unit of element size.
Expand Down
Loading

0 comments on commit fd3672e

Please sign in to comment.