diff --git a/IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h b/IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h index c59c137d7a61..7ecdcbc4d31d 100644 --- a/IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h +++ b/IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h @@ -159,6 +159,36 @@ class GenXOCLRuntimeInfo : public ModulePass { } }; + struct FunctionInfo { + public: + FunctionInfo(StringRef Name, const GenXSubtarget &ST); + FunctionInfo(const FunctionGroup &FG, GenXOCLRuntimeInfo &RI, + const GenXSubtarget &ST, const GenXBackendConfig &BC); + + std::string Name; + + bool DisableEUFusion = false; + bool SupportsDebugging = false; + bool UsesDPAS = false; + bool UsesGroupId = false; + bool UsesReadWriteImages = false; + bool UsesSample = false; + + unsigned GRFSizeInBytes; + unsigned NumBarriers = 0; + unsigned SLMSize = 0; + unsigned StatelessPrivateMemSize = 0; + unsigned ThreadPrivateMemSize = 0; + + private: + void initInstructionLevelProperties(const FunctionGroup &FG, + GenXOCLRuntimeInfo &RI, + const GenXSubtarget &ST); + + void initInstructionLevelProperties(Function *Func, GenXOCLRuntimeInfo &RI, + const GenXSubtarget &ST); + }; + // Additional kernel info that are not provided by finalizer // but still required for runtime. struct KernelInfo { @@ -174,20 +204,7 @@ class GenXOCLRuntimeInfo : public ModulePass { std::vector VISAAsm; private: - std::string Name; - - bool UsesGroupId = false; - bool UsesDPAS = false; - int NumBarriers = 0; - bool UsesSample = false; - bool UsesReadWriteImages = false; - bool SupportsDebugging = false; - unsigned SLMSize = 0; - unsigned ThreadPrivateMemSize = 0; - unsigned StatelessPrivateMemSize = 0; - bool DisableEUFusion = false; - - unsigned GRFSizeInBytes; + FunctionInfo FuncInfo; using ArgInfoStorageTy = std::vector; using PrintStringStorageTy = std::vector; @@ -195,10 +212,6 @@ class GenXOCLRuntimeInfo : public ModulePass { PrintStringStorageTy PrintStrings; private: - void setInstructionUsageProperties(const FunctionGroup &FG, - GenXOCLRuntimeInfo &RI, - const GenXBackendConfig &BC); - void setMetadataProperties(vc::KernelMetadata &KM, const GenXSubtarget &ST); void setArgumentProperties(const Function &Kernel, const vc::KernelMetadata &KM, const GenXSubtarget &ST, @@ -217,7 +230,7 @@ class GenXOCLRuntimeInfo : public ModulePass { KernelInfo(const FunctionGroup &FG, GenXOCLRuntimeInfo &RI, const GenXSubtarget &ST, const GenXBackendConfig &BC); - const std::string &getName() const { return Name; } + const std::string &getName() const { return FuncInfo.Name; } // These are considered to always be true (at least in igcmc). // Preserve this here. @@ -226,29 +239,31 @@ class GenXOCLRuntimeInfo : public ModulePass { bool usesLocalIdZ() const { return true; } // Deduced from actual function instructions. - bool usesGroupId() const { return UsesGroupId; } + bool usesGroupId() const { return FuncInfo.UsesGroupId; } - bool supportsDebugging() const { return SupportsDebugging; } + bool supportsDebugging() const { return FuncInfo.SupportsDebugging; } // SIMD size is always set by igcmc to one. Preserve this here. unsigned getSIMDSize() const { return 1; } - unsigned getSLMSize() const { return SLMSize; } + unsigned getSLMSize() const { return FuncInfo.SLMSize; } // Deduced from actual function instructions. - unsigned getTPMSize() const { return ThreadPrivateMemSize; } - unsigned getStatelessPrivMemSize() const { return StatelessPrivateMemSize; } + unsigned getTPMSize() const { return FuncInfo.ThreadPrivateMemSize; } + unsigned getStatelessPrivMemSize() const { + return FuncInfo.StatelessPrivateMemSize; + } - unsigned getGRFSizeInBytes() const { return GRFSizeInBytes; } + unsigned getGRFSizeInBytes() const { return FuncInfo.GRFSizeInBytes; } // Deduced from actual function instructions. - bool usesDPAS() const { return UsesDPAS; } + bool usesDPAS() const { return FuncInfo.UsesDPAS; } // igcmc always sets this to zero. Preserve this here. unsigned getNumThreads() const { return 0; } - int getNumBarriers() const { return NumBarriers; } - bool usesSample() const { return UsesSample; } - bool usesReadWriteImages() const { return UsesReadWriteImages; } - bool requireDisableEUFusion() const { return DisableEUFusion; } + unsigned getNumBarriers() const { return FuncInfo.NumBarriers; } + bool usesSample() const { return FuncInfo.UsesSample; } + bool usesReadWriteImages() const { return FuncInfo.UsesReadWriteImages; } + bool requireDisableEUFusion() const { return FuncInfo.DisableEUFusion; } // Arguments accessors. arg_iterator arg_begin() { return ArgInfos.begin(); } diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp index e7b5f9492ec1..ef2213f8f0b4 100644 --- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp +++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp @@ -30,6 +30,7 @@ SPDX-License-Identifier: MIT #include #include #include +#include #include #include #include @@ -43,6 +44,8 @@ SPDX-License-Identifier: MIT #include "Probe/Assertion.h" +#define DEBUG_TYPE "genx-runtime-info" + #define CISA_CALL(c) \ do { \ auto Result = (c); \ @@ -272,72 +275,98 @@ KernelArgBuilder::translateArgument(const Argument &Arg) const { //===----------------------------------------------------------------------===// // -// Kernel info implementation. +// Function info implementation. // //===----------------------------------------------------------------------===// -// Just perform linear instructions scan to find usage stats. -void GenXOCLRuntimeInfo::KernelInfo::setInstructionUsageProperties( - const FunctionGroup &FG, GenXOCLRuntimeInfo &RI, - const GenXBackendConfig &BC) { - for (Function *F : FG) { - for (BasicBlock &BB : *F) { - for (Instruction &I : BB) { - switch (GenXIntrinsic::getGenXIntrinsicID(&I)) { - default: - break; - case GenXIntrinsic::genx_group_id_x: - case GenXIntrinsic::genx_group_id_y: - case GenXIntrinsic::genx_group_id_z: - UsesGroupId = true; - break; - case GenXIntrinsic::genx_barrier: - case GenXIntrinsic::genx_sbarrier: - NumBarriers = 1; - break; - case GenXIntrinsic::genx_3d_sample: - case GenXIntrinsic::genx_sample: - case GenXIntrinsic::genx_sample_unorm: - UsesSample = true; - break; - case GenXIntrinsic::genx_dpas2: - case GenXIntrinsic::genx_dpas_nosrc0: - case GenXIntrinsic::genx_dpas: - if (!DisableEUFusion) { - const auto &GUA = RI.getAnalysis(*F); - if (!GUA.isUniform(BB)) - DisableEUFusion = true; - } - case GenXIntrinsic::genx_dpasw: - case GenXIntrinsic::genx_dpasw_nosrc0: - UsesDPAS = true; - break; -#if 0 - // ThreadPrivateMemSize was not copied to igcmc structures - // always defaulting to zero and everything worked. After - // removal of igcmc structures TPMSize started to be - // initialized to values other than zero and some ispc tests - // started to fail. - // Restore old behavior as temporary fix until proper - // investigation will be performed. This is really strange. - case GenXIntrinsic::genx_alloca: - ThreadPrivateMemSize = BC.getStackSurfaceMaxSize(); - break; -#endif - } +GenXOCLRuntimeInfo::FunctionInfo::FunctionInfo(StringRef Name, + const GenXSubtarget &ST) + : Name(Name.str()), GRFSizeInBytes(ST.getGRFByteSize()) {} + +GenXOCLRuntimeInfo::FunctionInfo::FunctionInfo(const FunctionGroup &FG, + GenXOCLRuntimeInfo &RI, + const GenXSubtarget &ST, + const GenXBackendConfig &BC) + : DisableEUFusion(BC.isDisableEUFusion()), + SupportsDebugging(BC.emitDebuggableKernels()), + GRFSizeInBytes(ST.getGRFByteSize()), + StatelessPrivateMemSize( + vc::getStackAmount(FG.getHead(), BC.getStatelessPrivateMemSize())) { + initInstructionLevelProperties(FG, RI, ST); + + auto *Func = FG.getHead(); + + if (vc::isKernel(Func)) { + vc::KernelMetadata KernelMD(FG.getHead()); + Name = KernelMD.getName().str(); + SLMSize = KernelMD.getSLMSize(); + + if (ST.hasNBarrier()) + NumBarriers = KernelMD.getAlignedBarrierCnt(NumBarriers); + } else { + Name = Func->getName().str(); + } +} + +void GenXOCLRuntimeInfo::FunctionInfo::initInstructionLevelProperties( + Function *Func, GenXOCLRuntimeInfo &RI, const GenXSubtarget &ST) { + LLVM_DEBUG(dbgs() << "> Function: " << Func->getName() << "\n"); + for (auto &Inst : instructions(*Func)) { + auto IID = vc::getAnyIntrinsicID(&Inst); + switch (IID) { + default: + break; + case GenXIntrinsic::genx_group_id_x: + case GenXIntrinsic::genx_group_id_y: + case GenXIntrinsic::genx_group_id_z: + LLVM_DEBUG(dbgs() << ">> UsesGroupId: true\n"); + UsesGroupId = true; + break; + case GenXIntrinsic::genx_barrier: + case GenXIntrinsic::genx_sbarrier: + NumBarriers = 1; + LLVM_DEBUG(dbgs() << ">> NumBarriers: " << NumBarriers << "\n"); + break; + case GenXIntrinsic::genx_3d_sample: + case GenXIntrinsic::genx_sample: + case GenXIntrinsic::genx_sample_unorm: + UsesSample = true; + LLVM_DEBUG(dbgs() << ">> UsesSample: true\n"); + break; + case GenXIntrinsic::genx_dpas2: + case GenXIntrinsic::genx_dpas: + case GenXIntrinsic::genx_dpas_nosrc0: + if (!DisableEUFusion && ST.hasFusedEU()) { + const auto &GUA = RI.getAnalysis(*Func); + DisableEUFusion = !GUA.isUniform(*Inst.getParent()); + LLVM_DEBUG(dbgs() << ">> DisableEUFusion: " << DisableEUFusion << "\n"); } + LLVM_FALLTHROUGH; + case GenXIntrinsic::genx_dpasw: + case GenXIntrinsic::genx_dpasw_nosrc0: + LLVM_DEBUG(dbgs() << ">> UsesDPAS: true\n"); + UsesDPAS = true; + break; } } } -void GenXOCLRuntimeInfo::KernelInfo::setMetadataProperties( - vc::KernelMetadata &KM, const GenXSubtarget &ST) { - Name = KM.getName().str(); - SLMSize = KM.getSLMSize(); - - if (ST.hasNBarrier()) - NumBarriers = KM.getAlignedBarrierCnt(NumBarriers); +void GenXOCLRuntimeInfo::FunctionInfo::initInstructionLevelProperties( + const FunctionGroup &FG, GenXOCLRuntimeInfo &RI, const GenXSubtarget &ST) { + LLVM_DEBUG(dbgs() << "Function group: " << FG.getHead()->getName() << "\n"); + // Collect data from the kernel and subroutine callees + for (Function *Func : FG) + initInstructionLevelProperties(Func, RI, ST); + // Collect data from directly-called stackcall functions + for (const auto *Subgroup : FG.subgroups()) + for (Function *Func : *Subgroup) + initInstructionLevelProperties(Func, RI, ST); } +//===----------------------------------------------------------------------===// +// +// Kernel info implementation. +// +//===----------------------------------------------------------------------===// void GenXOCLRuntimeInfo::KernelInfo::setArgumentProperties( const Function &Kernel, const vc::KernelMetadata &KM, const GenXSubtarget &ST, const GenXBackendConfig &BC) { @@ -355,7 +384,7 @@ void GenXOCLRuntimeInfo::KernelInfo::setArgumentProperties( [&ArgBuilder](const Argument &Arg) { return ArgBuilder.translateArgument(Arg); }); - UsesReadWriteImages = std::any_of( + FuncInfo.UsesReadWriteImages = std::any_of( ArgInfos.begin(), ArgInfos.end(), [](const KernelArgInfo &AI) { return AI.isImage() && AI.getAccessKind() == KernelArgInfo::AccessKindType::ReadWrite; @@ -376,27 +405,16 @@ void GenXOCLRuntimeInfo::KernelInfo::setPrintStrings( } GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const GenXSubtarget &ST) - : Name{"Intel_Symbol_Table_Void_Program"}, GRFSizeInBytes{ - ST.getGRFByteSize()} {} + : FuncInfo("Intel_Symbol_Table_Void_Program", ST) {} GenXOCLRuntimeInfo::KernelInfo::KernelInfo(const FunctionGroup &FG, GenXOCLRuntimeInfo &RI, const GenXSubtarget &ST, - const GenXBackendConfig &BC) { - DisableEUFusion = BC.isDisableEUFusion(); - - setInstructionUsageProperties(FG, RI, BC); - - GRFSizeInBytes = ST.getGRFByteSize(); - - StatelessPrivateMemSize = - vc::getStackAmount(FG.getHead(), BC.getStatelessPrivateMemSize()); - - SupportsDebugging = BC.emitDebuggableKernels(); - + const GenXBackendConfig &BC) + : FuncInfo(FG, RI, ST, BC) { vc::KernelMetadata KM{FG.getHead()}; - IGC_ASSERT_MESSAGE(KM.isKernel(), "Expected kernel as head of function group"); - setMetadataProperties(KM, ST); + IGC_ASSERT_MESSAGE(KM.isKernel(), + "Expected kernel as head of function group"); setArgumentProperties(*FG.getHead(), KM, ST, BC); setPrintStrings(*FG.getHead()->getParent()); }