Skip to content

Commit

Permalink
Fill scale in LSC_ADDR structure for store instructions
Browse files Browse the repository at this point in the history
LSC_ADDR has a scale field (immScale) that vISA can use to multiply the provided offset.
This change prepares the interfaces for use in the emitter, so that it can match cases where applying the scale is profitable and emit smaller vISA.
  • Loading branch information
PawelJurek authored and igcbot committed Sep 15, 2023
1 parent f32f17e commit f705542
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 43 deletions.
3 changes: 2 additions & 1 deletion IGC/Compiler/CISACodeGen/CISABuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8310,6 +8310,7 @@ namespace IGC
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order,
int immOffset,
int immScale,
LSC_CACHE_OPTS cacheOpts,
LSC_DOC_ADDR_SPACE addrSpace)
{
Expand All @@ -8323,7 +8324,7 @@ namespace IGC

LSC_ADDR addr { };
addr.type = LSC_ADDR_TYPE_FLAT;
addr.immScale = 1;
addr.immScale = immScale;
addr.immOffset = immOffset;
addr.size = addr_size;
addr.addrSpace = addrSpace;
Expand Down
3 changes: 2 additions & 1 deletion IGC/Compiler/CISACodeGen/CISABuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ namespace IGC
unsigned blockOffset,
ResourceDescriptor *resource,
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order, int immOffset,
LSC_DATA_ORDER data_order,
int immOffset, int immScale,
LSC_CACHE_OPTS cacheOpts,
LSC_DOC_ADDR_SPACE addrSpace);
void LSC_LoadBlock1D(
Expand Down
80 changes: 46 additions & 34 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6415,7 +6415,7 @@ void EmitPass::emitLSCSimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVa
uint32_t blkBits = 64;
uint32_t nBlks = bytesToRead * 8 / 64;

emitLSCStore(inst, data, pTempVar, blkBits, nBlks, srcOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset);
emitLSCStore(inst, data, pTempVar, blkBits, nBlks, srcOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
m_encoder->Push();

bytesRemaining -= bytesToRead;
Expand Down Expand Up @@ -10714,7 +10714,7 @@ void EmitPass::WriteStackDataBlocks(StackDataBlocks& blkData, uint offsetS)
resource.m_surfaceType = ESURFACE_STATELESS;
unsigned blkBits = 64;
unsigned nBlks = (BlkSize * 8) / 64;
emitLSCStore(nullptr, Arg, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset);
emitLSCStore(nullptr, Arg, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset, 1);
m_encoder->Push();
}
else
Expand Down Expand Up @@ -11373,6 +11373,7 @@ void EmitPass::emitStoreRawIndexed(
pBufPtr,
varOffset,
immOffset,
nullptr,
pValToStore,
inst->getParent(),
cacheOpts,
Expand Down Expand Up @@ -11539,7 +11540,8 @@ void EmitPass::emitStore3DInner(Value* pllValToStore, Value* pllDstPtr, Value* p
void EmitPass::emitStore(
StoreInst *inst,
Value *varOffset,
ConstantInt *immOffset
ConstantInt *immOffset,
ConstantInt *immScale
) {
if (shouldGenerateLSC(inst))
{
Expand All @@ -11551,6 +11553,7 @@ void EmitPass::emitStore(
inst->getPointerOperand(),
varOffset,
immOffset,
immScale,
inst->getValueOperand(),
inst->getParent(),
cacheOpts,
Expand All @@ -11559,6 +11562,8 @@ void EmitPass::emitStore(
addrSpace);
return;
}
IGC_ASSERT_MESSAGE(immScale ? immScale->getSExtValue() == 1 : true,
"Immediate Scale not supported on non-LSC path!");
emitVectorStore(inst, varOffset, immOffset);
}

Expand Down Expand Up @@ -18180,8 +18185,8 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
ResourceDescriptor &Resource,
CVariable *StoreVar, CVariable *Offset,
int ImmOffset, uint32_t NumElts,
uint32_t EltBytes,
int ImmOffset, int ImmScale,
uint32_t NumElts, uint32_t EltBytes,
alignment_t Alignment,
LSC_DOC_ADDR_SPACE addrSpace) {
// NumElts must be 1!
Expand Down Expand Up @@ -18244,7 +18249,7 @@ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
emitLSCStore(cacheOpts,
stVar, eOffset, EltBytes * 8, 1, 0, &Resource,
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, addrSpace);
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
m_encoder->Push();
});

Expand All @@ -18258,11 +18263,13 @@ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
// (max size = UQ x 64 = 512 bytes)
// 2. sub-DW-aligned store, vectorSize is 1|2|3|4|8
// (max size = UQ x 8 = 64 bytes)
void EmitPass::emitLSCVectorStore_uniform(
LSC_CACHE_OPTS cacheOpts, bool UseA32,
ResourceDescriptor& Resource, CVariable* StoreVar, CVariable* Offset, int ImmOffset,
uint32_t NumElts, uint32_t EltBytes, alignment_t Align, LSC_DOC_ADDR_SPACE addrSpace)
{
void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
ResourceDescriptor &Resource,
CVariable *StoreVar,
CVariable *Offset, int ImmOffset,
int ImmScale, uint32_t NumElts,
uint32_t EltBytes, alignment_t Align,
LSC_DOC_ADDR_SPACE addrSpace) {
// If needed, can handle non-uniform StoreVar.
IGC_ASSERT(StoreVar->IsUniform() && Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));

Expand Down Expand Up @@ -18307,10 +18314,10 @@ void EmitPass::emitLSCVectorStore_uniform(

ResourceLoop(Resource, [&](CVariable* /*flag*/) {
m_encoder->SetNoMask();
emitLSCStore(cacheOpts, new_stVar, new_eoff, dSize * 8, 1, 0, &Resource,
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_NONTRANSPOSE,
ImmOffset, addrSpace);
emitLSCStore(
cacheOpts, new_stVar, new_eoff, dSize * 8, 1, 0, &Resource,
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
m_encoder->Push();
});
return;
Expand All @@ -18332,19 +18339,18 @@ void EmitPass::emitLSCVectorStore_uniform(
emitLSCStore(cacheOpts, stVar, eOffset, dSize * 8, vSize, 0, &Resource,
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_TRANSPOSE,
ImmOffset, addrSpace);
ImmOffset, ImmScale, addrSpace);
m_encoder->Push();
});
return;
}

void EmitPass::emitLSCVectorStore(
Value* Ptr,
Value* varOffset, ConstantInt* immOffset,
Value* storedVal, BasicBlock* BB,
LSC_CACHE_OPTS cacheOpts, alignment_t align, bool dontForceDmask,
LSC_DOC_ADDR_SPACE addrSpace)
{
void EmitPass::emitLSCVectorStore(Value *Ptr,
Value *varOffset, ConstantInt *immOffset,
ConstantInt *immScale, Value *storedVal,
BasicBlock *BB, LSC_CACHE_OPTS cacheOpts,
alignment_t align, bool dontForceDmask,
LSC_DOC_ADDR_SPACE addrSpace) {
PointerType* ptrType = cast<PointerType>(Ptr->getType());
Type* Ty = storedVal->getType();
IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
Expand Down Expand Up @@ -18394,6 +18400,8 @@ void EmitPass::emitLSCVectorStore(

const int immOffsetVal =
immOffset ? static_cast<int>(immOffset->getSExtValue()) : 0;
const int immScaleVal =
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;

// 1. handle cases eltBytes < 4
if (eltBytes < 4)
Expand All @@ -18402,8 +18410,8 @@ void EmitPass::emitLSCVectorStore(

IGC_ASSERT(elts == 1);
emitLSCVectorStore_subDW(cacheOpts, useA32, resource,
storedVar, eOffset, immOffsetVal, 1, eltBytes,
align, addrSpace);
storedVar, eOffset, immOffsetVal, immScaleVal,
1, eltBytes, align, addrSpace);
return;
}

Expand All @@ -18412,9 +18420,9 @@ void EmitPass::emitLSCVectorStore(
// 2. Handle uniform Store
if (dstUniform && srcUniform)
{
emitLSCVectorStore_uniform(
cacheOpts, useA32,
resource, storedVar, eOffset, immOffsetVal, elts, eltBytes, align, addrSpace);
emitLSCVectorStore_uniform(cacheOpts, useA32, resource, storedVar,
eOffset, immOffsetVal, immScaleVal, elts,
eltBytes, align, addrSpace);
return;
}

Expand Down Expand Up @@ -18475,13 +18483,14 @@ void EmitPass::emitLSCVectorStore(
case VectorMessage::MESSAGE_A32_LSC_RW:
emitLSCStore(
cacheOpts, subStoredVar, rawAddrVar, blkBits, numBlks, 0, &resource,
LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal, addrSpace);
LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal, immScaleVal, addrSpace);
break;
case VectorMessage::MESSAGE_A64_LSC_RW:
emitLSCStore(cacheOpts,
subStoredVar, rawAddrVar, blkBits, numBlks, 0,
&resource, LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal, addrSpace);
LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal,
immScaleVal, addrSpace);
break;
default:
IGC_ASSERT_MESSAGE(0, "Internal Error: unexpected Message kind for store");
Expand Down Expand Up @@ -18742,7 +18751,7 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
{
ResourceDescriptor resource;
resource.m_surfaceType = ESURFACE_STATELESS;
emitLSCStore(nullptr, pOldFP, pFP, 64, 1, 0, &resource, (useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b), LSC_DATA_ORDER_TRANSPOSE, 0);
emitLSCStore(nullptr, pOldFP, pFP, 64, 1, 0, &resource, (useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b), LSC_DATA_ORDER_TRANSPOSE, 0, 1);
m_encoder->Push();
}
else
Expand Down Expand Up @@ -21354,6 +21363,7 @@ void EmitPass::emitLscIntrinsicStore(llvm::GenIntrinsicInst* inst)
useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
LSC_DATA_ORDER_NONTRANSPOSE,
fragImmOffset,
1,
cacheOpts,
addrspace);
}
Expand Down Expand Up @@ -21416,12 +21426,13 @@ void EmitPass::emitLSCStore(
ResourceDescriptor* resource,
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order,
int immOffset)
int immOffset,
int immScale)
{
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->m_UserAddrSpaceMD.Get(inst);
LSC_CACHE_OPTS cacheOpts = translateLSCCacheControlsFromMetadata(inst, false);
emitLSCStore(cacheOpts, src, offset, elemSize, numElems, blockOffset,
resource, addr_size, data_order, immOffset, addrSpace);
resource, addr_size, data_order, immOffset, immScale, addrSpace);
}

void EmitPass::emitLSCStore(
Expand All @@ -21435,6 +21446,7 @@ void EmitPass::emitLSCStore(
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order,
int immOffset,
int immScale,
LSC_DOC_ADDR_SPACE addrSpace)
{
LSC_DATA_SIZE elemSizeEnum = m_encoder->LSC_GetElementSize(elemSize);
Expand All @@ -21443,7 +21455,7 @@ void EmitPass::emitLSCStore(
m_encoder->LSC_StoreScatter(LSC_STORE,
src, offset, elemSizeEnum, numElemsEnum,
blockOffset, resource, addr_size, data_order,
immOffset, cacheOpts, addrSpace);
immOffset, immScale, cacheOpts, addrSpace);
}

void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
Expand Down
17 changes: 10 additions & 7 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ class EmitPass : public llvm::FunctionPass

// TODO: unify the functions below and clean up
void emitStore(llvm::StoreInst *inst, llvm::Value *varOffset,
llvm::ConstantInt *immOffset
llvm::ConstantInt *immOffset, ConstantInt *immScale = nullptr
);
void emitStore3DInner(llvm::Value* pllValToStore, llvm::Value* pllDstPtr, llvm::Value* pllElmIdx);

Expand Down Expand Up @@ -433,8 +433,8 @@ class EmitPass : public llvm::FunctionPass
LSC_DOC_ADDR_SPACE addrSpace);
void emitLSCVectorStore(llvm::Value *Ptr,
llvm::Value *offset, llvm::ConstantInt *immOffset,
llvm::Value *storedVal, llvm::BasicBlock* BB,
LSC_CACHE_OPTS cacheOpts,
llvm::ConstantInt *immScale, llvm::Value *storedVal,
llvm::BasicBlock *BB, LSC_CACHE_OPTS cacheOpts,
alignment_t align, bool dontForceDMask,
LSC_DOC_ADDR_SPACE addrSpace);
void emitUniformVectorCopy(CVariable* Dst, CVariable* Src, uint32_t nElts,
Expand Down Expand Up @@ -571,7 +571,8 @@ class EmitPass : public llvm::FunctionPass
ResourceDescriptor* resource,
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order,
int immOffset);
int immOffset,
int immScale);
void emitLSCStore(
LSC_CACHE_OPTS cacheOpts,
CVariable* src,
Expand All @@ -583,6 +584,7 @@ class EmitPass : public llvm::FunctionPass
LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order,
int immOffset,
int immScale,
LSC_DOC_ADDR_SPACE addrSpace);
////////////////////////////////////////////////////////////////////
// NOTE: for vector load/stores instructions pass the
Expand Down Expand Up @@ -963,14 +965,15 @@ class EmitPass : public llvm::FunctionPass
void emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
ResourceDescriptor &Resource,
CVariable *StoreVar, CVariable *Offset,
int ImmOffset, uint32_t NumElts,
int ImmOffset, int ImmScale, uint32_t NumElts,
uint32_t EltBytes, alignment_t Align,
LSC_DOC_ADDR_SPACE addrSpace);
void emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
ResourceDescriptor &Resource,
CVariable *StoreVar, CVariable *Offset,
int ImmOffset, uint32_t NumElts,
uint32_t EltBytes, alignment_t Align,
int ImmOffset, int ImmScale,
uint32_t NumElts, uint32_t EltBytes,
alignment_t Align,
LSC_DOC_ADDR_SPACE addrSpace);
LSC_FENCE_OP getLSCMemoryFenceOp(bool IsGlobalMemFence, bool InvalidateL1,
bool EvictL1) const;
Expand Down

0 comments on commit f705542

Please sign in to comment.