diff --git a/visa/HWConformity.h b/visa/HWConformity.h index df4bd50ddae9..fe68047201ce 100644 --- a/visa/HWConformity.h +++ b/visa/HWConformity.h @@ -120,8 +120,6 @@ class HWConformity { uint8_t numInFirstMov, bool rule4_11, bool allowSrcCrossGRF); void splitSIMD32Inst(INST_LIST_ITER iter, G4_BB *bb); - bool evenlySplitInst(INST_LIST_ITER iter, G4_BB *bb, - bool checkOverlap = true); void moveSrcToGRF(INST_LIST_ITER it, uint32_t srcNum, uint16_t numGRF, G4_BB *bb); void saveDst(INST_LIST_ITER &it, uint8_t stride, G4_BB *bb); @@ -259,6 +257,8 @@ class HWConformity { void localizeForAcc(G4_BB *bb); void splitDWMULInst(INST_LIST_ITER &start, INST_LIST_ITER &end, G4_BB *bb); void fixMulSrc1(INST_LIST_ITER i, G4_BB *bb); + bool evenlySplitInst(INST_LIST_ITER iter, G4_BB *bb, + bool checkOverlap = true); }; } // namespace vISA // single entry point for HW conformity checks diff --git a/visa/SWWA.cpp b/visa/SWWA.cpp index aaa106ef3608..b7b8ca3c367b 100644 --- a/visa/SWWA.cpp +++ b/visa/SWWA.cpp @@ -3915,6 +3915,17 @@ void Optimizer::HWWorkaround() { LSC_FENCE_OP_NONE) bb->insertBefore(ii, inst->cloneInst()); + // When the destination is an address register, the following applies: + // the destination must not span across the lower-to-upper 8-dword + // boundary of the register. + // This is fixed here, after RA, rather than in HWConformity because RA + // (spill/fill, A0 save/restore) can itself generate such instructions. + if (inst->getExecSize() == g4::SIMD32 && inst->getDst() && + inst->getDst()->isDirectA0()) { + HWConformity hwConf(builder, kernel); + hwConf.evenlySplitInst(ii, bb, /*checkOverlap*/ false); + } + ii++; } }