Skip to content

Commit

Permalink
Fix direct address dst restriction on SIMD32
Browse files Browse the repository at this point in the history
Fix direct address dst restriction on SIMD32
  • Loading branch information
fangliu2020 authored and igcbot committed Dec 19, 2024
1 parent 1c30d69 commit 4be9d0e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
4 changes: 2 additions & 2 deletions visa/HWConformity.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ class HWConformity {
uint8_t numInFirstMov, bool rule4_11,
bool allowSrcCrossGRF);
void splitSIMD32Inst(INST_LIST_ITER iter, G4_BB *bb);
bool evenlySplitInst(INST_LIST_ITER iter, G4_BB *bb,
bool checkOverlap = true);
void moveSrcToGRF(INST_LIST_ITER it, uint32_t srcNum, uint16_t numGRF,
G4_BB *bb);
void saveDst(INST_LIST_ITER &it, uint8_t stride, G4_BB *bb);
Expand Down Expand Up @@ -259,6 +257,8 @@ class HWConformity {
void localizeForAcc(G4_BB *bb);
void splitDWMULInst(INST_LIST_ITER &start, INST_LIST_ITER &end, G4_BB *bb);
void fixMulSrc1(INST_LIST_ITER i, G4_BB *bb);
bool evenlySplitInst(INST_LIST_ITER iter, G4_BB *bb,
bool checkOverlap = true);
};
} // namespace vISA
// single entry point for HW conformity checks
Expand Down
11 changes: 11 additions & 0 deletions visa/SWWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3915,6 +3915,17 @@ void Optimizer::HWWorkaround() {
LSC_FENCE_OP_NONE)
bb->insertBefore(ii, inst->cloneInst());

// When the destination is an address register, the following restriction
// applies: the destination must not span across the lower-to-upper 8-dword
// boundary of the register.
// This restriction is fixed here, after RA, rather than in HWConformity,
// because RA (spill/fill, A0 save/restore) would generate such instructions.
if (inst->getExecSize() == g4::SIMD32 && inst->getDst() &&
inst->getDst()->isDirectA0()) {
HWConformity hwConf(builder, kernel);
hwConf.evenlySplitInst(ii, bb, /*checkOverlap*/ false);
}

ii++;
}
}
Expand Down

0 comments on commit 4be9d0e

Please sign in to comment.