Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EraVM] Enable MachineCopyPropagation optimization #712

Merged
merged 14 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1007,10 +1007,16 @@ class TargetInstrInfo : public MCInstrInfo {
/// The source and destination registers may overlap, which may require a
/// careful implementation when multiple copy instructions are required for
/// large registers. See for example the ARM target.
///
/// If RenamableDest is true, the copy instruction's destination operand is
/// marked renamable.
/// If RenamableSrc is true, the copy instruction's source operand is
/// marked renamable.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const {
MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
bool RenamableDest = false,
bool RenamableSrc = false) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}

Expand Down
142 changes: 131 additions & 11 deletions llvm/lib/CodeGen/MachineCopyPropagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,13 @@ static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,

class CopyTracker {
struct CopyInfo {
MachineInstr *MI, *LastSeenUseInCopy;
MachineInstr *MI = nullptr;
MachineInstr *LastSeenUseInCopy = nullptr;
// EraVM local begin
SmallPtrSet<MachineInstr *, 4> SrcUsers;
// EraVM local end
SmallVector<MCRegister, 4> DefRegs;
bool Avail;
bool Avail = false;
};

DenseMap<MCRegister, CopyInfo> Copies;
Expand Down Expand Up @@ -181,6 +185,45 @@ class CopyTracker {
}
}

// EraVM local begin
/// Record \p MI as a user of the source register of a tracked copy.
/// Returns true if \p MI was recorded. Returns false when no copy can be
/// found through the first register unit of \p Reg, when that copy's source
/// register is not exactly \p Reg, or when the unit has no tracker entry.
bool trackSrcUsers(MCRegister Reg, MachineInstr &MI,
                   const TargetRegisterInfo &TRI, const TargetInstrInfo &TII,
                   bool UseCopyInstr) {
  // Only the first register unit of Reg is consulted.
  MCRegUnit FirstUnit = *TRI.regunits(Reg).begin();
  MachineInstr *DefCopy = findCopyDefViaUnit(FirstUnit, TRI);
  if (DefCopy == nullptr)
    return false;

  // Dereferenced without a check: this relies on isCopyInstr recognizing
  // the instruction returned by the tracker.
  std::optional<DestSourcePair> Operands =
      isCopyInstr(*DefCopy, TII, UseCopyInstr);
  Register CopySrc = Operands->Source->getReg();

  // Give up unless the copy reads exactly Reg.
  if (CopySrc != Reg)
    return false;

  auto Entry = Copies.find(FirstUnit);
  if (Entry == Copies.end())
    return false;

  Entry->second.SrcUsers.insert(&MI);
  return true;
}

/// Return a copy of the set of instructions recorded as source users under
/// the first register unit of \p Reg, or an empty set when that unit has no
/// entry in the tracker.
SmallPtrSet<MachineInstr *, 4> getSrcUsers(MCRegister Reg,
                                           const TargetRegisterInfo &TRI) {
  MCRegUnit FirstUnit = *TRI.regunits(Reg).begin();
  auto Entry = Copies.find(FirstUnit);
  if (Entry != Copies.end())
    return Entry->second.SrcUsers;
  return {};
}
// EraVM local end

/// Add this copy's registers into the tracker's copy maps.
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII, bool UseCopyInstr) {
Expand All @@ -193,13 +236,14 @@ class CopyTracker {

// Remember Def is defined by the copy.
for (MCRegUnit Unit : TRI.regunits(Def))
Copies[Unit] = {MI, nullptr, {}, true};
// EraVM local begin
Copies[Unit] = {MI, nullptr, {}, {}, true};
// EraVM local end

// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
for (MCRegUnit Unit : TRI.regunits(Src)) {
auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}});
auto &Copy = I.first->second;
auto &Copy = Copies[Unit];
if (!is_contained(Copy.DefRegs, Def))
Copy.DefRegs.push_back(Def);
Copy.LastSeenUseInCopy = MI;
Expand Down Expand Up @@ -370,6 +414,7 @@ class MachineCopyPropagation : public MachineFunctionPass {
typedef enum { DebugUse = false, RegularUse = true } DebugType;

void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void readSuccessorLiveIns(const MachineBasicBlock &MBB);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
void EliminateSpillageCopies(MachineBasicBlock &MBB);
Expand All @@ -384,6 +429,10 @@ class MachineCopyPropagation : public MachineFunctionPass {
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
bool hasOverlappingMultipleDef(const MachineInstr &MI,
const MachineOperand &MODef, Register Def);
// EraVM local begin
bool canUpdateSrcUsers(const MachineInstr &Copy,
const MachineOperand &CopySrc);
// EraVM local end

/// Candidates for deletion.
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
Expand Down Expand Up @@ -422,6 +471,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
}
}

/// Remove from MaybeDeadCopies every copy the tracker reports for a register
/// unit of a register that is live-in to a successor of \p MBB.
void MachineCopyPropagation::readSuccessorLiveIns(
    const MachineBasicBlock &MBB) {
  // Nothing to keep alive when there are no deletion candidates.
  if (MaybeDeadCopies.empty())
    return;

  // A copy whose result is live-in to a successor is not dead.
  for (const MachineBasicBlock *Successor : MBB.successors())
    for (const auto &LiveIn : Successor->liveins())
      for (MCRegUnit Unit : TRI->regunits(LiveIn.PhysReg)) {
        MachineInstr *LiveCopy = Tracker.findCopyForUnit(Unit, *TRI);
        if (LiveCopy)
          MaybeDeadCopies.remove(LiveCopy);
      }
}

/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
Expand Down Expand Up @@ -608,6 +673,28 @@ bool MachineCopyPropagation::hasOverlappingMultipleDef(
return false;
}

// EraVM local begin
/// Return true if it is safe to update the users of the source register of the
/// copy.
bool MachineCopyPropagation::canUpdateSrcUsers(const MachineInstr &Copy,
const MachineOperand &CopySrc) {
for (auto *SrcUser : Tracker.getSrcUsers(CopySrc.getReg(), *TRI)) {
if (hasImplicitOverlap(*SrcUser, CopySrc))
return false;

for (MachineOperand &MO : SrcUser->uses()) {
if (!MO.isReg() || !MO.isUse() || MO.getReg() != CopySrc.getReg())
continue;
if (MO.isTied() || !MO.isRenamable() ||
!isBackwardPropagatableRegClassCopy(Copy, *SrcUser,
MO.getOperandNo()))
return false;
}
}
return true;
}
// EraVM local end

/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
Expand Down Expand Up @@ -826,6 +913,12 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
assert(!Reg.isVirtual() &&
"MachineCopyPropagation should be run after register allocation!");

// EraVM local begin
// Skip invalidating constant registers.
if (MRI->isReserved(Reg) && MRI->isConstantPhysReg(Reg))
continue;
// EraVM local end

if (MO.isDef() && !MO.isEarlyClobber()) {
Defs.push_back(Reg.asMCReg());
continue;
Expand Down Expand Up @@ -873,10 +966,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}

// If MBB doesn't have successors, delete the copies whose defs are not used.
// If MBB does have successors, then conservative assume the defs are live-out
// since we don't want to trust live-in lists.
if (MBB.succ_empty()) {
bool TracksLiveness = MRI->tracksLiveness();

// If liveness is tracked, we can use the live-in lists to know which
// copies aren't dead.
if (TracksLiveness)
readSuccessorLiveIns(MBB);

// If MBB doesn't have successors, delete copies whose defs are not used.
// If MBB does have successors, we can only delete copies if we are able to
// use liveness information from successors to confirm they are really dead.
if (MBB.succ_empty() || TracksLiveness) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
Expand Down Expand Up @@ -965,13 +1065,29 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (hasOverlappingMultipleDef(MI, MODef, Def))
continue;

// EraVM local begin
if (!canUpdateSrcUsers(*Copy, *CopyOperands->Source))
continue;
// EraVM local end

LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
<< "\n with " << printReg(Def, TRI) << "\n in "
<< MI << " from " << *Copy);

MODef.setReg(Def);
MODef.setIsRenamable(CopyOperands->Destination->isRenamable());

// EraVM local begin
for (auto *SrcUser : Tracker.getSrcUsers(Src, *TRI)) {
for (MachineOperand &MO : SrcUser->uses()) {
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Src)
continue;
MO.setReg(Def);
MO.setIsRenamable(CopyOperands->Destination->isRenamable());
}
}
// EraVM local end

LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
MaybeDeadCopies.insert(Copy);
Changed = true;
Expand All @@ -988,7 +1104,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
// Ignore non-trivial COPYs.
std::optional<DestSourcePair> CopyOperands =
isCopyInstr(MI, *TII, UseCopyInstr);
if (CopyOperands && MI.getNumOperands() == 2) {
if (CopyOperands) {
Register DefReg = CopyOperands->Destination->getReg();
Register SrcReg = CopyOperands->Source->getReg();

Expand Down Expand Up @@ -1037,7 +1153,11 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
CopyDbgUsers[Copy].insert(&MI);
}
}
} else {
// EraVM local begin
} else if (!Tracker.trackSrcUsers(MO.getReg().asMCReg(), MI, *TRI, *TII,
UseCopyInstr)) {
// If we can't track the source users, invalidate the register.
// EraVM local end
Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
UseCopyInstr);
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/TargetInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,9 @@ void TargetInstrInfo::lowerCopy(MachineInstr *MI,
}

copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(),
SrcMO.getReg(), SrcMO.isKill());
SrcMO.getReg(), SrcMO.isKill(),
DstMO.getReg().isPhysical() ? DstMO.isRenamable() : false,
SrcMO.getReg().isPhysical() ? SrcMO.isRenamable() : false);

if (MI->getNumOperands() > 2)
transferImplicitOperands(MI, TRI);
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3512,7 +3512,9 @@ void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest,
bool RenamableSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
llvm::ArrayRef<unsigned> Indices) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;

void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register SrcReg,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ bool R600InstrInfo::isVector(const MachineInstr &MI) const {
void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest, bool RenamableSrc) const {
unsigned VectorComponents = 0;
if ((R600::R600_Reg128RegClass.contains(DestReg) ||
R600::R600_Reg128VerticalRegClass.contains(DestReg)) &&
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/R600InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ class R600InstrInfo final : public R600GenInstrInfo {

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const override;

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,8 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest, bool RenamableSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);

// FIXME: This is hack to resolve copies between 16 bit and 32 bit
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;

void materializeImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARC/ARCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB,
void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest, bool RenamableSrc) const {
assert(ARC::GPR32RegClass.contains(SrcReg) &&
"Only GPR32 src copy supported.");
assert(ARC::GPR32RegClass.contains(DestReg) &&
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARC/ARCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class ARCInstrInfo : public ARCGenInstrInfo {

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;

void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register SrcReg,
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,9 @@ void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest,
bool RenamableSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;

void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register SrcReg,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest, bool RenamableSrc) const {
// Need to check the arch.
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/Thumb1InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool KillSrc, bool RenamableDest = false,
bool RenamableSrc = false) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIndex,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
MCRegister SrcReg, bool KillSrc,
bool RenamableDest, bool RenamableSrc) const {
// Handle SPR, DPR, and QPR copies.
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
Expand Down
Loading
Loading