Skip to content

Commit

Permalink
Merge pull request #205 from vext01/o1
Browse files Browse the repository at this point in the history
O1
  • Loading branch information
ltratt authored Sep 23, 2024
2 parents cbe366d + cf8a59f commit 779ebd8
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 82 deletions.
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/StackMaps.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class MCStreamer;
class raw_ostream;
class TargetRegisterInfo;

unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI);

/// MI-level stackmap operands.
///
/// MI stackmap operations take the form:
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/Yk.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ void initYkOptions(void);
// YKFIXME: all of our command-line arguments should be collected here instead
// of us randomly introducing `extern bool`s all over the place.
extern bool YkOptNoneAfterIRPasses;
extern bool YkDontOptFuncABI;

#endif
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/Yk/ControlPoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// right instruction in AOT from where to continue.
#define YK_RECONSTRUCT_FRAMES "__ykrt_reconstruct_frames"

// The name of the patchpoint intrinsic we use for the control point.
#define CP_PPNAME "llvm.experimental.patchpoint.void"

namespace llvm {
ModulePass *createYkControlPointPass();
} // namespace llvm
Expand Down
14 changes: 11 additions & 3 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1982,10 +1982,18 @@ void AsmPrinter::emitFunctionBody() {
break;
default:

// So that the ykpt decoder can work without disassembling instructions
// to find call-sites and successor blocks, we encode that info
// statically into the blockmap at AOT compile time.
//
// Some things that look like calls in IR don't actually emit a
// call into the binary, namely the stackmap intrinsic.
//
// Note that patchpoint and statepoint intrinsics, although similar to
// the stackmap intrinsic, do actually emit a call in the binary, so we
// DO need to include those callsites.
if (YkExtendedLLVMBBAddrMapSection && MI.isCall() &&
(MI.getOpcode() != TargetOpcode::STACKMAP) &&
(MI.getOpcode() != TargetOpcode::PATCHPOINT) &&
(MI.getOpcode() != TargetOpcode::STATEPOINT)) {
(MI.getOpcode() != TargetOpcode::STACKMAP)) {
// Record the address of the call instruction itself.
MCSymbol *YkPreCallSym =
MF->getContext().createTempSymbol("yk_precall", true);
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/CodeGen/StackMaps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -211,7 +210,7 @@ unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
}

/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
unsigned llvm::getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
int RegNum;
for (MCPhysReg SR : TRI->superregs_inclusive(Reg)) {
RegNum = TRI->getDwarfRegNum(SR, false);
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/CodeGen/Yk/FixStackmapsSpillReloads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ INITIALIZE_PASS_BEGIN(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spil
INITIALIZE_PASS_END(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spills",
false, false)

const TargetRegisterInfo *TRI;

bool FixStackmapsSpillReloads::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
bool Changed = false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
for (MachineBasicBlock &MBB : MF) {
Expand Down Expand Up @@ -144,7 +146,7 @@ bool FixStackmapsSpillReloads::runOnMachineFunction(MachineFunction &MF) {
MOI++;
while (MOI != MI.operands_end()) {
if (MOI->isReg()) {
Register Reg = MOI->getReg();
unsigned int Reg = getDwarfRegNum(MOI->getReg(), TRI);
// Check if the register operand in the stackmap is a restored
// spill.
// Since implicit operands are ignored by stackmaps (they are not
Expand Down Expand Up @@ -240,13 +242,15 @@ bool FixStackmapsSpillReloads::runOnMachineFunction(MachineFunction &MF) {
if (TII->isCopyInstr(MI) || TII->isLoadFromStackSlotPostFE(MI, FI)) {
// FIXME: Can there be multiple spill reloads here? Then this would
// need to be a loop.
if (TII->isCopyInstr(MI) && Spills.count(MI.getOperand(1).getReg())) {
unsigned int Op0 = getDwarfRegNum(MI.getOperand(0).getReg(), TRI);
unsigned int Op1 = getDwarfRegNum(MI.getOperand(1).getReg(), TRI);
if (TII->isCopyInstr(MI) && Spills.count(Op1)) {
// The source for this copy instruction is itself a spill reload.
// So we need to lookup the spill for the source and apply this
// instead.
Spills[MI.getOperand(0).getReg()] = Spills[MI.getOperand(1).getReg()];
Spills[Op0] = Spills[Op1];
} else {
Spills[MI.getOperand(0).getReg()] = &MI;
Spills[Op0] = &MI;
}
}
}
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Support/Yk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,26 @@ struct CreateYkEmbedIRParser {
} // namespace
static ManagedStatic<cl::opt<bool, true>, CreateYkEmbedIRParser> YkEmbedIRParser;

// Backing storage for the `yk-dont-opt-func-abi` command-line flag. Declared
// `extern` in llvm/Support/Yk.h so that code outside this file can read it.
bool YkDontOptFuncABI;
namespace {
// Factory type handed to the `ManagedStatic` below; its `call()` builds the
// option object.
struct CreateYkDontOptFuncABIParser {
// Allocate the boolean option. `cl::location` directs the parsed value into
// the `YkDontOptFuncABI` global rather than into storage inside the option.
static void *call() {
return new cl::opt<bool, true>(
"yk-dont-opt-func-abi",
cl::desc(
"Don't change the ABIs of functions during optimisation"),
cl::NotHidden, cl::location(YkDontOptFuncABI));
}
};
} // namespace
// Holder for the option object; dereferenced in `initYkOptions()`.
static ManagedStatic<cl::opt<bool, true>, CreateYkDontOptFuncABIParser> YkDontOptFuncABIParser;

/// Make sure every yk command-line option has been created.
///
/// Each statement below dereferences an option holder and discards the result.
/// For the holders whose definitions are visible in this file these are
/// `ManagedStatic`s, which construct their object on first access, so the
/// dereference exists purely for that side effect.
/// NOTE(review): presumably all of the parsers listed here follow that same
/// pattern -- confirm against their definitions.
void llvm::initYkOptions() {
*YkExtendedLLVMBBAddrMapSectionParser;
*YkStackMapOffsetFixParser;
*YkStackMapAdditionalLocsParser;
*YkStackmapsSpillFixParser;
*YkOptNoneAfterIRPassesParser;
*YkEmbedIRParser;
*YkDontOptFuncABIParser;
}
36 changes: 14 additions & 22 deletions llvm/lib/Target/X86/X86AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
Expand Down Expand Up @@ -62,22 +63,6 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;

/// Go up the super-register chain until we hit a valid dwarf register number.
///
/// (duplicated/adapted from StackMaps.cpp so as to not introduce extra link
/// dependencies)
///
/// Reads the `TRI` pointer declared above for the register info. Returns the
/// first non-negative DWARF number found, starting with `Reg` itself, and
/// asserts if none of the registers visited has one.
static unsigned getDwarfRegNum(unsigned Reg) {
  // Start from an invalid value so that, should the loop body never run, the
  // assertion below fires deterministically instead of reading an
  // indeterminate value.
  int RegNum = -1;
  for (MCPhysReg SR : TRI->superregs_inclusive(Reg)) {
    RegNum = TRI->getDwarfRegNum(SR, false);
    if (RegNum >= 0)
      break;
  }

  assert(RegNum >= 0 && "Invalid Dwarf register number.");
  return static_cast<unsigned>(RegNum);
}

/// Clear any mappings that map to the given register.
void clearRhs(Register Reg, std::map<Register, std::set<int64_t>> &SpillMap) {
auto I = SpillMap.begin();
Expand Down Expand Up @@ -116,6 +101,13 @@ void processInstructions(
continue;
}

// Because a patchpoint already captures the live values at the exact
// moment we desire, there's no need to compute a spillmap for them nor do
// we have to "patch them up". We can just skip them.
if (Instr.getOpcode() == TargetOpcode::PATCHPOINT) {
continue;
}

// Copying a value from one register B to another A, creates a mapping from
// A to B. If A is tracked by the stackmap, then B will also be tracked and
// assigned the same value during deoptimisation.
Expand All @@ -124,8 +116,8 @@ void processInstructions(
const MachineOperand Rhs = Instr.getOperand(1);
assert(Lhs.isReg() && "Is register.");
assert(Rhs.isReg() && "Is register.");
auto LhsDwReg = getDwarfRegNum(Lhs.getReg());
auto RhsDwReg = getDwarfRegNum(Rhs.getReg());
auto LhsDwReg = getDwarfRegNum(Lhs.getReg(), TRI);
auto RhsDwReg = getDwarfRegNum(Rhs.getReg(), TRI);
if (LhsDwReg == RhsDwReg) {
// Moves like `mov rax, rax` are effectively a NOP for this analysis.
continue;
Expand All @@ -149,7 +141,7 @@ void processInstructions(
const MachineOperand OffsetOp = Instr.getOperand(3);
const MachineOperand MO = Instr.getOperand(5);
assert(MO.isReg() && "Is register.");
const Register DwReg = getDwarfRegNum(MO.getReg());
const Register DwReg = getDwarfRegNum(MO.getReg(), TRI);
if (OffsetOp.isImm()) {
const int64_t Offset = OffsetOp.getImm();
// We don't need to do `clearRhs(DwReg)` and reset the `SpillMap` entry
Expand All @@ -168,7 +160,7 @@ void processInstructions(
const MachineOperand OffsetOp = Instr.getOperand(4);
const MachineOperand Lhs = Instr.getOperand(0);
assert(Lhs.isReg() && "Is register.");
const Register DwReg = getDwarfRegNum(Lhs.getReg());
const Register DwReg = getDwarfRegNum(Lhs.getReg(), TRI);
if (OffsetOp.isImm()) {
const int64_t Offset = OffsetOp.getImm();
clearRhs(DwReg, SpillMap);
Expand All @@ -180,7 +172,7 @@ void processInstructions(
// Any other assignments to tracked registers removes their mapping.
for (const MachineOperand MO : Instr.defs()) {
assert(MO.isReg() && "Is register.");
auto DwReg = getDwarfRegNum(MO.getReg());
auto DwReg = getDwarfRegNum(MO.getReg(), TRI);
SpillMap.erase(DwReg);
clearRhs(DwReg, SpillMap);
}
Expand All @@ -190,7 +182,7 @@ void processInstructions(
// be used.
for (const MachineOperand MO : Instr.uses()) {
if (MO.isReg() && MO.isKill()) {
auto DwReg = getDwarfRegNum(MO.getReg());
auto DwReg = getDwarfRegNum(MO.getReg(), TRI);
SpillMap.erase(DwReg);
clearRhs(DwReg, SpillMap);
}
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/IPO/GlobalOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Yk.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
Expand Down Expand Up @@ -1986,7 +1987,9 @@ OptimizeFunctions(Module &M,
}
}

if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
if (!YkDontOptFuncABI && hasChangeableCC(&F) && !F.isVarArg()
&& !F.hasAddressTaken())
{
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
Expand Down
91 changes: 64 additions & 27 deletions llvm/lib/Transforms/Yk/ControlPoint.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- ControlPoint.cpp - Synthesise the yk control point -----------------===//
//===- ControlPoint.cpp - Patch the yk control point ----------------------===//
//
// This pass finds the user's call to the dummy control point and replaces it
// with a call to a new control point that implements the necessary logic to
Expand All @@ -19,30 +19,22 @@
// }
// ```
//
// Into one that looks like this (note that this transformation happens at the
// IR level):
// Into one that looks like this:
//
// ```
// // The YkCtrlPointStruct contains one member for each live LLVM variable
// // just before the call to the control point.
// struct YkCtrlPointStruct {
// size_t pc;
// }
//
// struct YkCtrlPointStruct cp_vars;
// pc = 0;
// while (...) {
// // Now we call the patched control point.
// cp_vars.pc = pc;
// __ykrt__control_point(mt, loc, &cp_vars);
// pc = cp_vars.pc;
// llvm.experimental.patchpoint.void(..., __ykrt_control_point, ...)
// bc = program[pc];
// switch (bc) {
// // bytecode handlers here.
// }
// }
// ```
//
// A patchpoint is used to capture the locations of live variables immediately
// before a call to __ykrt_control_point.
//
// Note that this transformation occurs at the LLVM IR level. The above example
// is shown as C code for easy comprehension.

Expand All @@ -58,6 +50,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Yk/LivenessAnalysis.h"

#define DEBUG_TYPE "yk-control-point"

Expand All @@ -68,6 +61,29 @@
#define YK_CONTROL_POINT_ARG_VARS_IDX 2
#define YK_CONTROL_POINT_NUM_ARGS 3

// Stackmap ID zero is reserved for the control point.
//
// This will need to change when we support >1 control point.
const unsigned CPStackMapID = 0;

// The number of shadow bytes required for the control point's patchpoint.
//
// This must be large enough to accommodate the call to the patchpoint's target
// function. If you use a value that is too big, LLVM will pad the space with
// NOP bytes.
//
// This early in the pipeline we have no idea how the backend will choose to
// encode this call, so for now we use the exact size of the observed
// instruction at the time of writing, as determined by disassembling the binary
// and eyeballing it.
//
// The good news is that LLVM will fail an assertion if you use a value that is
// too small.
#if defined(__x86_64__) || defined(_M_X64)
const unsigned CPShadow = 13;
#else
#error "unknown control point shadow size for this arch"
#endif

using namespace llvm;

/// Find the call to the dummy control point that we want to patch.
Expand Down Expand Up @@ -128,34 +144,55 @@ class YkControlPoint : public ModulePass {
}

// The old control point should be of the form:
// control_point(YkMT*, YkLocation*)
// yk_mt_control_point(YkMT*, YkLocation*)
assert(OldCtrlPointCall->arg_size() == YK_OLD_CONTROL_POINT_NUM_ARGS);
Type *YkMTTy =
OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_MT_IDX)->getType();
Type *YkLocTy =
OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_LOC_IDX)
->getType();

// Create the new control point, which is of the form:
// void new_control_point(YkMT*, YkLocation*, i64)
// Create a call to the "new" (patched) control point, but do so via a
// patchpoint so that we can capture the live variables at exactly the
// moment before the call.
Type *Int64Ty = Type::getInt64Ty(Context);
FunctionType *FType = FunctionType::get(Type::getVoidTy(Context),
{YkMTTy, YkLocTy, Int64Ty}, false);
Function *NF = Function::Create(FType, GlobalVariable::ExternalLinkage,
YK_NEW_CONTROL_POINT, M);

// At the top of the function, instantiate a `YkCtrlPointStruct` to pass in
// to the control point. We do so on the stack, so that we can pass the
// struct by pointer.
IRBuilder<> Builder(OldCtrlPointCall);

// Insert call to the new control point. The last argument is the stackmap
// id belonging to the control point. This is temporarily set to INT_MAX
// and overwritten by the stackmap pass.
Builder.CreateCall(
NF, {OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_MT_IDX),
OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_LOC_IDX),
Builder.getInt64(UINT64_MAX)});
const Intrinsic::ID SMFuncID = Function::lookupIntrinsicID(CP_PPNAME);
if (SMFuncID == Intrinsic::not_intrinsic) {
Context.emitError("can't find stackmap()");
return false;
}
Function *SMFunc = Intrinsic::getDeclaration(&M, SMFuncID);
assert(SMFunc != nullptr);

// Get live variables.
LivenessAnalysis LA(Caller);
auto Lives = LA.getLiveVarsBefore(OldCtrlPointCall);

Value *SMID = ConstantInt::get(Type::getInt64Ty(Context), CPStackMapID);
Value *Shadow = ConstantInt::get(Type::getInt32Ty(Context), CPShadow);
std::vector<Value *> Args = {
SMID,
Shadow,
NF,
ConstantInt::get(Type::getInt32Ty(Context), 3),
OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_MT_IDX),
OldCtrlPointCall->getArgOperand(YK_CONTROL_POINT_ARG_LOC_IDX),
SMID,
};

for (auto *Live : Lives) {
Args.push_back(Live);
}

Builder.CreateCall(SMFunc->getFunctionType(), SMFunc,
ArrayRef<Value *>(Args));

// Replace the call to the dummy control point.
OldCtrlPointCall->eraseFromParent();
Expand Down
Loading

0 comments on commit 779ebd8

Please sign in to comment.