From 4796e16aead90656079f81a6c5fc45b6b78304d4 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 27 Nov 2023 12:11:58 +0000 Subject: [PATCH] Don't conflate a call with an unknown target with an indirect call. You can call an external symbol (with an address statically unknown) directly. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 6 +-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 29 +++++++++-- .../CodeGen/X86/yk-basic-block-sections.ll | 51 +++++++++++++++++++ 3 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/X86/yk-basic-block-sections.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 52f31e79e0a339..71e50767d4ba04 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -254,10 +254,10 @@ class AsmPrinter : public MachineFunctionPass { /// - A symbol marking the call instruction. /// - A symbol marking the return address of the call (if it were to return /// by conventional means) - /// - If it's a direct call, a symbol marking the target of the call, or - /// `nullptr` if the call is indirect. + /// - A symbol marking the target of the call, if known. + /// - A boolean indicating if it's a direct call (true) or not (false). std::map<const MachineBasicBlock *, - SmallVector<std::tuple<MCSymbol *, MCSymbol *, MCSymbol *>>> + SmallVector<std::tuple<MCSymbol *, MCSymbol *, MCSymbol *, bool>>> YkCallMarkerSyms; protected: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a4bcbca5c187e8..601e57e6b0c921 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1498,6 +1498,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { } } // Emit the number of corresponding BasicBlocks. + OutStreamer->AddComment("num corresponding blocks"); OutStreamer->emitULEB128IntValue(CorrBBs.size()); // Emit the corresponding block indices. 
for (auto CorrBB : CorrBBs) { @@ -1513,6 +1514,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { } if (!Found) OutContext.reportError(SMLoc(), "Couldn't find the block's index"); + OutStreamer->AddComment("corresponding block"); OutStreamer->emitULEB128IntValue(I); } @@ -1524,18 +1526,25 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { // compute the distance from the start of the block and use uleb128 // encoding. const size_t NumCalls = YkCallMarkerSyms[&MBB].size(); + OutStreamer->AddComment("num calls"); OutStreamer->emitULEB128IntValue(NumCalls); for (auto Tup : YkCallMarkerSyms[&MBB]) { // Emit address of the call instruction. + OutStreamer->AddComment("call offset"); OutStreamer->emitSymbolValue(std::get<0>(Tup), getPointerSize()); // Emit the return address of the call. + OutStreamer->AddComment("return offset"); OutStreamer->emitSymbolValue(std::get<1>(Tup), getPointerSize()); // Emit address of target if known, or 0. + OutStreamer->AddComment("target offset"); MCSymbol *Target = std::get<2>(Tup); if (Target) OutStreamer->emitSymbolValue(Target, getPointerSize()); else OutStreamer->emitIntValue(0, getPointerSize()); + // Emit whether it's a direct call. + OutStreamer->AddComment("direct?"); + OutStreamer->emitIntValue(std::get<3>(Tup), 1); } // Emit successor information. @@ -2009,14 +2018,25 @@ void AsmPrinter::emitFunctionBody() { // If it's direct, then we know the call's target from the first // operand alone. const MachineOperand CallOpnd = MI.getOperand(0); + std::optional<bool> DirectCall; MCSymbol *CallTargetSym = nullptr; if (CallOpnd.isGlobal()) { - // Direct call. + // Global: direct call, known target. + DirectCall = true; CallTargetSym = getSymbol(CallOpnd.getGlobal()); } else if (CallOpnd.isMCSymbol()) { - // Also a direct call. + // MCSymbol: direct call, known target. + DirectCall = true; CallTargetSym = CallOpnd.getMCSymbol(); - } // Otherwise it's an indirect call. 
+ } else if (CallOpnd.isSymbol()) { + // Symbol: direct call, unknown target. + DirectCall = true; + // CallTargetSym remains null. + } else { + // Otherwise: indirect call, therefore unknown target. + DirectCall = false; + // CallTargetSym remains null. + } // Ensure we are only working with near calls. This matters because // Intel PT optimises near calls, and it simplifies our implementation @@ -2025,7 +2045,8 @@ void AsmPrinter::emitFunctionBody() { assert(!MF->getSubtarget().getInstrInfo()->isFarCall(MI)); assert(YkCallMarkerSyms.find(&MBB) != YkCallMarkerSyms.end()); - YkCallMarkerSyms[&MBB].push_back({YkPreCallSym, YkPostCallSym, CallTargetSym}); + YkCallMarkerSyms[&MBB].push_back({ + YkPreCallSym, YkPostCallSym, CallTargetSym, DirectCall.value()}); } else { emitInstruction(&MI); } diff --git a/llvm/test/CodeGen/X86/yk-basic-block-sections.ll b/llvm/test/CodeGen/X86/yk-basic-block-sections.ll new file mode 100644 index 00000000000000..8db6172c5f6c0f --- /dev/null +++ b/llvm/test/CodeGen/X86/yk-basic-block-sections.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels -yk-extended-llvmbbaddrmap-section -emulated-tls | FileCheck %s + +@G = thread_local global i32 0 + +declare void @foo(ptr) + +define void @bar() noinline { + ret void +} + +declare void @baz() + +define dso_local void @the_func(ptr %0) { + ; Note that the emulated TLS access will make an extra direct call with an + ; unknown target. 
+ call void @foo(ptr @G) + call void @bar() + call void %0() + ret void +} + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.the_func{{$}} +; CHECK-NEXT: .byte 2 # version +; CHECK-NEXT: .byte 0 # feature +; CHECK-NEXT: .quad .Lfunc_begin1 # function address +; CHECK-NEXT: .byte 1 # number of basic blocks +; CHECK-NEXT: .byte 0 # BB id +; CHECK-NEXT: .uleb128 .Lfunc_begin1-.Lfunc_begin1 +; CHECK-NEXT: .uleb128 .LBB_END1_0-.Lfunc_begin1 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 1 # num corresponding blocks +; CHECK-NEXT: .byte 0 # corresponding block +; CHECK-NEXT: .byte 4 # num calls +; CHECK-NEXT: .quad .Lyk_precall0 # call offset +; CHECK-NEXT: .quad .Lyk_postcall0 # return offset +; CHECK-NEXT: .quad 0 # target offset +; CHECK-NEXT: .byte 1 # direct? +; CHECK-NEXT: .quad .Lyk_precall1 # call offset +; CHECK-NEXT: .quad .Lyk_postcall1 # return offset +; CHECK-NEXT: .quad foo # target offset +; CHECK-NEXT: .byte 1 # direct? +; CHECK-NEXT: .quad .Lyk_precall2 # call offset +; CHECK-NEXT: .quad .Lyk_postcall2 # return offset +; CHECK-NEXT: .quad bar # target offset +; CHECK-NEXT: .byte 1 # direct? +; CHECK-NEXT: .quad .Lyk_precall3 # call offset +; CHECK-NEXT: .quad .Lyk_postcall3 # return offset +; CHECK-NEXT: .quad 0 # target offset +; CHECK-NEXT: .byte 0 # direct? + +; FIXME: test our other extensions to the blockmap.