From 4326f4fedf85ee081c265ad8290a2b83a524ccad Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 8 Jan 2025 01:06:48 +0000 Subject: [PATCH 1/2] Include a fingerprint of the specific arguments in mangled names. Instead of including the raw index of the specific, which is unstable across files and across unrelated changes, use a fingerprint of the constant values of the specific arguments. This is a placeholder until we decide on how we want to mangle specific functions. --- toolchain/lower/mangler.cpp | 12 +++++-- toolchain/lower/mangler.h | 5 +++ .../testdata/function/generic/call.carbon | 16 ++++----- .../function/generic/call_method.carbon | 4 +-- toolchain/sem_ir/inst_fingerprinter.cpp | 35 +++++++++++++++---- toolchain/sem_ir/inst_fingerprinter.h | 3 ++ 6 files changed, 55 insertions(+), 20 deletions(-) diff --git a/toolchain/lower/mangler.cpp b/toolchain/lower/mangler.cpp index 16ceb05d72574..8d32a015b7979 100644 --- a/toolchain/lower/mangler.cpp +++ b/toolchain/lower/mangler.cpp @@ -139,10 +139,16 @@ auto Mangler::Mangle(SemIR::FunctionId function_id, MangleInverseQualifiedNameScope(os, function.parent_scope_id); - // TODO: Add proper support for generic entities. The ID we emit here will not - // be consistent across object files. + // TODO: Add proper support for mangling generic entities. For now we use a + // fingerprint of the specific arguments, which should be stable across files, + // but isn't necessarily stable across toolchain changes. if (specific_id.is_valid()) { - os << "." << specific_id.index; + os << "."; + llvm::write_hex( + os, + fingerprinter_.GetOrCompute( + &sem_ir(), sem_ir().specifics().Get(specific_id).args_id), + llvm::HexPrintStyle::Lower, 16); } return os.str(); diff --git a/toolchain/lower/mangler.h b/toolchain/lower/mangler.h index 377b741a5be6f..fa582abf89fb6 100644 --- a/toolchain/lower/mangler.h +++ b/toolchain/lower/mangler.h @@ -10,6 +10,7 @@ #include "toolchain/lower/file_context.h" #include "toolchain/sem_ir/constant.h" #include "toolchain/sem_ir/ids.h" +#include "toolchain/sem_ir/inst_fingerprinter.h" namespace Carbon::Lower { @@ -49,6 +50,10 @@ class Mangler { } FileContext& file_context_; + + // TODO: If `file_context_` has an `InstNamer`, we could share its + // fingerprinter. + SemIR::InstFingerprinter fingerprinter_; }; } // namespace Carbon::Lower diff --git a/toolchain/lower/testdata/function/generic/call.carbon b/toolchain/lower/testdata/function/generic/call.carbon index 46b827f525474..452402fb8adbe 100644 --- a/toolchain/lower/testdata/function/generic/call.carbon +++ b/toolchain/lower/testdata/function/generic/call.carbon @@ -44,11 +44,11 @@ fn G() { // CHECK:STDOUT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %d.var, ptr align 1 @D.val.loc19_16, i64 0, i1 false), !dbg !9 // CHECK:STDOUT: call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7 // CHECK:STDOUT: store i32 0, ptr %n.var, align 4, !dbg !10 -// CHECK:STDOUT: call void @_CF.Main.118(ptr %c.var), !dbg !11 -// CHECK:STDOUT: call void @_CF.Main.119(ptr %d.var), !dbg !12 +// CHECK:STDOUT: call void @_CF.Main.15b1f98bd9cc0c5b(ptr %c.var), !dbg !11 +// CHECK:STDOUT: call void @_CF.Main.2cc450fc05045897(ptr %d.var), !dbg !12 // CHECK:STDOUT: %.loc24 = load i32, ptr %n.var, align 4, !dbg !13 -// CHECK:STDOUT: call void @_CF.Main.120(i32 %.loc24), !dbg !14 -// CHECK:STDOUT: call void @_CF.Main.121(%type zeroinitializer), !dbg !15 +// CHECK:STDOUT: call void @_CF.Main.9b813875c98e31f1(i32 %.loc24), !dbg !14 +// CHECK:STDOUT: call void @_CF.Main.5754c7a55c7cbe4a(%type zeroinitializer), !dbg !15 // CHECK:STDOUT: ret void, !dbg !16 // CHECK:STDOUT: } // CHECK:STDOUT: @@ -58,13 +58,13 @@ fn G() { // CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) // CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1 // CHECK:STDOUT: -// CHECK:STDOUT: declare void @_CF.Main.118(ptr) +// CHECK:STDOUT: declare void @_CF.Main.15b1f98bd9cc0c5b(ptr) // CHECK:STDOUT: -// CHECK:STDOUT: declare void @_CF.Main.119(ptr) +// CHECK:STDOUT: declare void @_CF.Main.2cc450fc05045897(ptr) // CHECK:STDOUT: -// CHECK:STDOUT: declare void @_CF.Main.120(i32) +// CHECK:STDOUT: declare void @_CF.Main.9b813875c98e31f1(i32) // CHECK:STDOUT: -// CHECK:STDOUT: declare void @_CF.Main.121(%type) +// CHECK:STDOUT: declare void @_CF.Main.5754c7a55c7cbe4a(%type) // CHECK:STDOUT: // CHECK:STDOUT: ; uselistorder directives // CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 2, 1, 0 } diff --git a/toolchain/lower/testdata/function/generic/call_method.carbon b/toolchain/lower/testdata/function/generic/call_method.carbon index 9dcd371b8628e..aeb8b2180e130 100644 --- a/toolchain/lower/testdata/function/generic/call_method.carbon +++ b/toolchain/lower/testdata/function/generic/call_method.carbon @@ -34,7 +34,7 @@ fn CallF() -> i32 { // CHECK:STDOUT: call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7 // CHECK:STDOUT: store i32 0, ptr %n.var, align 4, !dbg !9 // CHECK:STDOUT: %.loc20_14 = load i32, ptr %n.var, align 4, !dbg !10 -// CHECK:STDOUT: %F.call = call i32 @_CF.C.Main.118(ptr %c.var, i32 %.loc20_14), !dbg !11 +// CHECK:STDOUT: %F.call = call i32 @_CF.C.Main.9b813875c98e31f1(ptr %c.var, i32 %.loc20_14), !dbg !11 // CHECK:STDOUT: ret i32 %F.call, !dbg !12 // CHECK:STDOUT: } // CHECK:STDOUT: @@ -44,7 +44,7 @@ fn CallF() -> i32 { // CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) // CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1 // CHECK:STDOUT: -// CHECK:STDOUT: declare i32 @_CF.C.Main.118(ptr, i32) +// CHECK:STDOUT: declare i32 @_CF.C.Main.9b813875c98e31f1(ptr, i32) // CHECK:STDOUT: // CHECK:STDOUT: ; uselistorder directives // CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 1, 0 } diff --git a/toolchain/sem_ir/inst_fingerprinter.cpp b/toolchain/sem_ir/inst_fingerprinter.cpp index 8140f7dbbc1d5..56ba1d89ec476 100644 --- a/toolchain/sem_ir/inst_fingerprinter.cpp +++ b/toolchain/sem_ir/inst_fingerprinter.cpp @@ -17,7 +17,7 @@ namespace Carbon::SemIR { namespace { struct Worklist { // The file containing the instruction we're currently processing. - const File* sem_ir; + const File* sem_ir = nullptr; // The instructions we need to compute fingerprints for. llvm::SmallVector> todo; // The contents of the current instruction as accumulated so far. This is used @@ -28,6 +28,15 @@ struct Worklist { // the cache if not already present. Map, uint64_t>* fingerprints; + // Prepare to fingerprint a new instruction. + auto Prepare(const File* file) -> void { + sem_ir = file; + contents.clear(); + } + + // Finish fingerprinting and compute the fingerprint. + auto Finish() -> uint64_t { return llvm::stable_hash_combine(contents); } + // Add an invalid marker to the contents. This is used when the entity // contains an invalid ID. This uses an arbitrary fixed value that is assumed // to be unlikely to collide with a valid value. @@ -306,8 +315,7 @@ struct Worklist { auto [arg0_kind, arg1_kind] = inst.ArgKinds(); // Prepare to fingerprint this instruction. - sem_ir = next_sem_ir; - contents.clear(); + Prepare(next_sem_ir); // Add the instruction's fields to the contents. Add(inst.kind()); @@ -326,8 +334,7 @@ struct Worklist { // pop it from the todo list. Otherwise, we leave it on the todo list so // we can compute its fingerprint once we've finished the work we added. if (todo.size() == init_size) { - auto fingerprint = llvm::stable_hash_combine(contents); - fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), fingerprint); + fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), Finish()); todo.pop_back(); } } @@ -337,11 +344,25 @@ struct Worklist { auto InstFingerprinter::GetOrCompute(const File* file, InstId inst_id) -> uint64_t { - Worklist worklist = {.sem_ir = nullptr, - .todo = {{file, inst_id}}, + Worklist worklist = {.todo = {{file, inst_id}}, .fingerprints = &fingerprints_}; worklist.Run(); return fingerprints_.Lookup(std::pair(file, inst_id)).value(); } +auto InstFingerprinter::GetOrCompute(const File* file, + InstBlockId inst_block_id) -> uint64_t { + Worklist worklist = {.todo = {}, .fingerprints = &fingerprints_}; + worklist.Prepare(file); + worklist.Add(inst_block_id); + if (!worklist.todo.empty()) { + worklist.Run(); + worklist.Prepare(file); + worklist.Add(inst_block_id); + } + CARBON_CHECK(worklist.todo.empty(), + "Should not require more than two passes."); + return worklist.Finish(); +} + } // namespace Carbon::SemIR diff --git a/toolchain/sem_ir/inst_fingerprinter.h b/toolchain/sem_ir/inst_fingerprinter.h index ff80ae9c7c81f..adabf1276a624 100644 --- a/toolchain/sem_ir/inst_fingerprinter.h +++ b/toolchain/sem_ir/inst_fingerprinter.h @@ -17,6 +17,9 @@ class InstFingerprinter { // Gets or computes a fingerprint for the given instruction. auto GetOrCompute(const File* file, InstId inst_id) -> uint64_t; + // Gets or computes a fingerprint for the given instruction block. + auto GetOrCompute(const File* file, InstBlockId inst_block_id) -> uint64_t; + private: // The fingerprint for each instruction that has had its fingerprint computed, // indexed by the InstId's index. From 5b45ee666d83b7274ee45f9517cde38a61a3b948 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 8 Jan 2025 19:13:08 +0000 Subject: [PATCH 2/2] Preserve the most recent hash to avoid a lookup. Add TODO to try caching inst block fingerprints. --- toolchain/sem_ir/inst_fingerprinter.cpp | 24 +++++++++++++++++------- toolchain/sem_ir/inst_fingerprinter.h | 6 ++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/toolchain/sem_ir/inst_fingerprinter.cpp b/toolchain/sem_ir/inst_fingerprinter.cpp index 56ba1d89ec476..b705e428bc212 100644 --- a/toolchain/sem_ir/inst_fingerprinter.cpp +++ b/toolchain/sem_ir/inst_fingerprinter.cpp @@ -295,15 +295,22 @@ struct Worklist { table[kind.ToIndex()](*this, arg); } - // Ensure all the instructions on the todo list have fingerprints. - auto Run() -> void { - while (!todo.empty()) { + // Ensure all the instructions on the todo list have fingerprints. To avoid a + // re-lookup, returns the fingerprint of the first instruction on the todo + // list, and requires the todo list to be non-empty. + auto Run() -> uint64_t { + CARBON_CHECK(!todo.empty()); + while (true) { auto [next_sem_ir, next_inst_id] = todo.back(); // If we already have a fingerprint for this instruction, we have nothing // to do. Just pop it from `todo`. - if (fingerprints->Contains(std::pair(next_sem_ir, next_inst_id))) { + if (auto lookup = + fingerprints->Lookup(std::pair(next_sem_ir, next_inst_id))) { todo.pop_back(); + if (todo.empty()) { + return lookup.value(); + } continue; } @@ -334,8 +341,12 @@ struct Worklist { // pop it from the todo list. Otherwise, we leave it on the todo list so // we can compute its fingerprint once we've finished the work we added. if (todo.size() == init_size) { - fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), Finish()); + uint64_t fingerprint = Finish(); + fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), fingerprint); todo.pop_back(); + if (todo.empty()) { + return fingerprint; + } } } } @@ -346,8 +357,7 @@ auto InstFingerprinter::GetOrCompute(const File* file, InstId inst_id) -> uint64_t { Worklist worklist = {.todo = {{file, inst_id}}, .fingerprints = &fingerprints_}; - worklist.Run(); - return fingerprints_.Lookup(std::pair(file, inst_id)).value(); + return worklist.Run(); } auto InstFingerprinter::GetOrCompute(const File* file, diff --git a/toolchain/sem_ir/inst_fingerprinter.h b/toolchain/sem_ir/inst_fingerprinter.h index adabf1276a624..23ffbf386b3be 100644 --- a/toolchain/sem_ir/inst_fingerprinter.h +++ b/toolchain/sem_ir/inst_fingerprinter.h @@ -23,6 +23,12 @@ class InstFingerprinter { private: // The fingerprint for each instruction that has had its fingerprint computed, // indexed by the InstId's index. + // + // TODO: Experiment with also caching fingerprints for instruction blocks once + // we can get realistic performance measurements for this. This would simplify + // the `GetOrCompute` overload for `InstBlockId`s, and may save some work if + // the same canonical inst block is used by multiple instructions, for example + // as a specific argument list. Map, uint64_t> fingerprints_; };