From f463bdbf44e6be44f90790e7f828a957716c34a9 Mon Sep 17 00:00:00 2001 From: Don Isaac Date: Mon, 20 Jan 2025 18:51:55 -0800 Subject: [PATCH] fix: free VirtualMachine's mimalloc arena in debug builds --- scripts/leaks.sh | 54 ++++++++++++++++++++++++++++++++++ src/bun.js/web_worker.zig | 5 +--- src/bun.zig | 3 ++ src/bun_js.zig | 3 +- src/cli.zig | 2 +- src/cli/run_command.zig | 22 +++++++++----- src/cli/test_command.zig | 9 ++++++ src/env.zig | 2 ++ src/resolver/dir_info.zig | 13 ++++++-- src/resolver/resolver.zig | 3 +- src/resolver/tsconfig_json.zig | 1 + 11 files changed, 101 insertions(+), 16 deletions(-) create mode 100755 scripts/leaks.sh diff --git a/scripts/leaks.sh b/scripts/leaks.sh new file mode 100755 index 00000000000000..bae02435bb27ba --- /dev/null +++ b/scripts/leaks.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# @file leaks.sh +# @summary build + run bun with `leaks`. All args are forwarded to `bun-debug`. +# +# @description +# This script builds bun, signs it with debug entitlements, and runs it with +# `leaks` After running, a log file describing any found leaks gets saved to +# `logs/` +# +# This script requires `leaks` (from xcode cli tools) and only works on MacOS. + +set -e -o pipefail + +BUN="bun-debug" +DEBUG_BUN="build/debug/${BUN}" + +# at least one argument is required +if [[ $# -lt 1 ]]; then + echo "Usage: $0 [bun args]" + echo " $0 test [bun args]" + exit 1 +fi + +bun run build + +echo "Signing ${BUN} binary..." +codesign --entitlements $(realpath entitlements.debug.plist) --force --timestamp --sign - -vvvv --deep --strict ${DEBUG_BUN} + +export BUN_JSC_logJITCodeForPerf=1 +export BUN_JSC_collectExtraSamplingProfilerData=1 +export BUN_JSC_sampleCCode=1 +export BUN_JSC_alwaysGeneratePCToCodeOriginMap=1 +export BUN_GARBAGE_COLLECTOR_LEVEL=2 +# MiMalloc options. https://microsoft.github.io/mimalloc/environment.html +export MIMALLOC_SHOW_ERRORS=1 + + +commit=$(git rev-parse --short HEAD) +logfile="./logs/${BUN}-leaks-$1-${commit}.log" +mkdir -p "logs" || true +# forwards all arguments to bun-debug +echo "Running ${file_to_run}, logs will be saved to ${logfile}..." + +# must be set just before `leaks` runs, since these affect all executables. +# see: https://developer.apple.com/library/archive/documentation/Performance/Conceptual/ManagingMemory/Articles/MallocDebug.html +export MallocStackLogging=1 +export MallocScribble=1 # fill free'd memory with 0x55 +export MallocPreScribble=1 # fill alloc'd memory with 0xAA +export MallocGuardEdges=1 # add guard pages before and after large allocations +export MallocCheckHeapStart=1000 # validate heap after n malloc() calls +export MallocCheckHeapEach=100 # validate heap after every n malloc() calls + +leaks -atExit -- "./${DEBUG_BUN}" ${@:1} | tee "${logfile}" diff --git a/src/bun.js/web_worker.zig b/src/bun.js/web_worker.zig index c8ae517d906542..369bb68420d732 100644 --- a/src/bun.js/web_worker.zig +++ b/src/bun.js/web_worker.zig @@ -319,6 +319,7 @@ pub const WebWorker = struct { for (this.preloads) |preload| { bun.default_allocator.free(preload); } + if (this.arena) |*arena| arena.deinit(); bun.default_allocator.free(this.preloads); bun.default_allocator.destroy(this); } @@ -513,7 +514,6 @@ pub const WebWorker = struct { globalObject = vm.global; vm_to_deinit = vm; } - var arena = this.arena; WebWorker__dispatchExit(globalObject, cpp_worker, exit_code); if (loop) |loop_| { @@ -527,9 +527,6 @@ pub const WebWorker = struct { vm.deinit(); // NOTE: deinit here isn't implemented, so freeing workers will leak the vm. } bun.deleteAllPoolsForThreadExit(); - if (arena) |*arena_| { - arena_.deinit(); - } bun.exitThread(); } diff --git a/src/bun.zig b/src/bun.zig index 5e2d7b3e2badd0..c10df1323b4201 100644 --- a/src/bun.zig +++ b/src/bun.zig @@ -3123,6 +3123,9 @@ pub fn New(comptime T: type) type { bun.destroy(self); } + /// Allocate a new uninitialized instance of `T` using the default + /// global allocator. You must call `T.destroy()` to avoid leaking + /// memory. pub inline fn new(t: T) *T { return bun.new(T, t); } diff --git a/src/bun_js.zig b/src/bun_js.zig index 9c783ade3dce57..cfddc827dda5a2 100644 --- a/src/bun_js.zig +++ b/src/bun_js.zig @@ -170,7 +170,7 @@ pub const Run = struct { return bun.shell.Interpreter.initAndRunFromFile(ctx, mini, entry_path); } - pub fn boot(ctx: Command.Context, entry_path: string) !void { + pub fn boot(ctx: Command.Context, _arena: ?*Arena, entry_path: string) !void { JSC.markBinding(@src()); if (!ctx.debug.loaded_bunfig) { @@ -190,6 +190,7 @@ pub const Run = struct { js_ast.Expr.Data.Store.create(); js_ast.Stmt.Data.Store.create(); var arena = try Arena.init(); + if (_arena) |a| a.* = arena; run = .{ .vm = try VirtualMachine.init( diff --git a/src/cli.zig b/src/cli.zig index 36407e2bedd778..02932aa625843b 100644 --- a/src/cli.zig +++ b/src/cli.zig @@ -2191,7 +2191,7 @@ pub const Command = struct { var entry_point_buf: [bun.MAX_PATH_BYTES + trigger.len]u8 = undefined; const cwd = try std.posix.getcwd(&entry_point_buf); @memcpy(entry_point_buf[cwd.len..][0..trigger.len], trigger); - try BunJS.Run.boot(ctx, entry_point_buf[0 .. cwd.len + trigger.len]); + try BunJS.Run.boot(ctx, null, entry_point_buf[0 .. cwd.len + trigger.len]); return; } diff --git a/src/cli/run_command.zig b/src/cli/run_command.zig index a09dcb0e8f82d7..0d9b1d0d71cc86 100644 --- a/src/cli/run_command.zig +++ b/src/cli/run_command.zig @@ -804,6 +804,7 @@ pub const RunCommand = struct { const DirInfo = @import("../resolver/dir_info.zig"); pub fn configureEnvForRun( ctx: Command.Context, + /// Gets initialized this_transpiler: *transpiler.Transpiler, env: ?*DotEnv.Loader, log_errors: bool, @@ -1264,7 +1265,10 @@ pub const RunCommand = struct { fn _bootAndHandleError(ctx: Command.Context, path: string) bool { Global.configureAllocator(.{ .long_running = true }); - Run.boot(ctx, ctx.allocator.dupe(u8, path) catch return false) catch |err| { + var arena: bun.MimallocArena = .{}; + // leaking memory on exit is fine, but it makes leak detection more difficult. + defer if (comptime bun.Environment.detect_leaks) arena.deinit(); + Run.boot(ctx, &arena, ctx.allocator.dupe(u8, path) catch return false) catch |err| { ctx.log.print(Output.errorWriter()) catch {}; Output.prettyErrorln("error: Failed to run {s} due to error {s}", .{ @@ -1274,6 +1278,7 @@ pub const RunCommand = struct { bun.handleErrorReturnTrace(err, @errorReturnTrace()); Global.exit(1); }; + return true; } fn maybeOpenWithBunJS(ctx: Command.Context) bool { @@ -1334,7 +1339,7 @@ pub const RunCommand = struct { .err => return false, } - Global.configureAllocator(.{ .long_running = true }); + Global.configureAllocator(.{ .long_running = true }); // CHECKME absolute_script_path = brk: { if (comptime !Environment.isWindows) break :brk bun.getFdPath(file, &script_name_buf) catch return false; @@ -1400,7 +1405,10 @@ pub const RunCommand = struct { const force_using_bun = ctx.debug.run_in_bun; var ORIGINAL_PATH: string = ""; var this_transpiler: transpiler.Transpiler = undefined; - const root_dir_info = try configureEnvForRun(ctx, &this_transpiler, null, log_errors, false); + const root_dir_info: *DirInfo = try configureEnvForRun(ctx, &this_transpiler, null, log_errors, false); + defer if (comptime bun.Environment.detect_leaks) { + root_dir_info.deinit(); + }; try configurePathForRun(ctx, root_dir_info, &this_transpiler, &ORIGINAL_PATH, root_dir_info.abs_path, force_using_bun); this_transpiler.env.map.put("npm_command", "run-script") catch unreachable; @@ -1441,7 +1449,7 @@ pub const RunCommand = struct { @memcpy(entry_point_buf[cwd.len..][0..trigger.len], trigger); const entry_path = entry_point_buf[0 .. cwd.len + trigger.len]; - Run.boot(ctx, ctx.allocator.dupe(u8, entry_path) catch return false) catch |err| { + Run.boot(ctx, null, ctx.allocator.dupe(u8, entry_path) catch return false) catch |err| { ctx.log.print(Output.errorWriter()) catch {}; Output.prettyErrorln("error: Failed to run {s} due to error {s}", .{ @@ -1619,7 +1627,7 @@ pub const RunCommand = struct { var entry_point_buf: [bun.MAX_PATH_BYTES + trigger.len]u8 = undefined; const cwd = try std.posix.getcwd(&entry_point_buf); @memcpy(entry_point_buf[cwd.len..][0..trigger.len], trigger); - try Run.boot(ctx, entry_point_buf[0 .. cwd.len + trigger.len]); + try Run.boot(ctx, null, entry_point_buf[0 .. cwd.len + trigger.len]); return; } @@ -1649,7 +1657,7 @@ pub const RunCommand = struct { ); }; - Run.boot(ctx, normalized_filename) catch |err| { + Run.boot(ctx, null, normalized_filename) catch |err| { ctx.log.print(Output.errorWriter()) catch {}; Output.err(err, "Failed to run script \"{s}\"", .{std.fs.path.basename(normalized_filename)}); @@ -1724,7 +1732,7 @@ pub const BunXFastPath = struct { bun.reinterpretSlice(u8, &direct_launch_buffer), wpath, ) catch return; - Run.boot(ctx, utf8) catch |err| { + Run.boot(ctx, null, utf8) catch |err| { ctx.log.print(Output.errorWriter()) catch {}; Output.err(err, "Failed to run bin \"{s}\"", .{std.fs.path.basename(utf8)}); Global.exit(1); diff --git a/src/cli/test_command.zig b/src/cli/test_command.zig index d76218c15e77e7..07ea2111526702 100644 --- a/src/cli/test_command.zig +++ b/src/cli/test_command.zig @@ -1200,9 +1200,16 @@ pub const TestCommand = struct { var snapshot_values = Snapshots.ValuesHashMap.init(ctx.allocator); var snapshot_counts = bun.StringHashMap(usize).init(ctx.allocator); var inline_snapshots_to_write = std.AutoArrayHashMap(TestRunner.File.ID, std.ArrayList(Snapshots.InlineSnapshotToWrite)).init(ctx.allocator); + defer if (comptime bun.Environment.detect_leaks) { + snapshot_file_buf.deinit(); + snapshot_values.deinit(); + snapshot_counts.deinit(); + inline_snapshots_to_write.deinit(); + }; JSC.isBunTest = true; var reporter = try ctx.allocator.create(CommandLineReporter); + defer ctx.allocator.destroy(reporter); reporter.* = CommandLineReporter{ .jest = TestRunner{ .allocator = ctx.allocator, @@ -1244,6 +1251,7 @@ pub const TestCommand = struct { }; } + // FIXME: calling .deinit() on these causes compile errors js_ast.Expr.Data.Store.create(); js_ast.Stmt.Data.Store.create(); var vm = try JSC.VirtualMachine.init( @@ -1263,6 +1271,7 @@ pub const TestCommand = struct { .is_main_thread = true, }, ); + defer vm.deinit(); vm.argv = ctx.passthrough; vm.preload = ctx.preloads; vm.transpiler.options.rewrite_jest_for_tests = true; diff --git a/src/env.zig b/src/env.zig index 482d7d50589b38..b7d0421b7fccb1 100644 --- a/src/env.zig +++ b/src/env.zig @@ -45,6 +45,8 @@ pub const canary_revision = if (is_canary) build_options.canary_revision else "" pub const dump_source = isDebug and !isTest; pub const base_path = build_options.base_path; pub const enable_logs = build_options.enable_logs or isDebug; +/// We're trying to find memory leaks +pub const detect_leaks = isDebug or builtin.valgrind_support; pub const codegen_path = build_options.codegen_path; pub const codegen_embed = build_options.codegen_embed; diff --git a/src/resolver/dir_info.zig b/src/resolver/dir_info.zig index 5ff0767b54c0ca..2a94f76ad302c5 100644 --- a/src/resolver/dir_info.zig +++ b/src/resolver/dir_info.zig @@ -40,8 +40,12 @@ package_json_for_dependencies: ?*PackageJSON = null, abs_path: string = "", entries: Index = undefined, -package_json: ?*PackageJSON = null, // Is there a "package.json" file? -tsconfig_json: ?*TSConfigJSON = null, // Is there a "tsconfig.json" file in this directory or a parent directory? +/// Is there a "package.json" file? +/// +/// Allocated with `bun.default_allocator`. +package_json: ?*PackageJSON = null, +/// Is there a "tsconfig.json" file in this directory or a parent directory? +tsconfig_json: ?*TSConfigJSON = null, abs_real_path: string = "", // If non-empty, this is the real absolute path resolving any symlinks flags: Flags.Set = Flags.Set{}, @@ -119,6 +123,11 @@ pub fn getEnclosingBrowserScope(i: *const DirInfo) ?*DirInfo { return HashMap.instance.atIndex(i.enclosing_browser_scope); } +pub fn deinit(this: *DirInfo) void { + if (this.package_json) |package_json| package_json.destroy(); + if (this.tsconfig_json) |tsconfig_json| tsconfig_json.destroy(); +} + // Goal: Really fast, low allocation directory map exploiting cache locality where we don't worry about lifetimes much. // 1. Don't store the keys or values of directories that don't exist // 2. Don't expect a provided key to exist after it's queried diff --git a/src/resolver/resolver.zig b/src/resolver/resolver.zig index 17875431c2899b..5129d39fae532c 100644 --- a/src/resolver/resolver.zig +++ b/src/resolver/resolver.zig @@ -2521,7 +2521,7 @@ pub const Resolver = struct { ) orelse return null; } - return PackageJSON.new(pkg); + return PackageJSON.new(pkg); // FIXME: memory leak. Looks like both Transpiler and VirtualMachine parse package.json } fn dirInfoCached(r: *ThisResolver, path: string) !?*DirInfo { @@ -3874,6 +3874,7 @@ pub const Resolver = struct { fn dirInfoUncached( r: *ThisResolver, + // This gets initialized. info: *DirInfo, path: string, _entries: *Fs.FileSystem.RealFS.EntriesOption, diff --git a/src/resolver/tsconfig_json.zig b/src/resolver/tsconfig_json.zig index c4e40a70569ef4..c40bff190ea131 100644 --- a/src/resolver/tsconfig_json.zig +++ b/src/resolver/tsconfig_json.zig @@ -146,6 +146,7 @@ pub const TSConfigJSON = struct { return string_builder.allocatedSlice()[0 .. string_builder.len - 1]; } + /// Allocates. Call `tsconfig.destroy()` when done. pub fn parse( allocator: std.mem.Allocator, log: *logger.Log,