From dc63b98eda9774944641b31919ba06c9bcd42810 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Tue, 3 Dec 2024 06:04:47 -0500 Subject: [PATCH 1/9] Precompilation is cool, we should do more of it --- Project.toml | 1 + src/Enzyme.jl | 2 + src/compiler/validation.jl | 150 ------------------------------------- src/precompile.jl | 13 ++++ 4 files changed, 16 insertions(+), 150 deletions(-) create mode 100644 src/precompile.jl diff --git a/Project.toml b/Project.toml index 0a8ef9e338..7a2040f9ef 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ObjectFile = "d8793406-e978-5875-9003-1fc021f44a92" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/src/Enzyme.jl b/src/Enzyme.jl index 942df0581c..92ec9a623f 100644 --- a/src/Enzyme.jl +++ b/src/Enzyme.jl @@ -1587,4 +1587,6 @@ Returns true if within autodiff, otherwise false. """ @inline EnzymeCore.within_autodiff() = false +include("precompile.jl") + end # module diff --git a/src/compiler/validation.jl b/src/compiler/validation.jl index e90f7d0712..391e194fb4 100644 --- a/src/compiler/validation.jl +++ b/src/compiler/validation.jl @@ -4,159 +4,11 @@ using Libdl module FFI using LLVM -module BLASSupport -# TODO: LAPACK handling -using LinearAlgebra -using ObjectFile -using Libdl -function __init__() - global blas_handle = Libdl.dlopen(BLAS.libblastrampoline) -end -function get_blas_symbols() - symbols = BLAS.get_config().exported_symbols - if BLAS.USE_BLAS64 - return map(Base.Fix2(*, "64_"), symbols) - end - return symbols -end - -function lookup_blas_symbol(name::String) - Libdl.dlsym(blas_handle::Ptr{Cvoid}, name; throw_error = false) -end -end - -const ptr_map = Dict{Ptr{Cvoid},String}() - -function __init__() - known_names = ( - "jl_alloc_array_1d", - "jl_alloc_array_2d", - "jl_alloc_array_3d", - "ijl_alloc_array_1d", - "ijl_alloc_array_2d", - "ijl_alloc_array_3d", - "jl_new_array", - "ijl_new_array", - "jl_array_copy", - "ijl_array_copy", - "jl_alloc_string", - "jl_in_threaded_region", - "jl_enter_threaded_region", - "jl_exit_threaded_region", - "jl_set_task_tid", - "jl_new_task", - "malloc", - "memmove", - "memcpy", - "memset", - "jl_array_grow_beg", - "ijl_array_grow_beg", - "jl_array_grow_end", - "ijl_array_grow_end", - "jl_array_grow_at", - "ijl_array_grow_at", - "jl_array_del_beg", - "ijl_array_del_beg", - "jl_array_del_end", - "ijl_array_del_end", - "jl_array_del_at", - "ijl_array_del_at", - "jl_array_ptr", - "ijl_array_ptr", - "jl_value_ptr", - "jl_get_ptls_states", - "jl_gc_add_finalizer_th", - "jl_symbol_n", - "jl_", - "jl_object_id", - "jl_reshape_array", - "ijl_reshape_array", - "jl_matching_methods", - "ijl_matching_methods", - "jl_array_sizehint", - "ijl_array_sizehint", - "jl_get_keyword_sorter", - "ijl_get_keyword_sorter", - "jl_ptr_to_array", - "jl_box_float32", - "ijl_box_float32", - "jl_box_float64", - "ijl_box_float64", - "jl_ptr_to_array_1d", - "jl_eqtable_get", - "ijl_eqtable_get", - "memcmp", - "memchr", - "jl_get_nth_field_checked", - "ijl_get_nth_field_checked", - "jl_stored_inline", - "ijl_stored_inline", - "jl_array_isassigned", - "ijl_array_isassigned", - "jl_array_ptr_copy", - "ijl_array_ptr_copy", - "jl_array_typetagdata", - "ijl_array_typetagdata", - "jl_idtable_rehash", - ) - for name in known_names - sym = LLVM.find_symbol(name) - if sym == C_NULL - continue - end - if haskey(ptr_map, sym) - # On MacOS memcpy and memmove seem to collide? - if name == "memcpy" - continue - end - end - @assert !haskey(ptr_map, sym) - ptr_map[sym] = name - end - for sym in BLASSupport.get_blas_symbols() - ptr = BLASSupport.lookup_blas_symbol(sym) - if ptr !== nothing - if haskey(ptr_map, ptr) - if ptr_map[ptr] != sym - @warn "Duplicated symbol in ptr_map" ptr, sym, ptr_map[ptr] - end - continue - end - ptr_map[ptr] = sym - end - end -end - -function memoize!(ptr::Ptr{Cvoid}, fn::String)::String - fn = get(ptr_map, ptr, fn) - if !haskey(ptr_map, ptr) - ptr_map[ptr] = fn - else - @assert ptr_map[ptr] == fn - end - return fn -end -end import GPUCompiler: IRError, InvalidIRError function restore_lookups(mod::LLVM.Module)::Nothing T_size_t = convert(LLVM.LLVMType, Int) - for (v, k) in FFI.ptr_map - if haskey(functions(mod), k) - f = functions(mod)[k] - replace_uses!( - f, - LLVM.Value( - LLVM.API.LLVMConstIntToPtr( - ConstantInt(T_size_t, convert(UInt, v)), - value_type(f), - ), - ), - ) - eraseInst(mod, f) - end - end for f in functions(mod) for fattr in collect(function_attributes(f)) if isa(fattr, LLVM.StringAttribute) @@ -648,8 +500,6 @@ function check_ir!(@nospecialize(job::CompilerJob), errors::Vector{IRError}, imp return errors end -const libjulia = Ref{Ptr{Cvoid}}(C_NULL) - # List of methods to location of arg which is the mi/function, then start of args const generic_method_offsets = Dict{String,Tuple{Int,Int}}(( "jl_f__apply_latest" => (2, 3), diff --git a/src/precompile.jl b/src/precompile.jl new file mode 100644 index 0000000000..6a8cdba7e0 --- /dev/null +++ b/src/precompile.jl @@ -0,0 +1,13 @@ +using PrecompileTools: @setup_workload, @compile_workload + +@setup_workload begin + precompile_module = @eval module $(gensym()) + f(x) = x^2 + end + + kernel() = nothing + + @compile_workload begin + Enzyme.autodiff(Reverse, precompile_module.f, Active(2.0)) + end +end \ No newline at end of file From 4669251100250c652f5530ccad94e8ca4ee953d0 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Tue, 3 Dec 2024 06:12:19 -0500 Subject: [PATCH 2/9] fix --- src/compiler/validation.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/compiler/validation.jl b/src/compiler/validation.jl index 391e194fb4..5299351a12 100644 --- a/src/compiler/validation.jl +++ b/src/compiler/validation.jl @@ -2,9 +2,6 @@ using LLVM using ObjectFile using Libdl -module FFI -using LLVM - import GPUCompiler: IRError, InvalidIRError function restore_lookups(mod::LLVM.Module)::Nothing From aca53ada70615266253252a1083b484afe5ebc51 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 10:26:06 -0500 Subject: [PATCH 3/9] tm stuff --- src/compiler/optimize.jl | 50 +++++++++++++++++++++++++--------------- src/compiler/orcv2.jl | 1 + 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl index f9769881a4..b8fc02bdba 100644 --- a/src/compiler/optimize.jl +++ b/src/compiler/optimize.jl @@ -54,15 +54,17 @@ end function run_jl_pipeline(pm::ModulePassManager, tm::LLVM.TargetMachine; kwargs...) config = Ref(pipeline_options(; kwargs...)) function jl_pipeline(m) - @dispose pb = NewPMPassBuilder() begin - add!(pb, NewPMModulePassManager()) do mpm - @ccall jl_build_newpm_pipeline( - mpm.ref::Ptr{Cvoid}, - pb.ref::Ptr{Cvoid}, - config::Ptr{PipelineConfig}, - )::Cvoid + if tm.ref != C_NULL + @dispose pb = NewPMPassBuilder() begin + add!(pb, NewPMModulePassManager()) do mpm + @ccall jl_build_newpm_pipeline( + mpm.ref::Ptr{Cvoid}, + pb.ref::Ptr{Cvoid}, + config::Ptr{PipelineConfig}, + )::Cvoid + end + LLVM.run!(mpm, m, tm) end - LLVM.run!(mpm, m, tm) end return true end @@ -207,7 +209,9 @@ end function loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine) @static if true || VERSION < v"1.11-" - lower_simdloop_tm!(pm, tm) + if tm.ref != C_NULL + lower_simdloop_tm!(pm, tm) + end licm!(pm) if LLVM.version() >= v"15" simple_loop_unswitch_legacy!(pm) @@ -242,8 +246,10 @@ function more_loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.Target loop_idiom!(pm) # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop - licm!(pm) + if tm.ref != C_NULL + lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop + licm!(pm) + end julia_licm_tm!(pm, tm) # Subsequent passes not stripping metadata from terminator instruction_combining!(pm) # TODO: createInstSimplifyLegacy @@ -456,8 +462,9 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) # then finish Julia GC ModulePassManager() do pm add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - + if tm.ref != C_NULL + add_transform_info!(pm, tm) + end propagate_julia_addrsp_tm!(pm, tm) scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) @@ -477,7 +484,9 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) ModulePassManager() do pm add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) + if tm.ref != C_NULL + add_transform_info!(pm, tm) + end scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) @@ -491,8 +500,9 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) ModulePassManager() do pm add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - + if tm.ref != C_NULL + add_transform_info!(pm, tm) + end scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) basic_alias_analysis!(pm) @@ -566,7 +576,9 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) # known functions ModulePassManager() do pm add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) + if tm.ref != C_NULL + add_transform_info!(pm, tm) + end scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) @@ -585,7 +597,9 @@ end # https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603 function addTargetPasses!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine, trip::String) add_library_info!(pm, trip) - add_transform_info!(pm, tm) + if tm.ref != C_NULL + add_transform_info!(pm, tm) + end end # https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620 diff --git a/src/compiler/orcv2.jl b/src/compiler/orcv2.jl index 7588eddb78..c3f4d86b5e 100644 --- a/src/compiler/orcv2.jl +++ b/src/compiler/orcv2.jl @@ -96,6 +96,7 @@ function __init__() tempTM = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features(); optlevel) LLVM.asm_verbosity!(tempTM, true) tm[] = tempTM + @show tm, tempTM lljit = JuliaOJIT() From 9ab86523cba5c5b50303910e34e44bd6d8ab26fa Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 18:50:30 -0500 Subject: [PATCH 4/9] ix attempt --- src/compiler.jl | 6 +----- src/compiler/optimize.jl | 8 ++++---- src/compiler/orcv2.jl | 17 ++++++++++------- src/precompile.jl | 6 +++--- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/compiler.jl b/src/compiler.jl index 5eccd1e80d..b72a507b96 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -4047,11 +4047,7 @@ end ) end - if primal_job.config.target isa GPUCompiler.NativeCompilerTarget - target_machine = JIT.get_tm() - else - target_machine = GPUCompiler.llvm_machine(primal_job.config.target) - end + target_machine = GPUCompiler.llvm_machine(primal_job.config.target) parallel = parent_job === nothing ? Threads.nthreads() > 1 : false process_module = false diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl index b8fc02bdba..de26378f36 100644 --- a/src/compiler/optimize.jl +++ b/src/compiler/optimize.jl @@ -463,19 +463,19 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) ModulePassManager() do pm add_library_info!(pm, triple(mod)) if tm.ref != C_NULL - add_transform_info!(pm, tm) + #add_transform_info!(pm, tm) end - propagate_julia_addrsp_tm!(pm, tm) + #propagate_julia_addrsp_tm!(pm, tm) scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) basic_alias_analysis!(pm) cfgsimplification!(pm) dce!(pm) - cpu_features_tm!(pm, tm) + #cpu_features_tm!(pm, tm) scalar_repl_aggregates_ssa!(pm) # SSA variant? mem_cpy_opt!(pm) always_inliner!(pm) - alloc_opt_tm!(pm, tm) + #alloc_opt_tm!(pm, tm) LLVM.run!(pm, mod) end diff --git a/src/compiler/orcv2.jl b/src/compiler/orcv2.jl index c3f4d86b5e..4734f7b398 100644 --- a/src/compiler/orcv2.jl +++ b/src/compiler/orcv2.jl @@ -83,7 +83,7 @@ function define_absolute_symbol(jd, name) return false end -function __init__() +function setup_globals() opt_level = Base.JLOptions().opt_level if opt_level < 2 optlevel = LLVM.API.LLVMCodeGenLevelNone @@ -96,7 +96,6 @@ function __init__() tempTM = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features(); optlevel) LLVM.asm_verbosity!(tempTM, true) tm[] = tempTM - @show tm, tempTM lljit = JuliaOJIT() @@ -106,11 +105,6 @@ function __init__() dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) LLVM.add!(jd_main, dg) - if Sys.iswindows() && Int === Int64 - # TODO can we check isGNU? - define_absolute_symbol(jd_main, mangle(lljit, "___chkstk_ms")) - end - es = ExecutionSession(lljit) try lctm = LLVM.LocalLazyCallThroughManager(triple(lljit), es) @@ -120,6 +114,15 @@ function __init__() @warn "OrcV2 initialization failed with" err jit[] = CompilerInstance(lljit, nothing, nothing) end +end + +function __init__() + setup_globals() + + if Sys.iswindows() && Int === Int64 + # TODO can we check isGNU? + define_absolute_symbol(jd_main, mangle(lljit, "___chkstk_ms")) + end hnd = unsafe_load(cglobal(:jl_libjulia_handle, Ptr{Cvoid})) for (k, v) in Compiler.JuliaGlobalNameMap diff --git a/src/precompile.jl b/src/precompile.jl index 6a8cdba7e0..c20eaac149 100644 --- a/src/precompile.jl +++ b/src/precompile.jl @@ -4,10 +4,10 @@ using PrecompileTools: @setup_workload, @compile_workload precompile_module = @eval module $(gensym()) f(x) = x^2 end - - kernel() = nothing + + Compiler.JIT.setup_globals() @compile_workload begin Enzyme.autodiff(Reverse, precompile_module.f, Active(2.0)) end -end \ No newline at end of file +end From bf22c75a062c448a5fb48a3a597157e5369e85b5 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 18:54:07 -0500 Subject: [PATCH 5/9] reset --- src/compiler.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compiler.jl b/src/compiler.jl index b72a507b96..5eccd1e80d 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -4047,7 +4047,11 @@ end ) end - target_machine = GPUCompiler.llvm_machine(primal_job.config.target) + if primal_job.config.target isa GPUCompiler.NativeCompilerTarget + target_machine = JIT.get_tm() + else + target_machine = GPUCompiler.llvm_machine(primal_job.config.target) + end parallel = parent_job === nothing ? Threads.nthreads() > 1 : false process_module = false From 97c09b43eba0c67aa2de8a15ec8ed2b71f60c657 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 19:02:07 -0500 Subject: [PATCH 6/9] more --- src/compiler/orcv2.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compiler/orcv2.jl b/src/compiler/orcv2.jl index 4734f7b398..11b3c77c7e 100644 --- a/src/compiler/orcv2.jl +++ b/src/compiler/orcv2.jl @@ -114,10 +114,12 @@ function setup_globals() @warn "OrcV2 initialization failed with" err jit[] = CompilerInstance(lljit, nothing, nothing) end + + jd_main end function __init__() - setup_globals() + jd_main = setup_globals() if Sys.iswindows() && Int === Int64 # TODO can we check isGNU? From 47d88138e175bfbdb1dfa03c5b1f92c95ced32c8 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 19:08:22 -0500 Subject: [PATCH 7/9] ix --- src/compiler/orcv2.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/orcv2.jl b/src/compiler/orcv2.jl index 11b3c77c7e..1640b05db2 100644 --- a/src/compiler/orcv2.jl +++ b/src/compiler/orcv2.jl @@ -115,11 +115,11 @@ function setup_globals() jit[] = CompilerInstance(lljit, nothing, nothing) end - jd_main + jd_main, lljit end function __init__() - jd_main = setup_globals() + jd_main, lljit = setup_globals() if Sys.iswindows() && Int === Int64 # TODO can we check isGNU? From d38f1a4b8ffd906eab109e54e7ce983f34702814 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 19:16:29 -0500 Subject: [PATCH 8/9] reduce --- src/compiler/optimize.jl | 56 +++++++++++++++------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl index de26378f36..f9769881a4 100644 --- a/src/compiler/optimize.jl +++ b/src/compiler/optimize.jl @@ -54,17 +54,15 @@ end function run_jl_pipeline(pm::ModulePassManager, tm::LLVM.TargetMachine; kwargs...) config = Ref(pipeline_options(; kwargs...)) function jl_pipeline(m) - if tm.ref != C_NULL - @dispose pb = NewPMPassBuilder() begin - add!(pb, NewPMModulePassManager()) do mpm - @ccall jl_build_newpm_pipeline( - mpm.ref::Ptr{Cvoid}, - pb.ref::Ptr{Cvoid}, - config::Ptr{PipelineConfig}, - )::Cvoid - end - LLVM.run!(mpm, m, tm) + @dispose pb = NewPMPassBuilder() begin + add!(pb, NewPMModulePassManager()) do mpm + @ccall jl_build_newpm_pipeline( + mpm.ref::Ptr{Cvoid}, + pb.ref::Ptr{Cvoid}, + config::Ptr{PipelineConfig}, + )::Cvoid end + LLVM.run!(mpm, m, tm) end return true end @@ -209,9 +207,7 @@ end function loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine) @static if true || VERSION < v"1.11-" - if tm.ref != C_NULL - lower_simdloop_tm!(pm, tm) - end + lower_simdloop_tm!(pm, tm) licm!(pm) if LLVM.version() >= v"15" simple_loop_unswitch_legacy!(pm) @@ -246,10 +242,8 @@ function more_loop_optimizations_tm!(pm::LLVM.ModulePassManager, tm::LLVM.Target loop_idiom!(pm) # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - if tm.ref != C_NULL - lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop - licm!(pm) - end + lower_simdloop_tm!(pm, tm) # Annotate loop marked with "loopinfo" as LLVM parallel loop + licm!(pm) julia_licm_tm!(pm, tm) # Subsequent passes not stripping metadata from terminator instruction_combining!(pm) # TODO: createInstSimplifyLegacy @@ -462,20 +456,19 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) # then finish Julia GC ModulePassManager() do pm add_library_info!(pm, triple(mod)) - if tm.ref != C_NULL - #add_transform_info!(pm, tm) - end - #propagate_julia_addrsp_tm!(pm, tm) + add_transform_info!(pm, tm) + + propagate_julia_addrsp_tm!(pm, tm) scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) basic_alias_analysis!(pm) cfgsimplification!(pm) dce!(pm) - #cpu_features_tm!(pm, tm) + cpu_features_tm!(pm, tm) scalar_repl_aggregates_ssa!(pm) # SSA variant? mem_cpy_opt!(pm) always_inliner!(pm) - #alloc_opt_tm!(pm, tm) + alloc_opt_tm!(pm, tm) LLVM.run!(pm, mod) end @@ -484,9 +477,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) ModulePassManager() do pm add_library_info!(pm, triple(mod)) - if tm.ref != C_NULL - add_transform_info!(pm, tm) - end + add_transform_info!(pm, tm) scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) @@ -500,9 +491,8 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) ModulePassManager() do pm add_library_info!(pm, triple(mod)) - if tm.ref != C_NULL - add_transform_info!(pm, tm) - end + add_transform_info!(pm, tm) + scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) basic_alias_analysis!(pm) @@ -576,9 +566,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine) # known functions ModulePassManager() do pm add_library_info!(pm, triple(mod)) - if tm.ref != C_NULL - add_transform_info!(pm, tm) - end + add_transform_info!(pm, tm) scoped_no_alias_aa!(pm) type_based_alias_analysis!(pm) @@ -597,9 +585,7 @@ end # https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603 function addTargetPasses!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachine, trip::String) add_library_info!(pm, trip) - if tm.ref != C_NULL - add_transform_info!(pm, tm) - end + add_transform_info!(pm, tm) end # https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620 From 8035ee04838184d13b2bea8da85c4c36638d1568 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 3 Dec 2024 19:19:45 -0500 Subject: [PATCH 9/9] fix --- src/compiler/validation.jl | 153 +++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/src/compiler/validation.jl b/src/compiler/validation.jl index 5299351a12..e90f7d0712 100644 --- a/src/compiler/validation.jl +++ b/src/compiler/validation.jl @@ -2,10 +2,161 @@ using LLVM using ObjectFile using Libdl +module FFI +using LLVM +module BLASSupport +# TODO: LAPACK handling +using LinearAlgebra +using ObjectFile +using Libdl +function __init__() + global blas_handle = Libdl.dlopen(BLAS.libblastrampoline) +end +function get_blas_symbols() + symbols = BLAS.get_config().exported_symbols + if BLAS.USE_BLAS64 + return map(Base.Fix2(*, "64_"), symbols) + end + return symbols +end + +function lookup_blas_symbol(name::String) + Libdl.dlsym(blas_handle::Ptr{Cvoid}, name; throw_error = false) +end +end + +const ptr_map = Dict{Ptr{Cvoid},String}() + +function __init__() + known_names = ( + "jl_alloc_array_1d", + "jl_alloc_array_2d", + "jl_alloc_array_3d", + "ijl_alloc_array_1d", + "ijl_alloc_array_2d", + "ijl_alloc_array_3d", + "jl_new_array", + "ijl_new_array", + "jl_array_copy", + "ijl_array_copy", + "jl_alloc_string", + "jl_in_threaded_region", + "jl_enter_threaded_region", + "jl_exit_threaded_region", + "jl_set_task_tid", + "jl_new_task", + "malloc", + "memmove", + "memcpy", + "memset", + "jl_array_grow_beg", + "ijl_array_grow_beg", + "jl_array_grow_end", + "ijl_array_grow_end", + "jl_array_grow_at", + "ijl_array_grow_at", + "jl_array_del_beg", + "ijl_array_del_beg", + "jl_array_del_end", + "ijl_array_del_end", + "jl_array_del_at", + "ijl_array_del_at", + "jl_array_ptr", + "ijl_array_ptr", + "jl_value_ptr", + "jl_get_ptls_states", + "jl_gc_add_finalizer_th", + "jl_symbol_n", + "jl_", + "jl_object_id", + "jl_reshape_array", + "ijl_reshape_array", + "jl_matching_methods", + "ijl_matching_methods", + "jl_array_sizehint", + "ijl_array_sizehint", + "jl_get_keyword_sorter", + "ijl_get_keyword_sorter", + "jl_ptr_to_array", + "jl_box_float32", + "ijl_box_float32", + "jl_box_float64", + "ijl_box_float64", + "jl_ptr_to_array_1d", + "jl_eqtable_get", + "ijl_eqtable_get", + "memcmp", + "memchr", + "jl_get_nth_field_checked", + "ijl_get_nth_field_checked", + "jl_stored_inline", + "ijl_stored_inline", + "jl_array_isassigned", + "ijl_array_isassigned", + "jl_array_ptr_copy", + "ijl_array_ptr_copy", + "jl_array_typetagdata", + "ijl_array_typetagdata", + "jl_idtable_rehash", + ) + for name in known_names + sym = LLVM.find_symbol(name) + if sym == C_NULL + continue + end + if haskey(ptr_map, sym) + # On MacOS memcpy and memmove seem to collide? + if name == "memcpy" + continue + end + end + @assert !haskey(ptr_map, sym) + ptr_map[sym] = name + end + for sym in BLASSupport.get_blas_symbols() + ptr = BLASSupport.lookup_blas_symbol(sym) + if ptr !== nothing + if haskey(ptr_map, ptr) + if ptr_map[ptr] != sym + @warn "Duplicated symbol in ptr_map" ptr, sym, ptr_map[ptr] + end + continue + end + ptr_map[ptr] = sym + end + end +end + +function memoize!(ptr::Ptr{Cvoid}, fn::String)::String + fn = get(ptr_map, ptr, fn) + if !haskey(ptr_map, ptr) + ptr_map[ptr] = fn + else + @assert ptr_map[ptr] == fn + end + return fn +end +end + import GPUCompiler: IRError, InvalidIRError function restore_lookups(mod::LLVM.Module)::Nothing T_size_t = convert(LLVM.LLVMType, Int) + for (v, k) in FFI.ptr_map + if haskey(functions(mod), k) + f = functions(mod)[k] + replace_uses!( + f, + LLVM.Value( + LLVM.API.LLVMConstIntToPtr( + ConstantInt(T_size_t, convert(UInt, v)), + value_type(f), + ), + ), + ) + eraseInst(mod, f) + end + end for f in functions(mod) for fattr in collect(function_attributes(f)) if isa(fattr, LLVM.StringAttribute) @@ -497,6 +648,8 @@ function check_ir!(@nospecialize(job::CompilerJob), errors::Vector{IRError}, imp return errors end +const libjulia = Ref{Ptr{Cvoid}}(C_NULL) + # List of methods to location of arg which is the mi/function, then start of args const generic_method_offsets = Dict{String,Tuple{Int,Int}}(( "jl_f__apply_latest" => (2, 3),