Permit inlining on every function of the primal module before the module pass manager runs, and report the post-optimization IR through @debug (emitted only when debug logging is enabled) instead of printing it to stdout.
diff --git a/src/compiler.jl b/src/compiler.jl index 771dc896db..52ccb6290f 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -4366,6 +4366,9 @@ function GPUCompiler.codegen(output::Symbol, job::CompilerJob{<:EnzymeTarget}; mod, meta = GPUCompiler.codegen(:llvm, primal_job; optimize=false, toplevel=toplevel, cleanup=false, validate=false, parent_job=parent_job) prepare_llvm(mod, primal_job, meta) + for f in functions(mod) + permit_inlining!(f) + end LLVM.ModulePassManager() do pm API.AddPreserveNVVMPass!(pm, #=Begin=#true) diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl index f430830dd4..70ee9b6912 100644 --- a/src/compiler/optimize.jl +++ b/src/compiler/optimize.jl @@ -1690,6 +1690,7 @@ function post_optimze!(mod, tm, machine=true) addOptimizationPasses!(pm) run!(pm, mod) end + @debug "post_optimze!: module after optimization passes" ir = string(mod) if machine # TODO enable validate_return_roots # validate_return_roots!(mod)