Skip to content

Commit

Permalink
Shadowalloc (#2218)
Browse files Browse the repository at this point in the history
* Zero shadow alloc gc before primal

* fix

* Fix

* only npointers != 0

* pointerfree

* fix

* Update compiler.jl

* Update compiler.jl

* Update compiler.jl

* fix

* datatype

* Update compiler.jl

* Update compiler.jl
  • Loading branch information
wsmoses authored Dec 24, 2024
1 parent e2551ac commit 8d892e7
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 8 deletions.
113 changes: 105 additions & 8 deletions src/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -594,26 +594,112 @@ function julia_undef_value_for_type(
throw(AssertionError("Unknown type to val: $(Ty)"))
end

function shadow_alloc_rewrite(V::LLVM.API.LLVMValueRef, gutils::API.EnzymeGradientUtilsRef)
"""
    create_recursive_stores(B, Ty, prev)

Emit, through builder `B`, the stores that initialize the GC-pointer slots of the
allocation `prev`, whose Julia type is `Ty`.

Nothing is emitted when `Ty` is pointer-free. Otherwise the LLVM lowering of `Ty` is
requested from the runtime (`jl_type_to_llvm`):

- If the type is *not* boxed, `prev` is cast to a pointer to that LLVM type in the
  `Derived` address space and handed to `zero_single_allocation` (with `zeroAll=false`
  and `atomic=true`).
- If the type *is* boxed, every field is visited at its byte offset. Fields whose type
  is abstract, a `Union`, or an `Array` (`GenericMemory` on Julia 1.11+) are treated as
  a single `Any` slot and passed to `zero_single_allocation`; all other fields are
  handled by recursing into this function.

# Arguments
- `B::LLVM.IRBuilder`: builder already positioned where the stores should be inserted.
- `Ty::DataType`: Julia type of the allocation.
- `prev::LLVM.Value`: pointer to the allocation to initialize.

# Returns
`nothing`.
"""
function create_recursive_stores(B::LLVM.IRBuilder, @nospecialize(Ty::DataType), @nospecialize(prev::LLVM.Value))::Nothing
    # No GC pointers anywhere inside: nothing to initialize.
    if Base.datatype_pointerfree(Ty)
        return nothing
    end

    isboxed_ref = Ref{Bool}()
    LLVMType = LLVM.LLVMType(ccall(:jl_type_to_llvm, LLVM.API.LLVMTypeRef,
        (Any, LLVM.Context, Ptr{Bool}), Ty, LLVM.context(), isboxed_ref))

    T_int64 = LLVM.Int64Type()

    if !isboxed_ref[]
        # Inline layout: let zero_single_allocation walk the LLVM type directly.
        zeroAll = false
        prev = bitcast!(B, prev, LLVM.PointerType(LLVMType, addrspace(value_type(prev))))
        prev = addrspacecast!(B, prev, LLVM.PointerType(LLVMType, Derived))
        zero_single_allocation(B, Ty, LLVMType, prev, zeroAll, LLVM.ConstantInt(T_int64, 0); atomic=true)
        return nothing
    end

    @assert fieldcount(Ty) != 0

    T_jlvalue = LLVM.StructType(LLVM.LLVMType[])
    T_prjlvalue = LLVM.PointerType(T_jlvalue, Tracked)
    T_int8 = LLVM.Int8Type()

    # Address fields by raw byte offset from the start of the object.
    prev2 = bitcast!(B, prev, LLVM.PointerType(T_int8, addrspace(value_type(prev))))

    for i in 1:fieldcount(Ty)
        Ty2 = fieldtype(Ty, i)
        off = fieldoffset(Ty, i)

        # `isa DataType`, not `<: DataType`: `datatype_pointerfree` operates on DataType
        # instances, and the purpose of this guard is to skip fields that hold no GC
        # pointers *before* emitting a GEP for them.
        if Ty2 isa DataType && Base.datatype_pointerfree(Ty2)
            continue
        end

        prev3 = inbounds_gep!(
            B,
            T_int8,
            prev2,
            LLVM.Value[LLVM.ConstantInt(Int64(off))],
        )

        # These field kinds are initialized as one `Any`-typed pointer slot instead of
        # being recursed into.
        fallback = Base.isabstracttype(Ty2) || Ty2 isa Union

        @static if VERSION < v"1.11-"
            fallback |= Ty2 <: Array
        else
            fallback |= Ty2 <: GenericMemory
        end

        if fallback
            Ty2 = Any
            zeroAll = false
            prev3 = bitcast!(B, prev3, LLVM.PointerType(T_prjlvalue, addrspace(value_type(prev3))))
            if addrspace(value_type(prev3)) != Derived
                prev3 = addrspacecast!(B, prev3, LLVM.PointerType(T_prjlvalue, Derived))
            end
            zero_single_allocation(B, Ty2, T_prjlvalue, prev3, zeroAll, LLVM.ConstantInt(T_int64, 0); atomic=true)
        else
            create_recursive_stores(B, Ty2, prev3)
        end
    end
    return nothing
end

"""
    shadow_alloc_rewrite(V, gutils, Orig, idx, prev)

Callback applied to the shadow allocation call `V`.

- In the reverse modes (`DEM_ReverseModePrimal`, `DEM_ReverseModeGradient`,
  `DEM_ReverseModeCombined`): when the allocated type's activity is `ActiveState` or
  `MixedState`, operand 3 of the call is replaced by the LLVM value for
  `Base.RefValue{Ty}`.
- In `DEM_ForwardMode`: stores are inserted right after `prev` to initialize its
  GC-tracked fields (see the comment in the body for the rationale).

# Arguments
- `V`: raw reference to the shadow allocation call instruction.
- `gutils`: raw reference to the gradient utilities, used here to query the mode.
- `Orig`, `idx`: part of the callback signature; not used by this function.
- `prev`: raw reference to the preceding allocation instruction (used in forward mode).

# Throws
- `AssertionError` if the allocated Julia type cannot be determined statically.

# Returns
`nothing`.
"""
function shadow_alloc_rewrite(V::LLVM.API.LLVMValueRef, gutils::API.EnzymeGradientUtilsRef, Orig::LLVM.API.LLVMValueRef, idx::UInt64, prev::API.LLVMValueRef)
    V = LLVM.CallInst(V)
    gutils = GradientUtils(gutils)
    mode = get_mode(gutils)

    # Resolve the enclosing function up front: it is interpolated into the error
    # message below, so it must be assigned before that message can be built.
    fn = LLVM.parent(LLVM.parent(V))

    has, Ty, _ = abs_typeof(V)
    if !has
        throw(AssertionError("$(string(fn))\n Allocation could not have its type statically determined $(string(V))"))
    end

    if mode == API.DEM_ReverseModePrimal ||
       mode == API.DEM_ReverseModeGradient ||
       mode == API.DEM_ReverseModeCombined
        world = enzyme_extract_world(fn)
        rt = active_reg_inner(Ty, (), world)
        if rt == ActiveState || rt == MixedState
            B = LLVM.IRBuilder()
            position!(B, V)
            operands(V)[3] = unsafe_to_llvm(B, Base.RefValue{Ty})
        end
    end

    if mode == API.DEM_ForwardMode
        # Zero any jlvalue_t inner elements of the preceding allocation.
        # Specifically in forward mode, you will first run the original allocation,
        # then all shadow allocations. These allocations will thus all run before
        # any value may store into them. For example, as follows:
        #   %orig = julia.gc_alloc(...)
        #   %"orig'" = julia.gc_alloc(...)
        #   store orig[0] = jlvaluet
        #   store "orig'"[0] = jlvaluet'
        # As a result, by the time of the subsequent GC allocation, the memory in the
        # preceding allocation might be undefined, and trigger a GC error. To avoid this,
        # we will explicitly zero the GC'd fields of the previous allocation.
        prev = LLVM.Instruction(prev)
        B = LLVM.IRBuilder()
        # Insert immediately after `prev`, i.e. before whatever instruction follows it.
        position!(B, LLVM.Instruction(LLVM.API.LLVMGetNextInstruction(prev)))

        create_recursive_stores(B, Ty, prev)
    end

    return nothing
end

Expand Down Expand Up @@ -671,7 +757,7 @@ function zero_allocation(B::LLVM.API.LLVMBuilderRef, LLVMType::LLVM.API.LLVMType
return nothing
end

function zero_single_allocation(builder::LLVM.IRBuilder, @nospecialize(jlType::DataType), @nospecialize(LLVMType::LLVM.LLVMType), @nospecialize(nobj::LLVM.Value), zeroAll::Bool, @nospecialize(idx::LLVM.Value))
function zero_single_allocation(builder::LLVM.IRBuilder, @nospecialize(jlType::DataType), @nospecialize(LLVMType::LLVM.LLVMType), @nospecialize(nobj::LLVM.Value), zeroAll::Bool, @nospecialize(idx::LLVM.Value); write_barrier=false, atomic=false)
T_jlvalue = LLVM.StructType(LLVM.LLVMType[])
T_prjlvalue = LLVM.PointerType(T_jlvalue, Tracked)
T_prjlvalue_UT = LLVM.PointerType(T_jlvalue)
Expand All @@ -682,19 +768,26 @@ function zero_single_allocation(builder::LLVM.IRBuilder, @nospecialize(jlType::D
jlType,
)]

addedvals = LLVM.Value[]
while length(todo) != 0
path, ty, jlty = popfirst!(todo)
if isa(ty, LLVM.PointerType)
if any_jltypes(ty)
loc = gep!(builder, LLVMType, nobj, path)
mod = LLVM.parent(LLVM.parent(Base.position(builder)))
fill_val = unsafe_nothing_to_llvm(mod)
push!(addedvals, fill_val)
loc = bitcast!(
builder,
loc,
LLVM.PointerType(T_prjlvalue, addrspace(value_type(loc))),
)
store!(builder, fill_val, loc)
st = store!(builder, fill_val, loc)
if atomic
ordering!(st, LLVM.API.LLVMAtomicOrderingRelease)
syncscope!(st, LLVM.SyncScope("singlethread"))
metadata(st)["enzymejl_atomicgc"] = LLVM.MDNode(LLVM.Metadata[])
end
elseif zeroAll
loc = gep!(builder, LLVMType, nobj, path)
store!(builder, LLVM.null(ty), loc)
Expand Down Expand Up @@ -741,6 +834,10 @@ function zero_single_allocation(builder::LLVM.IRBuilder, @nospecialize(jlType::D
continue
end
end
if length(addedvals) != 0 && write_barrier
pushfirst!(addedvals, get_base_and_offset(nobj; offsetAllowed=false, inttoptr=false)[1])
emit_writebarrier!(builder, addedvals)
end
return nothing

end
Expand Down Expand Up @@ -1127,7 +1224,7 @@ function __init__()
@cfunction(
shadow_alloc_rewrite,
Cvoid,
(LLVM.API.LLVMValueRef, API.EnzymeGradientUtilsRef)
(LLVM.API.LLVMValueRef, API.EnzymeGradientUtilsRef, LLVM.API.LLVMValueRef, UInt64, LLVM.API.LLVMValueRef)
)
)
register_alloc_rules()
Expand Down
1 change: 1 addition & 0 deletions src/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ function addOptimizationPasses!(pm::LLVM.ModulePassManager, tm::LLVM.TargetMachi
jl_inst_simplify!(pm)
jump_threading!(pm)
dead_store_elimination!(pm)
add!(pm, FunctionPass("SafeAtomicToRegularStore", safe_atomic_to_regular_store!))

# More dead allocation (store) deletion before loop optimization
# consider removing this:
Expand Down
18 changes: 18 additions & 0 deletions src/llvm/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2446,3 +2446,21 @@ function removeDeadArgs!(mod::LLVM.Module, tm::LLVM.TargetMachine)
eraseInst(mod, func)
end

"""
    safe_atomic_to_regular_store!(f::LLVM.Function) -> Bool

Demote every store in `f` that carries the `"enzymejl_atomicgc"` metadata back to an
ordinary store: the metadata is removed, the sync scope is set to `"system"`, and the
ordering is set to `LLVMAtomicOrderingNotAtomic`.

Returns `true` if at least one instruction was rewritten, `false` otherwise.
"""
function safe_atomic_to_regular_store!(f::LLVM.Function)
    changed = false
    for bb in blocks(f), inst in instructions(bb)
        # Only stores are tagged with the marker, so anything else is skipped.
        # (The guard must be negated; skipping the stores themselves would make the
        # whole pass a no-op.)
        if !isa(inst, LLVM.StoreInst)
            continue
        end
        md = metadata(inst)
        if !haskey(md, "enzymejl_atomicgc")
            continue
        end
        Base.delete!(md, "enzymejl_atomicgc")
        syncscope!(inst, LLVM.SyncScope("system"))
        ordering!(inst, LLVM.API.LLVMAtomicOrderingNotAtomic)
        changed = true
    end
    return changed
end


2 comments on commit 8d892e7

@wsmoses
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/121951

Tip: Release Notes

Did you know you can add release notes too? Just add Markdown-formatted text underneath the comment, after the text
"Release notes:", and it will be added to the registry PR; if TagBot is installed, it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.13.25 -m "<description of version>" 8d892e7eaacbc5e7cd98f13bd3b796d57cf029df
git push origin v0.13.25

Please sign in to comment.