diff --git a/Project.toml b/Project.toml
index 8eda5df0..61498d02 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,13 +5,11 @@ version = "0.4.4"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -23,6 +21,7 @@ NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
+PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
@@ -31,46 +30,46 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Sobol = "ed01d8cd-4d21-5b2a-85b4-cc3bdc58bad4"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 
 [weakdeps]
+LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c"
 
 [extensions]
 SophonOptimisersExt = "Optimisers"
 SophonTaylorDiffExt = "TaylorDiff"
+SophonTaylorDiffLuxCUDAExt = ["TaylorDiff", "LuxCUDA"]
+SophonLuxCUDAExt = "LuxCUDA"
 
 [compat]
 Adapt = "3"
-CUDA = "5"
 ChainRulesCore = "1"
 ComponentArrays = "0.15"
 Distributions = "0.25"
 DomainSets = "0.5, 0.6, 0.7"
 ForwardDiff = "0.10"
-GPUArrays = "9"
 GPUArraysCore = "0.1"
 LRUCache = "1"
 Lux = "0.5.6"
+LuxCUDA = "0.3"
 MacroTools = "0.5"
 Memoize = "0.4"
 ModelingToolkit = "8"
-NNlib = "0.9"
+NNlib = "0.8, 0.9"
 Optimisers = "0.2"
 Optimization = "3"
 OptimizationOptimisers = "0.1"
+PackageExtensionCompat = "1"
 ProgressBars = "1.5"
 QuasiMonteCarlo = "0.2, 0.3"
 Requires = "1"
 RuntimeGeneratedFunctions = "0.5"
 SciMLBase = "2"
 Sobol = "1, 2"
-StaticArrays = "1.5"
-StaticArraysCore = "1"
 StatsBase = "0.33, 0.34"
 Symbolics = "5"
 julia = "1.8"
@@ -83,6 +82,7 @@ OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 
 [targets]
 test = ["Test", "Zygote", "ModelingToolkit", "DomainSets", "OptimizationOptimJL", "TaylorDiff"]
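The `Project.toml` changes above move all CUDA handling behind package extensions: `CUDA`, `GPUArrays`, and `StaticArrays` leave the hard dependencies, `LuxCUDA` becomes a weak dependency, and `PackageExtensionCompat` backports extension loading to Julia 1.8. A minimal sketch of how activation can be checked, assuming Julia ≥ 1.9 (where `Base.get_extension` is available) and an environment with `LuxCUDA` installed:

```julia
using Sophon
# Extensions stay inert until their weak dependency is loaded:
Base.get_extension(Sophon, :SophonLuxCUDAExt) === nothing   # true

using LuxCUDA   # loading the weak dep activates SophonLuxCUDAExt
Base.get_extension(Sophon, :SophonLuxCUDAExt) isa Module    # true
```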
"b2108857-7c20-44ae-9111-449ecde12c47" @@ -18,5 +19,6 @@ OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Sophon = "077df616-1c15-4d29-b519-7542a62df138" +StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/docs/src/qa.md b/docs/src/qa.md index ca3bf579..10fc5660 100644 --- a/docs/src/qa.md +++ b/docs/src/qa.md @@ -1,10 +1,10 @@ -## Q: How can I train the model using GPUs? +## Q: How can I train the model using my GPU? -A: To train the model on GPUs, invoke the gpu function on instances of PINN: +A: To train the model on a single GPU, do the following: ```julia -using Lux -pinn = gpu(PINN(...)) +using Lux, LuxCUDA +prob = Sophon.discretize(...) |> gpu_device() ``` ## Q: How can I monitor the loss for each loss function? diff --git a/docs/src/tutorials/helmholtz.md b/docs/src/tutorials/helmholtz.md index 130ad2b9..a09b3cde 100644 --- a/docs/src/tutorials/helmholtz.md +++ b/docs/src/tutorials/helmholtz.md @@ -38,7 +38,7 @@ bcs = [u(-1,y) ~ 0, u(1,y) ~ 0, u(x, -1) ~ 0, u(x, 1) ~ 0] Note that the boundary conditions are compatible with periocity, which allows us to apply [`BACON`](@ref). ```@example helmholtz chain = BACON(2, 1, 5, 2; hidden_dims = 32, num_layers=5) -pinn = PINN(chain) # call `gpu` on it if you want to use gpu +pinn = PINN(chain) sampler = QuasiRandomSampler(300, 100) strategy = NonAdaptiveTraining() @@ -50,14 +50,12 @@ prob = Sophon.discretize(helmholtz, pinn, sampler, strategy) Let's plot the result. ```@example helmholtz phi = pinn.phi - +ps = res.u xs, ys= [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_analytic(x,y) = sinpi(a1*x)*sinpi(a2*y) u_real = [u_analytic(x,y) for x in xs, y in ys] -phi_cpu = cpu(phi) # in case you are using GPU -ps_cpu = cpu(res.u) -u_pred = [sum(phi_cpu(([x,y]), ps_cpu)) for x in xs, y in ys] +u_pred = [sum(phi(([x,y]), ps)) for x in xs, y in ys] using CairoMakie axis = (xlabel="x", ylabel="y", title="Analytical Solution") diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl new file mode 100644 index 00000000..47a5c92b --- /dev/null +++ b/ext/SophonLuxCUDAExt.jl @@ -0,0 +1,11 @@ +module SophonLuxCUDAExt + +using Lux, LuxCUDA, Sophon, Optimization, Adapt + +function (::LuxCUDADevice)(prob::OptimizationProblem) + u0 = adapt(CuArray, prob.u0) + p = Tuple(adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)) # have to use tuple here... 
diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl
new file mode 100644
index 00000000..47a5c92b
--- /dev/null
+++ b/ext/SophonLuxCUDAExt.jl
@@ -0,0 +1,11 @@
+module SophonLuxCUDAExt
+
+using Lux, LuxCUDA, Sophon, Optimization, Adapt
+
+function (::LuxCUDADevice)(prob::OptimizationProblem)
+    u0 = adapt(CuArray, prob.u0)
+    p = Tuple(adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)) # have to use tuple here...
+    return Optimization.OptimizationProblem(prob.f, u0, p)
+end
+
+end
diff --git a/ext/SophonTaylorDiffExt.jl b/ext/SophonTaylorDiffExt.jl
index b15cbb00..47a0136b 100644
--- a/ext/SophonTaylorDiffExt.jl
+++ b/ext/SophonTaylorDiffExt.jl
@@ -115,7 +115,7 @@ for N in 1:5
 end
 
 @inline function taylordiff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{N}) where {T <: Number, N}
-    ε = Sophon.maybe_adapt(x, ε_)
+    ε = Sophon.maybe_convert(x, ε_)
     return TaylorDiff.derivative(Base.Fix2(phi, θ), x, ε, Val{N+1}())
 end
 
@@ -147,13 +147,6 @@ for l in 1:4
     end
 end
 
-# avoid NaN
-function Base.:*(A::Union{Sophon.CuMatrix{T}, LinearAlgebra.Transpose{T, Sophon.CuArray}},
-                 B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N}
-    C = similar(B, (size(A, 1), size(B, 2)))
-    fill!(C, zero(eltype(C)))
-    return LinearAlgebra.mul!(C, A, B)
-end
 
 function __init__()
     @static if VERSION >= v"1.9.0"
diff --git a/ext/SophonTaylorDiffLuxCUDAExt.jl b/ext/SophonTaylorDiffLuxCUDAExt.jl
new file mode 100644
index 00000000..d4dc2f85
--- /dev/null
+++ b/ext/SophonTaylorDiffLuxCUDAExt.jl
@@ -0,0 +1,13 @@
+module SophonTaylorDiffLuxCUDAExt
+
+using TaylorDiff, LuxCUDA, Sophon, LinearAlgebra
+
+# avoid NaN
+function Base.:*(A::Union{Sophon.CuMatrix{T}, LinearAlgebra.Transpose{T, Sophon.CuArray}},
+                 B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N}
+    C = similar(B, (size(A, 1), size(B, 2)))
+    fill!(C, zero(eltype(C)))
+    return LinearAlgebra.mul!(C, A, B)
+end
+
+end
diff --git a/src/Sophon.jl b/src/Sophon.jl
index 7f7d852a..cde41cae 100644
--- a/src/Sophon.jl
+++ b/src/Sophon.jl
@@ -13,7 +13,7 @@ using ComponentArrays
 import SciMLBase
 import SciMLBase: parameterless_type, __solve, build_solution, NullParameters
 using StatsBase, QuasiMonteCarlo
-using Adapt, ChainRulesCore, CUDA, GPUArrays, GPUArraysCore
+using Adapt, ChainRulesCore, GPUArraysCore
 import GPUArraysCore: AbstractGPUArray
 import QuasiMonteCarlo
 import Sobol
@@ -26,7 +26,7 @@ using ForwardDiff
 using MacroTools
 using MacroTools: prewalk, postwalk
 using Requires
-using StaticArrays: SVector
+using StaticArraysCore: SVector
 
 RuntimeGeneratedFunctions.init(@__MODULE__)
 
@@ -38,7 +38,6 @@ include("layers/nets.jl")
 include("layers/utils.jl")
 include("layers/operators.jl")
 
-include("pde/componentarrays.jl")
 include("pde/pinn_types.jl")
 include("pde/utils.jl")
 include("pde/sym_utils.jl")
@@ -46,11 +45,9 @@ include("pde/training_strategies.jl")
 include("pde/pinnsampler.jl")
 include("pde/discretize.jl")
 
+using PackageExtensionCompat
 function __init__()
-    @static if !isdefined(Base, :get_extension)
-        @require Optimisers="3bd65402-5787-11e9-1adc-39752487f4e2" begin include("../ext/SophonOptimisersExt.jl") end
-        @require TaylorDiff="b36ab563-344f-407b-a36a-4f200bebf99c" begin include("../ext/SophonTaylorDiffExt.jl") end
-    end
+    @require_extensions
 end
 
 export @showprogress
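`__init__` now defers conditional loading to PackageExtensionCompat: `@require_extensions` is a no-op on Julia ≥ 1.9, where Pkg loads the `[extensions]` entries natively, and falls back to `Requires`-style includes on 1.8. With the extensions active, `gpu_device()` returns a `LuxCUDADevice`, and applying it to the discretized problem dispatches to the method added in `ext/SophonLuxCUDAExt.jl` above. A small sketch of what that conversion yields, assuming `prob` comes from `Sophon.discretize` and a functional CUDA setup:

```julia
using Lux, LuxCUDA

dev = gpu_device()       # a LuxCUDADevice when CUDA is available and functional
prob_gpu = dev(prob)     # dispatches to the overload in SophonLuxCUDAExt

prob_gpu.u0 isa CuArray  # parameters adapted to the device
prob_gpu.p isa Tuple     # datasets adapted one by one and collected into a Tuple,
                         # per the "have to use tuple here" comment in the extension
```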
diff --git a/src/pde/componentarrays.jl b/src/pde/componentarrays.jl
deleted file mode 100644
index 7a95ba02..00000000
--- a/src/pde/componentarrays.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-const AbstractGPUComponentArray{T, N, Ax} = ComponentArray{T, N,
-                                                           <:GPUArraysCore.AbstractGPUVector,
-                                                           Ax}
-const AbstractGPUComponentVector{T, Ax} = ComponentArray{T, 1,
-                                                         <:GPUArraysCore.AbstractGPUVector,
-                                                         Ax}
-const AbstractGPUComponentMatrix{T, Ax} = ComponentArray{T, 2,
-                                                         <:GPUArraysCore.AbstractGPUMatrix,
-                                                         Ax}
-const AbstractGPUComponentVecorMat{T, Ax} = Union{AbstractGPUComponentVector{T, Ax},
-                                                  AbstractGPUComponentMatrix{T, Ax}}
-
-function _ComponentArray(nt::NamedTuple)
-    return isongpu(nt) ? adapt(CuArray, ComponentArray(cpu(nt))) : ComponentArray(nt)
-end
diff --git a/src/pde/discretize.jl b/src/pde/discretize.jl
index 585bb0f8..51dc4a3d 100644
--- a/src/pde/discretize.jl
+++ b/src/pde/discretize.jl
@@ -76,11 +76,9 @@ function discretize(pde_system, pinn::PINN, sampler::PINNSampler,
                     adtype=Optimization.AutoZygote())
     datasets = sample(pde_system, sampler)
     init_params = Lux.fmap(Base.Fix1(broadcast, fdtype), pinn.init_params)
-    init_params = _ComponentArray(init_params)
+    init_params = ComponentArray(init_params)
     datasets = map(Base.Fix1(broadcast, fdtype), datasets)
-    datasets = init_params isa AbstractGPUComponentVector ?
-               map(Base.Fix1(adapt, CuArray), datasets) : datasets
 
     pde_and_bcs_loss_function = build_loss_function(pde_system, pinn, strategy,
                                                     derivative, derivative_bc, fdtype)
diff --git a/src/pde/pinn_types.jl b/src/pde/pinn_types.jl
index faed9b62..09d707ee 100644
--- a/src/pde/pinn_types.jl
+++ b/src/pde/pinn_types.jl
@@ -2,7 +2,7 @@
     PINN(chain, rng::AbstractRNG=Random.default_rng())
     PINN(rng::AbstractRNG=Random.default_rng(); kwargs...)
 
-A container for a neural network, its states and its initial parameters. Call `gpu` and `cpu` to move the neural network to the GPU and CPU respectively.
+A container for a neural network, its states and its initial parameters.
 The default element type of the parameters is `Float64`.
 
 ## Fields
@@ -120,40 +120,29 @@ end
 
 const NTofChainState{names} = NamedTuple{names, <:Tuple{Vararg{ChainState}}}
 
-function Lux.cpu(cs::ChainState)
-    Lux.@set! cs.state = cpu(cs.state)
-    return cs
-end
-
-function Lux.gpu(cs::ChainState)
-    Lux.@set! cs.state = adapt(CuArray, cs.state)
-    return cs
-end
-
-function Lux.cpu(cs::NamedTuple{names, <:Tuple{Vararg{ChainState}}}) where {names}
-    return map(cs) do c
-        return cpu(c)
-    end
-end
-
-function Lux.gpu(cs::NamedTuple{names, <:Tuple{Vararg{ChainState}}}) where {names}
-    return map(cs) do c
-        return gpu(c)
-    end
-end
-
-function Lux.gpu(pinn::PINN)
-    Lux.@set! pinn.phi = gpu(pinn.phi)
-    Lux.@set! pinn.init_params = adapt(CuArray, pinn.init_params)
-    return pinn
-end
-
-function Lux.cpu(pinn::PINN)
-    Lux.@set! pinn.phi = cpu(pinn.phi)
-    Lux.@set! pinn.init_params = cpu(pinn.init_params)
-    return pinn
-end
+for dev in (:CPU, :CUDA, :AMDGPU, :Metal)
+    ldev = Symbol("Lux$(dev)Device")
+    ladaptor = Symbol("Lux$(dev)Adaptor")
+    @eval begin
+        function (device::$ldev)(cs::ChainState)
+            Lux.@set! cs.state = device(cs.state)
+            return cs
+        end
+
+        function (device::$ldev)(cs::NTofChainState{names}) where {names}
+            return map(cs) do c
+                return device(c)
+            end
+        end
+
+        function (device::$ldev)(pinn::PINN)
+            Lux.@set! pinn.phi = device(pinn.phi)
+            Lux.@set! pinn.init_params = adapt($(ladaptor)(), pinn.init_params)
+            return pinn
+        end
+    end
+end
 
 """
     using Sophon, ModelingToolkit, DomainSets
     using DomainSets: ×
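The loop above generates one device functor per Lux device type (`LuxCPUDevice`, `LuxCUDADevice`, `LuxAMDGPUDevice`, `LuxMetalDevice`) for `ChainState`, named tuples of `ChainState`, and `PINN`, replacing the deleted `Lux.gpu`/`Lux.cpu` methods. A minimal usage sketch, adapted from the GPU testset removed from `test/runtests.jl` further down (assumes a functional CUDA setup):

```julia
using Sophon, Lux, LuxCUDA, ComponentArrays

pinn = PINN(DiscreteFourierFeature(2, 1, 2, 2))
pinn = gpu_device()(pinn)              # moves states and initial parameters

getdata(pinn.init_params) isa CuArray  # true
pinn.phi.state.weight isa CuArray      # true

pinn = cpu_device()(pinn)              # round-trip back to the CPU
```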
""" forwarddiff(phi, t, εs, order, θ) = ForwardDiff.gradient(sum ∘ Base.Fix2(phi, θ), t) -@inline maybe_adapt(x::AbstractGPUArray, ε_) = ChainRulesCore.@ignore_derivatives convert(CuArray, ε_) -@inline maybe_adapt(x, ε_) = ChainRulesCore.@ignore_derivatives ε_ +@memoize maybe_convert(x::AbstractGPUArray, ε) = convert(parameterless_type(x), ε) +@memoize maybe_convert(x, ε) = ε +ChainRulesCore.@non_differentiable maybe_convert(x, ε) @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{1}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ ε, θ) .- phi(x .- ε, θ)) .* (h / 2) end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{2}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ ε, θ) .+ phi(x .- ε, θ) .- 2 .* phi(x, θ)) .* h^2 end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{3}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ 2 .* ε, θ) .- 2 .* phi(x .+ ε, θ) .+ 2 .* phi(x .- ε, θ) - phi(x .- 2 .* ε, θ)) .* h^3 ./ 2 end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{4}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ 2 .* ε, θ) .- 4 .* phi(x .+ ε, θ) .+ 6 .* phi(x, θ) .- 4 .* phi(x .- ε, θ) .+ phi(x .- 2 .* ε, θ)) .* h^4 end diff --git a/test/runtests.jl b/test/runtests.jl index 335976aa..fd1f7e23 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -288,27 +288,6 @@ rng = Random.default_rng() @test_nowarn AdaptiveTraining((θ, p) -> p, 5) @test_nowarn AdaptiveTraining(((θ, p) -> p, (θ, p) -> θ), (3, 4, 5)) end - - #= - @testset "GPU" begin - @testset "single model" begin - pinn = PINN(DiscreteFourierFeature(2,1,2,2)) - pinn = pinn |> gpu - @test getdata(pinn.init_params) isa CuArray - phi = pinn.phi - @test phi.state.weight isa CuArray - end - - @testset "multiple models" begin - pinn = PINN(u = DiscreteFourierFeature(2,1,2,2), - v = DiscreteFourierFeature(2,1,2,2)) - pinn = pinn |> gpu - @test getdata(pinn.init_params) isa CuArray - phi = pinn.phi - @test phi.u.state.weight isa CuArray - end - end - =# end @testset "BetaSampler" begin include("betasampler.jl") end