From 6fedbcaf4d0afcc3a233cdfcdb172bdcb16156a6 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 02:13:48 +0800 Subject: [PATCH 01/16] update to Lux new interface --- Project.toml | 1 - docs/src/qa.md | 9 +++--- docs/src/tutorials/helmholtz.md | 2 +- src/Sophon.jl | 2 +- src/pde/componentarrays.jl | 2 +- src/pde/discretize.jl | 2 +- src/pde/pinn_types.jl | 54 ++++++++++++++------------------- src/pde/utils.jl | 8 ++++- 8 files changed, 38 insertions(+), 42 deletions(-) diff --git a/Project.toml b/Project.toml index 853f05ea..14e800bd 100644 --- a/Project.toml +++ b/Project.toml @@ -46,7 +46,6 @@ SophonTaylorDiffExt = "TaylorDiff" [compat] Adapt = "3" -CUDA = "5" ChainRulesCore = "1" ComponentArrays = "0.15" Distributions = "0.25" diff --git a/docs/src/qa.md b/docs/src/qa.md index ca3bf579..c0207650 100644 --- a/docs/src/qa.md +++ b/docs/src/qa.md @@ -1,10 +1,11 @@ -## Q: How can I train the model using GPUs? +## Q: How can I train the model using my GPU? -A: To train the model on GPUs, invoke the gpu function on instances of PINN: +A: To train the model on a single GPU, do the following: ```julia -using Lux -pinn = gpu(PINN(...)) +using Lux, LuxCUDA +device = gpu_device() +pinn = PINN(...) |> device ``` ## Q: How can I monitor the loss for each loss function? diff --git a/docs/src/tutorials/helmholtz.md b/docs/src/tutorials/helmholtz.md index 130ad2b9..c4663d7a 100644 --- a/docs/src/tutorials/helmholtz.md +++ b/docs/src/tutorials/helmholtz.md @@ -38,7 +38,7 @@ bcs = [u(-1,y) ~ 0, u(1,y) ~ 0, u(x, -1) ~ 0, u(x, 1) ~ 0] Note that the boundary conditions are compatible with periocity, which allows us to apply [`BACON`](@ref). 
```@example helmholtz chain = BACON(2, 1, 5, 2; hidden_dims = 32, num_layers=5) -pinn = PINN(chain) # call `gpu` on it if you want to use gpu +pinn = PINN(chain) sampler = QuasiRandomSampler(300, 100) strategy = NonAdaptiveTraining() diff --git a/src/Sophon.jl b/src/Sophon.jl index 7f7d852a..8198eb5f 100644 --- a/src/Sophon.jl +++ b/src/Sophon.jl @@ -13,7 +13,7 @@ using ComponentArrays import SciMLBase import SciMLBase: parameterless_type, __solve, build_solution, NullParameters using StatsBase, QuasiMonteCarlo -using Adapt, ChainRulesCore, CUDA, GPUArrays, GPUArraysCore +using Adapt, ChainRulesCore, GPUArrays, GPUArraysCore import GPUArraysCore: AbstractGPUArray import QuasiMonteCarlo import Sobol diff --git a/src/pde/componentarrays.jl b/src/pde/componentarrays.jl index 7a95ba02..23c11001 100644 --- a/src/pde/componentarrays.jl +++ b/src/pde/componentarrays.jl @@ -11,5 +11,5 @@ const AbstractGPUComponentVecorMat{T, Ax} = Union{AbstractGPUComponentVector{T, AbstractGPUComponentMatrix{T, Ax}} function _ComponentArray(nt::NamedTuple) - return isongpu(nt) ? adapt(CuArray, ComponentArray(cpu(nt))) : ComponentArray(nt) + return isongpu(nt) ? adapt(get_gpu_adaptor(), ComponentArray(cpu(nt))) : ComponentArray(nt) end diff --git a/src/pde/discretize.jl b/src/pde/discretize.jl index 585bb0f8..86a5ca2f 100644 --- a/src/pde/discretize.jl +++ b/src/pde/discretize.jl @@ -80,7 +80,7 @@ function discretize(pde_system, pinn::PINN, sampler::PINNSampler, datasets = map(Base.Fix1(broadcast, fdtype), datasets) datasets = init_params isa AbstractGPUComponentVector ? 
- map(Base.Fix1(adapt, CuArray), datasets) : datasets + map(Base.Fix1(adapt, get_gpu_adaptor()), datasets) : datasets pde_and_bcs_loss_function = build_loss_function(pde_system, pinn, strategy, derivative, derivative_bc, fdtype) diff --git a/src/pde/pinn_types.jl b/src/pde/pinn_types.jl index faed9b62..60fc3144 100644 --- a/src/pde/pinn_types.jl +++ b/src/pde/pinn_types.jl @@ -2,7 +2,8 @@ PINN(chain, rng::AbstractRNG=Random.default_rng()) PINN(rng::AbstractRNG=Random.default_rng(); kwargs...) -A container for a neural network, its states and its initial parameters. Call `gpu` and `cpu` to move the neural network to the GPU and CPU respectively. +A container for a neural network, its states and its initial parameters. Call `Lux.gpu_device()` +and `Lux.cpu_device()` to move the neural network to the GPU and CPU respectively. The default element type of the parameters is `Float64`. ## Fields @@ -120,40 +121,29 @@ end const NTofChainState{names} = NamedTuple{names, <:Tuple{Vararg{ChainState}}} -function Lux.cpu(cs::ChainState) - Lux.@set! cs.state = cpu(cs.state) - return cs -end - -function Lux.gpu(cs::ChainState) - Lux.@set! cs.state = adapt(CuArray, cs.state) - return cs -end - -function Lux.cpu(cs::NamedTuple{names, <:Tuple{Vararg{ChainState}}}) where {names} - return map(cs) do c - return cpu(c) +for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) + ldev = Symbol("Lux$(dev)Device") + ladaptor = Symbol("Lux$(dev)Adaptor") + @eval begin + function (device::$ldev)(cs::ChainState) + Lux.@set! cs.state = device(cs.state) + return c + end + + function (device::$ldev)(cs::NTofChainState{names}) where {names} + return map(cs) do c + return device(c) + end + end + + function (device::$ldev)(pinn::PINN) + Lux.@set! pinn.phi = device(pinn.phi) + Lux.@set! 
pinn.init_params = adapt($(ladaptor)(), pinn.init_params) + return pinn + end end end -function Lux.gpu(cs::NamedTuple{names, <:Tuple{Vararg{ChainState}}}) where {names} - return map(cs) do c - return gpu(c) - end -end - -function Lux.gpu(pinn::PINN) - Lux.@set! pinn.phi = gpu(pinn.phi) - Lux.@set! pinn.init_params = adapt(CuArray, pinn.init_params) - return pinn -end - -function Lux.cpu(pinn::PINN) - Lux.@set! pinn.phi = cpu(pinn.phi) - Lux.@set! pinn.init_params = cpu(pinn.init_params) - return pinn -end - """ using Sophon, ModelingToolkit, DomainSets using DomainSets: × diff --git a/src/pde/utils.jl b/src/pde/utils.jl index b0a292c1..89658155 100644 --- a/src/pde/utils.jl +++ b/src/pde/utils.jl @@ -14,7 +14,13 @@ This function is only used for the first order derivative. """ forwarddiff(phi, t, εs, order, θ) = ForwardDiff.gradient(sum ∘ Base.Fix2(phi, θ), t) -@inline maybe_adapt(x::AbstractGPUArray, ε_) = ChainRulesCore.@ignore_derivatives convert(CuArray, ε_) +for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) + ldev = Symbol("Lux$(dev)Device") + ladaptor = Symbol("Lux$(dev)Adaptor") + @inline get_adaptor(::$(ldev)) = $(ladaptor)() +end +@inline get_gpu_adaptor() = get_adaptor(gpu_device()) +@inline maybe_adapt(x::AbstractGPUArray, ε_) = ChainRulesCore.@ignore_derivatives adapt(get_gpu_adaptor(), ε_) @inline maybe_adapt(x, ε_) = ChainRulesCore.@ignore_derivatives ε_ @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{1}) where {T<:AbstractFloat} From ab3c7425de0d703f16e975e0b474b5f5ef12fce3 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 02:16:20 +0800 Subject: [PATCH 02/16] Update Project.toml --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 14e800bd..d3095fc2 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,6 @@ version = "0.4.4" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = 
"d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" From 13a8a3d497607003242e0c5405e8e36127a31b84 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 02:20:48 +0800 Subject: [PATCH 03/16] rm GPUArrays --- Project.toml | 2 -- src/Sophon.jl | 2 +- src/pde/utils.jl | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index d3095fc2..972b2e37 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -50,7 +49,6 @@ ComponentArrays = "0.15" Distributions = "0.25" DomainSets = "0.5, 0.6" ForwardDiff = "0.10" -GPUArrays = "9" GPUArraysCore = "0.1" LRUCache = "1" Lux = "0.5.6" diff --git a/src/Sophon.jl b/src/Sophon.jl index 8198eb5f..00442659 100644 --- a/src/Sophon.jl +++ b/src/Sophon.jl @@ -13,7 +13,7 @@ using ComponentArrays import SciMLBase import SciMLBase: parameterless_type, __solve, build_solution, NullParameters using StatsBase, QuasiMonteCarlo -using Adapt, ChainRulesCore, GPUArrays, GPUArraysCore +using Adapt, ChainRulesCore, GPUArraysCore import GPUArraysCore: AbstractGPUArray import QuasiMonteCarlo import Sobol diff --git a/src/pde/utils.jl b/src/pde/utils.jl index 89658155..441f6fbd 100644 --- a/src/pde/utils.jl +++ b/src/pde/utils.jl @@ -17,7 +17,7 @@ forwarddiff(phi, t, εs, order, θ) = ForwardDiff.gradient(sum ∘ Base.Fix2(phi for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) ldev = Symbol("Lux$(dev)Device") ladaptor = Symbol("Lux$(dev)Adaptor") - @inline get_adaptor(::$(ldev)) = 
$(ladaptor)() + @eval @inline get_adaptor(::$(ldev)) = $(ladaptor)() end @inline get_gpu_adaptor() = get_adaptor(gpu_device()) @inline maybe_adapt(x::AbstractGPUArray, ε_) = ChainRulesCore.@ignore_derivatives adapt(get_gpu_adaptor(), ε_) From 0f9d613e0bd4ef9a495a4df3133b91b87c8f7f1a Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 02:56:00 +0800 Subject: [PATCH 04/16] Update pinn_types.jl --- src/pde/pinn_types.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pde/pinn_types.jl b/src/pde/pinn_types.jl index 60fc3144..9ff7578e 100644 --- a/src/pde/pinn_types.jl +++ b/src/pde/pinn_types.jl @@ -127,7 +127,7 @@ for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) @eval begin function (device::$ldev)(cs::ChainState) Lux.@set! cs.state = device(cs.state) - return c + return cs end function (device::$ldev)(cs::NTofChainState{names}) where {names} From 62636e896370a38f80ada26f1f83f660530116c2 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 12:41:36 +0800 Subject: [PATCH 05/16] using LuxCUDA --- Project.toml | 8 +++++--- docs/Project.toml | 1 + ext/SophonTaylorDiffExt.jl | 7 ------- ext/SophonTaylorDiffLuxCUDAExt.jl | 14 ++++++++++++++ src/Sophon.jl | 8 +++----- 5 files changed, 23 insertions(+), 15 deletions(-) create mode 100644 ext/SophonTaylorDiffLuxCUDAExt.jl diff --git a/Project.toml b/Project.toml index 972b2e37..68e4144e 100644 --- a/Project.toml +++ b/Project.toml @@ -21,6 +21,7 @@ NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" @@ -29,18 +30,19 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df" RuntimeGeneratedFunctions = 
"7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Sobol = "ed01d8cd-4d21-5b2a-85b4-cc3bdc58bad4" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" [weakdeps] +LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c" [extensions] SophonOptimisersExt = "Optimisers" SophonTaylorDiffExt = "TaylorDiff" +SophonTaylorDiffLuxExt = ["TaylorDiff", "LuxCUDA"] [compat] Adapt = "3" @@ -52,6 +54,7 @@ ForwardDiff = "0.10" GPUArraysCore = "0.1" LRUCache = "1" Lux = "0.5.6" +LuxCUDA = "0.3" MacroTools = "0.5" Memoize = "0.4" ModelingToolkit = "8" @@ -59,14 +62,13 @@ NNlib = "0.9" Optimisers = "0.2" Optimization = "3" OptimizationOptimisers = "0.1" +PackageExtensionCompat = "1" ProgressBars = "1.5" QuasiMonteCarlo = "0.2, 0.3" Requires = "1" RuntimeGeneratedFunctions = "0.5" SciMLBase = "2" Sobol = "1, 2" -StaticArrays = "1.5" -StaticArraysCore = "1" StatsBase = "0.33, 0.34" Symbolics = "4, 5" julia = "1.8" diff --git a/docs/Project.toml b/docs/Project.toml index 2d69b6e6..adb4ece9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,6 +6,7 @@ DocThemeIndigo = "8bac0ac5-51bf-41f9-885e-2bf1ac2bec5f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf" +GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" Integrals = "de52edbc-65ea-441a-8357-d3a637375a31" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" diff --git a/ext/SophonTaylorDiffExt.jl b/ext/SophonTaylorDiffExt.jl index b15cbb00..916b591c 100644 --- a/ext/SophonTaylorDiffExt.jl +++ b/ext/SophonTaylorDiffExt.jl @@ -147,13 +147,6 @@ for l in 1:4 end end -# avoid NaN 
-function Base.:*(A::Union{Sophon.CuMatrix{T}, LinearAlgebra.Transpose{T, Sophon.CuArray}}, - B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N} - C = similar(B, (size(A, 1), size(B, 2))) - fill!(C, zero(eltype(C))) - return LinearAlgebra.mul!(C, A, B) -end function __init__() @static if VERSION >= v"1.9.0" diff --git a/ext/SophonTaylorDiffLuxCUDAExt.jl b/ext/SophonTaylorDiffLuxCUDAExt.jl new file mode 100644 index 00000000..50ee4b37 --- /dev/null +++ b/ext/SophonTaylorDiffLuxCUDAExt.jl @@ -0,0 +1,14 @@ +module SophonTaylorDiffLuxCUDAExt + +using TaylorDiff, LuxCUDA, Sophon + + +# avoid NaN +function Base.:*(A::Union{Sophon.CuMatrix{T}, LinearAlgebra.Transpose{T, Sophon.CuArray}}, + B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N} + C = similar(B, (size(A, 1), size(B, 2))) + fill!(C, zero(eltype(C))) + return LinearAlgebra.mul!(C, A, B) +end + +emd diff --git a/src/Sophon.jl b/src/Sophon.jl index 00442659..31a87233 100644 --- a/src/Sophon.jl +++ b/src/Sophon.jl @@ -26,7 +26,7 @@ using ForwardDiff using MacroTools using MacroTools: prewalk, postwalk using Requires -using StaticArrays: SVector +using StaticArraysCore: SVector RuntimeGeneratedFunctions.init(@__MODULE__) @@ -46,11 +46,9 @@ include("pde/training_strategies.jl") include("pde/pinnsampler.jl") include("pde/discretize.jl") +using PackageExtensionCompat function __init__() - @static if !isdefined(Base, :get_extension) - @require Optimisers="3bd65402-5787-11e9-1adc-39752487f4e2" begin include("../ext/SophonOptimisersExt.jl") end - @require TaylorDiff="b36ab563-344f-407b-a36a-4f200bebf99c" begin include("../ext/SophonTaylorDiffExt.jl") end - end + @require_extensions end export @showprogress From cca3b11f51454e405fbaf9aa0b3878910cb18eb7 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 12:58:00 +0800 Subject: [PATCH 06/16] Update Project.toml --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 68e4144e..93d63202 100644 --- a/Project.toml +++ 
b/Project.toml @@ -81,6 +81,7 @@ OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" [targets] test = ["Test", "Zygote", "ModelingToolkit", "DomainSets", "OptimizationOptimJL", "TaylorDiff"] From 61742a6eeefcaf4dca6788aebe5d9ae13b1a49a8 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 13:03:44 +0800 Subject: [PATCH 07/16] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 93d63202..8fc1a328 100644 --- a/Project.toml +++ b/Project.toml @@ -58,7 +58,7 @@ LuxCUDA = "0.3" MacroTools = "0.5" Memoize = "0.4" ModelingToolkit = "8" -NNlib = "0.9" +NNlib = "0.8, 0.9" Optimisers = "0.2" Optimization = "3" OptimizationOptimisers = "0.1" From c3dd66260abc638ce3810201dcb7cff59b980557 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 13:53:20 +0800 Subject: [PATCH 08/16] fix finitediff on gpu --- docs/Project.toml | 1 + ext/SophonTaylorDiffLuxCUDAExt.jl | 6 ++---- src/pde/utils.jl | 6 ++++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index adb4ece9..f516c55d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -19,5 +19,6 @@ OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Sophon = "077df616-1c15-4d29-b519-7542a62df138" +StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/ext/SophonTaylorDiffLuxCUDAExt.jl b/ext/SophonTaylorDiffLuxCUDAExt.jl index 50ee4b37..d4dc2f85 100644 --- a/ext/SophonTaylorDiffLuxCUDAExt.jl +++ b/ext/SophonTaylorDiffLuxCUDAExt.jl @@ -2,13 +2,11 @@ module 
SophonTaylorDiffLuxCUDAExt using TaylorDiff, LuxCUDA, Sophon - -# avoid NaN function Base.:*(A::Union{Sophon.CuMatrix{T}, LinearAlgebra.Transpose{T, Sophon.CuArray}}, - B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N} + B::Sophon.CuMatrix{TaylorScalar{T, N}}) where {T, N} C = similar(B, (size(A, 1), size(B, 2))) fill!(C, zero(eltype(C))) return LinearAlgebra.mul!(C, A, B) end -emd +end diff --git a/src/pde/utils.jl b/src/pde/utils.jl index 441f6fbd..13695811 100644 --- a/src/pde/utils.jl +++ b/src/pde/utils.jl @@ -20,8 +20,10 @@ for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) @eval @inline get_adaptor(::$(ldev)) = $(ladaptor)() end @inline get_gpu_adaptor() = get_adaptor(gpu_device()) -@inline maybe_adapt(x::AbstractGPUArray, ε_) = ChainRulesCore.@ignore_derivatives adapt(get_gpu_adaptor(), ε_) -@inline maybe_adapt(x, ε_) = ChainRulesCore.@ignore_derivatives ε_ + +@memoize maybe_adapt(x::AbstractGPUArray, ε) = convert(typeof(x), ε) +@memoize maybe_adapt(x, ε) = ε +ChainRulesCore.@non_differentiable maybe_adapt(x, ε) @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{1}) where {T<:AbstractFloat} ε = maybe_adapt(x, ε_) From ff9b95ca9aa949a664434bf1622954e68d07758c Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 14:00:44 +0800 Subject: [PATCH 09/16] Update utils.jl --- src/pde/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pde/utils.jl b/src/pde/utils.jl index 13695811..b6463c2f 100644 --- a/src/pde/utils.jl +++ b/src/pde/utils.jl @@ -21,7 +21,7 @@ for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) end @inline get_gpu_adaptor() = get_adaptor(gpu_device()) -@memoize maybe_adapt(x::AbstractGPUArray, ε) = convert(typeof(x), ε) +@memoize maybe_adapt(x::AbstractGPUArray, ε) = convert(parameterless_type(x), ε) @memoize maybe_adapt(x, ε) = ε ChainRulesCore.@non_differentiable maybe_adapt(x, ε) From 6b8a28cbd292eb1d3290b1bd2050308e866cc094 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 14:55:33 
+0800 Subject: [PATCH 10/16] gpu_device() on prob --- Project.toml | 3 ++- docs/src/qa.md | 3 +-- ext/SophonLuxCUDAExt.jl | 12 ++++++++++++ src/Sophon.jl | 1 - src/pde/componentarrays.jl | 15 --------------- src/pde/discretize.jl | 4 +--- src/pde/pinn_types.jl | 3 +-- src/pde/utils.jl | 25 +++++++------------------ test/runtests.jl | 21 --------------------- 9 files changed, 24 insertions(+), 63 deletions(-) create mode 100644 ext/SophonLuxCUDAExt.jl delete mode 100644 src/pde/componentarrays.jl diff --git a/Project.toml b/Project.toml index 8fc1a328..f16e9ad0 100644 --- a/Project.toml +++ b/Project.toml @@ -42,7 +42,8 @@ TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c" [extensions] SophonOptimisersExt = "Optimisers" SophonTaylorDiffExt = "TaylorDiff" -SophonTaylorDiffLuxExt = ["TaylorDiff", "LuxCUDA"] +SophonTaylorDiffLuxCUDAExt = ["TaylorDiff", "LuxCUDA"] +SophonLuxCUDAExt = "LuxCUDA" [compat] Adapt = "3" diff --git a/docs/src/qa.md b/docs/src/qa.md index c0207650..10fc5660 100644 --- a/docs/src/qa.md +++ b/docs/src/qa.md @@ -4,8 +4,7 @@ A: To train the model on a single GPU, do the following: ```julia using Lux, LuxCUDA -device = gpu_device() -pinn = PINN(...) |> device +prob = Sophon.discretize(...) |> gpu_device() ``` ## Q: How can I monitor the loss for each loss function? 
diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl new file mode 100644 index 00000000..b309ef28 --- /dev/null +++ b/ext/SophonLuxCUDAExt.jl @@ -0,0 +1,12 @@ +module SophonLuxCUDAExt + +using Lux, LuxCUDA, Sophon, ModelingToolkit + +function (::LuxCUDADevice)(prob::Union{ModelingToolkit.PDESystem, Sophon.PDESystem}) + u0 = adapt(CuArray, prob.u0) + p = [adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)] + prob = remake(prob, u0=u0, p=p) + return prob +end + +end diff --git a/src/Sophon.jl b/src/Sophon.jl index 31a87233..cde41cae 100644 --- a/src/Sophon.jl +++ b/src/Sophon.jl @@ -38,7 +38,6 @@ include("layers/nets.jl") include("layers/utils.jl") include("layers/operators.jl") -include("pde/componentarrays.jl") include("pde/pinn_types.jl") include("pde/utils.jl") include("pde/sym_utils.jl") diff --git a/src/pde/componentarrays.jl b/src/pde/componentarrays.jl deleted file mode 100644 index 23c11001..00000000 --- a/src/pde/componentarrays.jl +++ /dev/null @@ -1,15 +0,0 @@ -const AbstractGPUComponentArray{T, N, Ax} = ComponentArray{T, N, - <:GPUArraysCore.AbstractGPUVector, - Ax} -const AbstractGPUComponentVector{T, Ax} = ComponentArray{T, 1, - <:GPUArraysCore.AbstractGPUVector, - Ax} -const AbstractGPUComponentMatrix{T, Ax} = ComponentArray{T, 2, - <:GPUArraysCore.AbstractGPUMatrix, - Ax} -const AbstractGPUComponentVecorMat{T, Ax} = Union{AbstractGPUComponentVector{T, Ax}, - AbstractGPUComponentMatrix{T, Ax}} - -function _ComponentArray(nt::NamedTuple) - return isongpu(nt) ? 
adapt(get_gpu_adaptor(), ComponentArray(cpu(nt))) : ComponentArray(nt) -end diff --git a/src/pde/discretize.jl b/src/pde/discretize.jl index 86a5ca2f..51dc4a3d 100644 --- a/src/pde/discretize.jl +++ b/src/pde/discretize.jl @@ -76,11 +76,9 @@ function discretize(pde_system, pinn::PINN, sampler::PINNSampler, adtype=Optimization.AutoZygote()) datasets = sample(pde_system, sampler) init_params = Lux.fmap(Base.Fix1(broadcast, fdtype), pinn.init_params) - init_params = _ComponentArray(init_params) + init_params = ComponentArray(init_params) datasets = map(Base.Fix1(broadcast, fdtype), datasets) - datasets = init_params isa AbstractGPUComponentVector ? - map(Base.Fix1(adapt, get_gpu_adaptor()), datasets) : datasets pde_and_bcs_loss_function = build_loss_function(pde_system, pinn, strategy, derivative, derivative_bc, fdtype) diff --git a/src/pde/pinn_types.jl b/src/pde/pinn_types.jl index 9ff7578e..09d707ee 100644 --- a/src/pde/pinn_types.jl +++ b/src/pde/pinn_types.jl @@ -2,8 +2,7 @@ PINN(chain, rng::AbstractRNG=Random.default_rng()) PINN(rng::AbstractRNG=Random.default_rng(); kwargs...) -A container for a neural network, its states and its initial parameters. Call `Lux.gpu_device()` -and `Lux.cpu_device()` to move the neural network to the GPU and CPU respectively. +A container for a neural network, its states and its initial parameters. The default element type of the parameters is `Float64`. ## Fields diff --git a/src/pde/utils.jl b/src/pde/utils.jl index b6463c2f..24550f2f 100644 --- a/src/pde/utils.jl +++ b/src/pde/utils.jl @@ -1,7 +1,3 @@ -function isongpu(nt::NamedTuple) - return any(x -> x isa AbstractGPUArray, Lux.fcollect(nt)) -end - function get_l2_loss_function(loss_function, dataset) loss(θ) = mean(abs2, loss_function(dataset, θ)) return loss @@ -14,35 +10,28 @@ This function is only used for the first order derivative. 
""" forwarddiff(phi, t, εs, order, θ) = ForwardDiff.gradient(sum ∘ Base.Fix2(phi, θ), t) -for (dev) in (:CPU, :CUDA, :AMDGPU, :Metal) - ldev = Symbol("Lux$(dev)Device") - ladaptor = Symbol("Lux$(dev)Adaptor") - @eval @inline get_adaptor(::$(ldev)) = $(ladaptor)() -end -@inline get_gpu_adaptor() = get_adaptor(gpu_device()) - -@memoize maybe_adapt(x::AbstractGPUArray, ε) = convert(parameterless_type(x), ε) -@memoize maybe_adapt(x, ε) = ε -ChainRulesCore.@non_differentiable maybe_adapt(x, ε) +@memoize maybe_convert(x::AbstractGPUArray, ε) = convert(parameterless_type(x), ε) +@memoize maybe_convert(x, ε) = ε +ChainRulesCore.@non_differentiable maybe_convert(x, ε) @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{1}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ ε, θ) .- phi(x .- ε, θ)) .* (h / 2) end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{2}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ ε, θ) .+ phi(x .- ε, θ) .- 2 .* phi(x, θ)) .* h^2 end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{3}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ 2 .* ε, θ) .- 2 .* phi(x .+ ε, θ) .+ 2 .* phi(x .- ε, θ) - phi(x .- 2 .* ε, θ)) .* h^3 ./ 2 end @inline function finitediff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{4}) where {T<:AbstractFloat} - ε = maybe_adapt(x, ε_) + ε = maybe_convert(x, ε_) return (phi(x .+ 2 .* ε, θ) .- 4 .* phi(x .+ ε, θ) .+ 6 .* phi(x, θ) .- 4 .* phi(x .- ε, θ) .+ phi(x .- 2 .* ε, θ)) .* h^4 end diff --git a/test/runtests.jl b/test/runtests.jl index 335976aa..fd1f7e23 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -288,27 +288,6 @@ rng = Random.default_rng() @test_nowarn AdaptiveTraining((θ, p) -> p, 5) @test_nowarn AdaptiveTraining(((θ, p) -> p, (θ, p) -> θ), (3, 4, 5)) end - - #= - @testset "GPU" begin - @testset "single 
model" begin - pinn = PINN(DiscreteFourierFeature(2,1,2,2)) - pinn = pinn |> gpu - @test getdata(pinn.init_params) isa CuArray - phi = pinn.phi - @test phi.state.weight isa CuArray - end - - @testset "multiple models" begin - pinn = PINN(u = DiscreteFourierFeature(2,1,2,2), - v = DiscreteFourierFeature(2,1,2,2)) - pinn = pinn |> gpu - @test getdata(pinn.init_params) isa CuArray - phi = pinn.phi - @test phi.u.state.weight isa CuArray - end - end - =# end @testset "BetaSampler" begin include("betasampler.jl") end From 855804504de9254a929b913dc755103855969978 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 14:56:27 +0800 Subject: [PATCH 11/16] Update SophonLuxCUDAExt.jl --- ext/SophonLuxCUDAExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl index b309ef28..6705bea6 100644 --- a/ext/SophonLuxCUDAExt.jl +++ b/ext/SophonLuxCUDAExt.jl @@ -1,8 +1,8 @@ module SophonLuxCUDAExt -using Lux, LuxCUDA, Sophon, ModelingToolkit +using Lux, LuxCUDA, Sophon, Optimization -function (::LuxCUDADevice)(prob::Union{ModelingToolkit.PDESystem, Sophon.PDESystem}) +function (::LuxCUDADevice)(prob::OptimizationProblem) u0 = adapt(CuArray, prob.u0) p = [adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)] prob = remake(prob, u0=u0, p=p) From 07d78c655abf0985e2f6f395fecd329189c00ee3 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 15:11:28 +0800 Subject: [PATCH 12/16] Update SophonTaylorDiffExt.jl --- ext/SophonTaylorDiffExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/SophonTaylorDiffExt.jl b/ext/SophonTaylorDiffExt.jl index 916b591c..47a0136b 100644 --- a/ext/SophonTaylorDiffExt.jl +++ b/ext/SophonTaylorDiffExt.jl @@ -115,7 +115,7 @@ for N in 1:5 end @inline function taylordiff(phi, x, θ, ε_::AbstractVector{T}, h::T, ::Val{N}) where {T <: Number, N} - ε = Sophon.maybe_adapt(x, ε_) + ε = Sophon.maybe_convert(x, ε_) return TaylorDiff.derivative(Base.Fix2(phi, 
θ), x, ε, Val{N+1}()) end From 77f7d24eaac0d570cdca6508d5c4b3d9d08cb815 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 15:11:30 +0800 Subject: [PATCH 13/16] Update helmholtz.md --- docs/src/tutorials/helmholtz.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/src/tutorials/helmholtz.md b/docs/src/tutorials/helmholtz.md index c4663d7a..58713e44 100644 --- a/docs/src/tutorials/helmholtz.md +++ b/docs/src/tutorials/helmholtz.md @@ -55,9 +55,7 @@ xs, ys= [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_analytic(x,y) = sinpi(a1*x)*sinpi(a2*y) u_real = [u_analytic(x,y) for x in xs, y in ys] -phi_cpu = cpu(phi) # in case you are using GPU -ps_cpu = cpu(res.u) -u_pred = [sum(phi_cpu(([x,y]), ps_cpu)) for x in xs, y in ys] +u_pred = [sum(phi(([x,y]), ps)) for x in xs, y in ys] using CairoMakie axis = (xlabel="x", ylabel="y", title="Analytical Solution") From fcc879d22f1cfb0e4e512a981dcf007b646fa3f0 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 17:02:04 +0800 Subject: [PATCH 14/16] Update helmholtz.md --- docs/src/tutorials/helmholtz.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/helmholtz.md b/docs/src/tutorials/helmholtz.md index 58713e44..a09b3cde 100644 --- a/docs/src/tutorials/helmholtz.md +++ b/docs/src/tutorials/helmholtz.md @@ -50,7 +50,7 @@ prob = Sophon.discretize(helmholtz, pinn, sampler, strategy) Let's plot the result. 
```@example helmholtz phi = pinn.phi - +ps = res.u xs, ys= [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_analytic(x,y) = sinpi(a1*x)*sinpi(a2*y) u_real = [u_analytic(x,y) for x in xs, y in ys] From 6f750b6aa2d6afa9f8f837974578dfa5de5a5fd0 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 17:10:13 +0800 Subject: [PATCH 15/16] Update SophonLuxCUDAExt.jl --- ext/SophonLuxCUDAExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl index 6705bea6..23cbd78d 100644 --- a/ext/SophonLuxCUDAExt.jl +++ b/ext/SophonLuxCUDAExt.jl @@ -1,6 +1,6 @@ module SophonLuxCUDAExt -using Lux, LuxCUDA, Sophon, Optimization +using Lux, LuxCUDA, Sophon, Optimization, Adapt function (::LuxCUDADevice)(prob::OptimizationProblem) u0 = adapt(CuArray, prob.u0) From 798e747755552dfdb487ac992f72f5070dcf7321 Mon Sep 17 00:00:00 2001 From: Yicheng Wu Date: Sat, 25 Nov 2023 18:22:01 +0800 Subject: [PATCH 16/16] fix zygote error by using tuple --- ext/SophonLuxCUDAExt.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ext/SophonLuxCUDAExt.jl b/ext/SophonLuxCUDAExt.jl index 23cbd78d..47a5c92b 100644 --- a/ext/SophonLuxCUDAExt.jl +++ b/ext/SophonLuxCUDAExt.jl @@ -4,9 +4,8 @@ using Lux, LuxCUDA, Sophon, Optimization, Adapt function (::LuxCUDADevice)(prob::OptimizationProblem) u0 = adapt(CuArray, prob.u0) - p = [adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)] - prob = remake(prob, u0=u0, p=p) - return prob + p = Tuple(adapt(CuArray, prob.p[i]) for i in 1:length(prob.p)) # have to use tuple here... + return Optimization.OptimizationProblem(prob.f, u0, p) end end