trixi-framework · jkravs · May 24, 2023 · May 29, 2023 · May 29, 2023 · May 29, 2023
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -0,0 +1,21 @@
+# Parent pipeline: launches the child pipeline in .test-ci.yml once per Julia version.
+stages: [test]
+
+# Hidden template for the per-version trigger jobs below.
+# `strategy: depend` makes the trigger job wait for the child pipeline and take its status.
+.trigger-template:
+    stage: test
+    trigger:
+        strategy: depend
+        include: /.test-ci.yml
+        forward: {yaml_variables: true}
+
+# JULIA_EXEC is the Julia launcher the child pipeline invokes as $JULIA_EXEC.
+julia-1.8-test:
+    extends: .trigger-template
+    variables: {JULIA_EXEC: "julia-1.8"}
+    allow_failure: true
+
+julia-1.9-test:
+    extends: .trigger-template
+    variables: {JULIA_EXEC: "julia-1.9"}
diff --git a/.test-ci.yml b/.test-ci.yml
@@ -0,0 +1,84 @@
+# Child pipeline (included by the trigger jobs in .gitlab-ci.yml).
+# Stages run in this order: precompile, test, benchmark.
+stages: [precompile, test, benchmark]
+
+# Applied to every job in this file: only runners tagged "downscope" pick them up.
+default:
+    tags:
+        - downscope
+
+.julia-job:  # hidden base job that every job in this file extends, directly or indirectly
+    before_script:
+        - source /work/co693196/MA/julia.sh
+    variables:  # NOTE(review): these look like sbatch flags read by the runner; confirm
+        SLURM_PARAM_ACCOUNT: '-A thes1464'
+        SLURM_PARAM_TASKS: '-n 1'
+        SLURM_PARAM_CPUS: '--cpus-per-task=24'
+        SLURM_PARAM_TIME: '-t 10:00:00'
+
+precompile-job:  # develops the package from the parent directory into a scratch project in ./run
+    extends: .julia-job
+    stage: precompile
+    script:
+        - mkdir run
+        - cd run  # so path=".." below is the directory the job started in
+        - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.develop(PackageSpec(path=".."))'
+
+.test-job:  # hidden base for test jobs: ./run project with OrdinaryDiffEq, KernelAbstractions and the package
+    extends: .julia-job
+    stage: test
+    before_script:  # replaces the before_script of .julia-job, hence the repeated `source` line
+        - source /work/co693196/MA/julia.sh
+        - mkdir run
+        - cd run
+        - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.add(["OrdinaryDiffEq", "KernelAbstractions"]); Pkg.develop(PackageSpec(path=".."));'
+
+.benchmark-job:  # hidden base for benchmark jobs: same setup as .test-job plus BenchmarkTools
+    extends: .julia-job
+    stage: benchmark
+    before_script:  # replaces the before_script of .julia-job, hence the repeated `source` line
+        - source /work/co693196/MA/julia.sh
+        - mkdir run
+        - cd run
+        - $JULIA_EXEC --project="." -e 'using Pkg; Pkg.add(["OrdinaryDiffEq", "KernelAbstractions", "BenchmarkTools"]); Pkg.develop(PackageSpec(path=".."));'
+
+cpu-test-job:  # tree 2D advection and p4est 2D test files with offload=false
+    extends: .test-job
+    script:  # --threads=24 matches --cpus-per-task=24 in .julia-job
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=false)'
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=false)'
+
+cpu-offload-test-job:  # same two test files as cpu-test-job, but with offload=true (no backend override)
+    extends: .test-job
+    script:
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=true)'
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=true)'
+
+gpu-offload-test-job:  # same two test files with offload=true and backend=CUDABackend()
+    extends: .test-job
+    variables:  # added on top of the SLURM_PARAM_* values inherited from .julia-job
+        SLURM_PARAM_GPUS: "--gres=gpu:volta:1"
+        SLURM_PARAM_PARTITION: "--partition=c18g"
+    script:  # CUDA is added to the ./run project here, not in the shared before_script
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Pkg; Pkg.add("CUDA")'
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA; using CUDA.CUDAKernels; trixi_include(pkgdir(Trixi, "test", "test_tree_2d_advection.jl"), offload=true, backend=CUDABackend())'
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA; using CUDA.CUDAKernels; trixi_include(pkgdir(Trixi, "test", "test_p4est_2d.jl"), offload=true, backend=CUDABackend())'
+
+cpu-benchmark-job:  # benchmarks the tree 2D advection elixir with offload=false
+    extends: .benchmark-job
+    script:  # the @benchmark report is shown on stderr; stdout is discarded by `1> /dev/null`
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=false))' 1> /dev/null
+
+cpu-offload-benchmark-job:  # benchmarks the tree 2D advection elixir with offload=true
+    extends: .benchmark-job
+    script:  # the @benchmark report is shown on stderr; stdout is discarded by `1> /dev/null`
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=true))' 1> /dev/null
+
+gpu-offload-benchmark-job:  # benchmarks the tree 2D advection elixir with offload=true on CUDABackend()
+    extends: .benchmark-job
+    variables:  # added on top of the SLURM_PARAM_* values inherited from .julia-job
+        SLURM_PARAM_GPUS: "--gres=gpu:volta:1"
+        SLURM_PARAM_PARTITION: "--partition=c18g"
+    script:  # the @benchmark report is shown on stderr; stdout is discarded by `1> /dev/null`
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Pkg; Pkg.add("CUDA")'
+        - $JULIA_EXEC --project="." --threads=24 -e 'using Trixi, CUDA, CUDA.CUDAKernels, BenchmarkTools; show(stderr, "text/plain", @benchmark trixi_include($joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl"), offload=true, backend=CUDABackend()))' 1> /dev/null
diff --git a/Project.toml b/Project.toml
@@ -10,8 +10,10 @@ DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
 EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
@@ -44,10 +46,18 @@ TriplotBase = "981d1d27-644d-49a2-9326-4793e63143c3"
 TriplotRecipes = "808ab39a-a642-4abf-81ff-4cb34ebbffa3"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
 
 [extensions]
+TrixiAMDGPUExt = "AMDGPU"
+TrixiCUDAExt = "CUDA"
 TrixiMakieExt = "Makie"
+TrixiMetalExt = "Metal"
+TrixiOneAPIExt = "oneAPI"
 
 [compat]
 CodeTracking = "1.0.5"
@@ -92,4 +102,8 @@ TriplotRecipes = "0.1"
 julia = "1.8"
 
 [extras]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic.jl b/examples/p4est_2d_dgsem/elixir_advection_basic.jl
@@ -3,15 +3,17 @@
 
 using OrdinaryDiffEq
 using Trixi
+using KernelAbstractions
 
 ###############################################################################
 # semidiscretization of the linear advection equation
 
 advection_velocity = (0.2, -0.7)
 equations = LinearScalarAdvectionEquation2D(advection_velocity)
+backend = CPU()
 
 # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, backend=backend)
 
 coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
 coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y))
@@ -24,14 +26,13 @@ mesh = P4estMesh(trees_per_dimension, polydeg = 3,
                  initial_refinement_level = 1)
 
 # A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
-                                    solver)
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend)
 
 ###############################################################################
 # ODE solvers, callbacks etc.
 
 # Create ODE problem with time span from 0.0 to 1.0
-ode = semidiscretize(semi, (0.0, 1.0));
+ode = semidiscretize(semi, (0.0, 1.0); offload=false, backend=backend);
 
 # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
 # and resets the timers

diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_fd.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_fd.jl
@@ -0,0 +1,63 @@
+using OrdinaryDiffEq
+using Trixi
+using KernelAbstractions
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+backend = CPU() # KernelAbstractions backend passed to the solver, semidiscretization and ODE setup below
+
+advection_velocity = (0.2, -0.7, 0.5)
+equations = LinearScalarAdvectionEquation3D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3, (local) Lax-Friedrichs/Rusanov surface flux and a flux-differencing volume integral
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, # NOTE(review): flux differencing is documented for symmetric volume fluxes; confirm flux_lax_friedrichs below
+               volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs), backend=backend)
+
+coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z))
+coordinates_max = ( 1.0,  1.0,  1.0) # maximum coordinates (max(x), max(y), max(z))
+
+# Create P4estMesh with 8 x 8 x 8 elements: 4 trees per dimension, refined once (note `initial_refinement_level=1`)
+trees_per_dimension = (4, 4, 4)
+mesh = P4estMesh(trees_per_dimension, polydeg=1,
+                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
+                 initial_refinement_level=1)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan; offload=false, backend=backend) # NOTE(review): `offload` presumably toggles the KernelAbstractions code path; confirm
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback analyzes the solution at regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval=100)
+
+# The SaveSolutionCallback saves the solution to a file at regular intervals
+save_solution = SaveSolutionCallback(interval=100,
+                                     solution_variables=cons2prim)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl=1.2)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
+
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
+            dt=1.0, # `solve` needs some value here, but the stepsize_callback overwrites it
+            save_everystep=false, callback=callbacks);
+
+# Print the timer summary (the timers were reset by the SummaryCallback at the start of the main loop)
+summary_callback()
diff --git a/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl
@@ -0,0 +1,80 @@
+using OrdinaryDiffEq
+using Trixi
+using KernelAbstractions
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations3D(1.4) # NOTE(review): 1.4 is presumably the `equations.gamma` read by the initial condition below; confirm
+
+"""
+    initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D)
+
+The classical inviscid Taylor-Green vortex.
+"""
+function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D)
+  # `x` holds the three coordinates; `t` is unused; the result goes through `prim2cons`
+  A  = 1.0 # magnitude of speed
+  Ms = 0.1 # maximum Mach number
+  c2x, c2y, c2z = cos(2*x[1]), cos(2*x[2]), cos(2*x[3])
+
+  rho = 1.0
+  velocity = (A * sin(x[1]) * cos(x[2]) * cos(x[3]), -A * cos(x[1]) * sin(x[2]) * cos(x[3]), 0.0)
+  # background pressure (scaling to get Ms) plus the vortex pressure perturbation
+  p_background = (A / Ms)^2 * rho / equations.gamma
+  p = p_background + 1.0/16.0 * A^2 * rho * (c2x*c2z + 2*c2y + 2*c2x + c2y*c2z)
+  return prim2cons(SVector(rho, velocity..., p), equations)
+end
+
+backend = CPU() # KernelAbstractions backend passed to the solver, semidiscretization and ODE setup below
+
+initial_condition = initial_condition_taylor_green_vortex
+
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, # NOTE(review): flux differencing is documented for symmetric volume fluxes; confirm flux_lax_friedrichs below
+               volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs), backend=backend)
+
+coordinates_min = (-1.0, -1.0, -1.0) .* pi # domain is [-pi, pi]^3
+coordinates_max = ( 1.0,  1.0,  1.0) .* pi
+
+# Create P4estMesh with 8 x 8 x 8 elements: 4 trees per dimension, refined once (note `initial_refinement_level=1`)
+trees_per_dimension = (4, 4, 4)
+mesh = P4estMesh(trees_per_dimension, polydeg=1,
+                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
+                 initial_refinement_level=1)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; backend=backend)
+
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 5.0) # simulated time span
+ode = semidiscretize(semi, tspan; offload=true, backend=backend) # NOTE(review): the other elixirs in this change use offload=false; confirm `true` is intended
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100 # shared by the analysis and alive callbacks below
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100, # same value as analysis_interval, but set independently
+                                     save_initial_solution=true,
+                                     save_final_solution=true,
+                                     solution_variables=cons2prim)
+
+stepsize_callback = StepsizeCallback(cfl=0.9)
+
+callbacks = CallbackSet(summary_callback, # all callbacks are passed to `solve` as one set
+                        analysis_callback, alive_callback,
+                        save_solution,
+                        stepsize_callback)
+
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
+            dt=1.0, # `solve` needs some value here, but the stepsize_callback overwrites it
+            save_everystep=false, callback=callbacks);
+summary_callback() # print the timer summary
diff --git a/examples/tree_2d_dgsem/elixir_advection_basic.jl b/examples/tree_2d_dgsem/elixir_advection_basic.jl
@@ -1,15 +1,17 @@
 
 using OrdinaryDiffEq
+using KernelAbstractions
 using Trixi
 
 ###############################################################################
 # semidiscretization of the linear advection equation
 
+backend = CPU()
 advection_velocity = (0.2, -0.7)
 equations = LinearScalarAdvectionEquation2D(advection_velocity)
 
 # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, backend=backend)
 
 coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
 coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y))
@@ -20,14 +22,14 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
                 n_cells_max = 30_000) # set maximum capacity of tree data structure
 
 # A semidiscretization collects data structures and functions for the spatial discretization
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
-                                    solver)
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver; backend=backend)
+
 
 ###############################################################################
 # ODE solvers, callbacks etc.
 
 # Create ODE problem with time span from 0.0 to 1.0
-ode = semidiscretize(semi, (0.0, 1.0));
+ode = semidiscretize(semi, (0.0, 1.0); offload=false, backend=backend);
 
 # At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
 # and resets the timers

diff --git a/ext/TrixiAMDGPUExt.jl b/ext/TrixiAMDGPUExt.jl
@@ -0,0 +1,19 @@
+# Package extension for some GPGPU API calls missing in KernelAbstractions
+
+module TrixiAMDGPUExt
+
+using Trixi
+if !isdefined(Base, :get_extension)
+    # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl
+    using ..AMDGPU: ROCArray
+    using ..AMDGPU.ROCKernels: ROCBackend
+else
+    using AMDGPU: ROCArray
+    using AMDGPU.ROCKernels: ROCBackend
+end
+
+# Adds a `ROCBackend` method to `Trixi.get_array_type`: the array type
+# returned for this backend is `ROCArray`.
+Trixi.get_array_type(::ROCBackend) = ROCArray
+
+end