diff --git a/examples/p4est_2d_dgsem/elixir_shallowwater_wellbalanced.jl b/examples/p4est_2d_dgsem/elixir_shallowwater_wellbalanced.jl new file mode 100644 index 0000000..5be32db --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_shallowwater_wellbalanced.jl @@ -0,0 +1,186 @@ + +using Downloads: download +using OrdinaryDiffEq +using Trixi +using TrixiShallowWater + +############################################################################### +# semidiscretization of the shallow water equations with a continuous +# bottom topography function + +equations = ShallowWaterEquationsWetDry2D(gravity_constant = 9.812, H0 = 3.0) + +function initial_condition_wb_testing(x, t, equations::ShallowWaterEquationsWetDry2D) + # Set up polar coordinates + inicenter = SVector(0.15, 0.15) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + + # Calculate primitive variables + v1 = 0.0 + v2 = 0.0 + # v1 = r < 0.6 ? 1.75 : 0.0 + # v2 = r < 0.6 ? -1.75 : 0.0 + # bottom topography taken from Pond.control in [HOHQMesh](https://github.com/trixi-framework/HOHQMesh) + x1, x2 = x + b = (1.5 / exp(0.5 * ((x1 - 1.0)^2 + (x2 - 1.0)^2)) + + + 0.75 / exp(0.5 * ((x1 + 1.0)^2 + (x2 + 1.0)^2))) + # H = equations.H0 + H = max(equations.H0, b + equations.threshold_limiter) + + return prim2cons(SVector(H, v1, v2, b), equations) +end + +initial_condition = initial_condition_wb_testing + +boundary_condition = Dict(:OuterCircle => boundary_condition_slip_wall) + +############################################################################### +# Get the DG approximation space + +# # ersing flux in both +# volume_flux = (flux_wintermeyer_etal, flux_nonconservative_ersing_etal) +# surface_fluxes = (flux_wintermeyer_etal, flux_nonconservative_ersing_etal) + +# Wintermeyer flux in the volume +volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) + +# # Wintermeyer flux in the surface for testing +# surface_fluxes = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) + +surface_fluxes = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle), + flux_nonconservative_chen_noelle) + + +# # Fjordholms flux for testing +# surface_fluxes = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal) + +# Audusse HR with Rusanov flux +# surface_fluxes = (FluxHydrostaticReconstruction(flux_lax_friedrichs, +# hydrostatic_reconstruction_audusse_etal), +# flux_nonconservative_audusse_etal) + +# Audusse HR with HLL flux +# surface_fluxes = (FluxHydrostaticReconstruction(flux_hll, hydrostatic_reconstruction_audusse_etal), +# flux_nonconservative_audusse_etal) + +# Create the solver +solver = DGSEM(polydeg = 4, surface_flux = surface_fluxes, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) + +############################################################################### +# This setup is for the curved, split form well-balancedness testing + +# Get the unstructured quad mesh from a file (downloads the file if not available locally) + +default_mesh_file = joinpath(@__DIR__, "abaqus_outer_circle.inp") +isfile(default_mesh_file) || + download("https://gist.githubusercontent.com/andrewwinters5000/df92dd4986909927e96af23c37f6db5f/raw/8620823342f98c505a36351b210aab7f3b368041/abaqus_outer_circle.inp", + default_mesh_file) +mesh_file = default_mesh_file + +mesh_file = joinpath(@__DIR__, "pond.inp") + +mesh = P4estMesh{2}(mesh_file) + +# Refine bottom left quadrant of each tree to level 2 +function refine_fn(p4est, which_tree, quadrant) + quadrant_obj = unsafe_load(quadrant) + if quadrant_obj.x == 0 && quadrant_obj.y == 0 && quadrant_obj.level < 2 + # return true (refine) + return Cint(1) + else + # return false (don't refine) + return Cint(0) + end +end + +# Refine recursively until each bottom left quadrant of a tree has level 2 +# The mesh will be rebalanced before the simulation starts +refine_fn_c = @cfunction(refine_fn, Cint, + (Ptr{Trixi.p4est_t}, Ptr{Trixi.p4est_topidx_t}, + Ptr{Trixi.p4est_quadrant_t})) +Trixi.refine_p4est!(mesh.p4est, true, refine_fn_c, C_NULL) + +# Create the semi discretization object +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + boundary_conditions = boundary_condition) + +############################################################################### +# ODE solvers, callbacks, etc. + +tspan = (0.0, 3.0) +ode = semidiscretize(semi, tspan) + +############################################################################### +# # Workaround to set a discontinuous bottom topography for debugging and testing. + +# # alternative version of the initial conditinon used to setup a truly discontinuous +# # bottom topography function for this academic testcase. +# # The errors from the analysis callback are not important but the error for this lake at rest test case +# # `∑|H0-(h+b)|` should be around machine roundoff +# # In contrast to the usual signature of initial conditions, this one get passed the +# # `element_id` explicitly. In particular, this initial conditions works as intended +# # only for the specific mesh loaded above! +# function initial_condition_discontinuous_well_balancedness(x, t, element_id, equations::ShallowWaterEquations2D) +# # Set the background values +# H = equations.H0 +# v1 = 0.0 +# v2 = 0.0 + +# x1, x2 = x +# b = ( 1.5 / exp( 0.5 * ((x1 - 1.0)^2 + (x2 - 1.0)^2) ) +# + 0.75 / exp( 0.5 * ((x1 + 1.0)^2 + (x2 + 1.0)^2) ) ) + +# # Setup a discontinuous bottom topography using the element id number +# # if element_id > 50 && element_id < 80 # for the original mesh file +# # if element_id > 200 && element_id < 300 # for no hanging node grid with < 1 in function above +# if element_id > 300 && element_id < 400 # for the forced hanging node grid with < 2 in function above +# # if element_id > 400 && element_id < 650 # for the forced hanging node grid with < 3 in function above +# b = 0.0 +# end + +# return prim2cons(SVector(H, v1, v2, b), equations) +# end + +# # point to the data we want to augment +# u = Trixi.wrap_array(ode.u0, semi) +# # reset the initial condition +# for element in eachelement(semi.solver, semi.cache) +# for j in eachnode(semi.solver), i in eachnode(semi.solver) +# x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, j, element) +# u_node = initial_condition_discontinuous_well_balancedness(x_node, first(tspan), element, equations) +# Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, j, element) +# end +# end +############################################################################### + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_integrals = (lake_at_rest_error,)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +save_solution = SaveSolutionCallback(dt = 1.0, + save_initial_solution = true, + save_final_solution = true) + +stepsize_callback = StepsizeCallback(cfl = 2.0) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/src/TrixiShallowWater.jl b/src/TrixiShallowWater.jl index 5522fc5..0b73535 100644 --- a/src/TrixiShallowWater.jl +++ b/src/TrixiShallowWater.jl @@ -1,8 +1,8 @@ module TrixiShallowWater -# While `using Trixi` makes all exported symbols available, in order to extend a method from the +# While `using Trixi` makes all exported symbols available, in order to extend a method from the # `Trixi.jl` module, symbols need to be explicitly qualified with `Trixi.function_name`. -# For more information, see +# For more information, see # https://github.com/trixi-framework/TrixiShallowWater.jl/pull/10#discussion_r1433720559 using Trixi # Import additional symbols that are not exported by Trixi.jl @@ -16,6 +16,7 @@ include("equations/equations.jl") include("equations/numerical_fluxes.jl") include("callbacks_stage/callbacks_stage.jl") include("solvers/indicators.jl") +include("solvers/scratch_p4est.jl") # Export types/functions that define the public API of TrixiShallowWater.jl export ShallowWaterEquationsWetDry1D, ShallowWaterEquationsWetDry2D, diff --git a/src/solvers/scratch_p4est.jl b/src/solvers/scratch_p4est.jl new file mode 100644 index 0000000..e4e6b09 --- /dev/null +++ b/src/solvers/scratch_p4est.jl @@ -0,0 +1,302 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +function Trixi.prolong2mortars!(cache, u, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + equations::ShallowWaterEquationsWetDry2D, + mortar_l2::Trixi.LobattoLegendreMortarL2, + surface_integral, dg::DGSEM) + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + Trixi.@threaded for mortar in Trixi.eachmortar(dg, cache) + # Copy solution data from the small elements using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step = Trixi.index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = Trixi.index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + # TODO: need to form the sigma variable from Benov et al. (essentially H = h+b) + # and then save sigma, momenta, and bottom topography into the buffer for projection + temp = zeros(eltype(cache.mortars.u), 4) + if u[1, i_small, j_small, element] > equations.threshold_wet + temp[1] = u[1, i_small, j_small, element] + u[4, i_small, j_small, element] + else + temp[1] = equations.H0 + end + temp[2:4] = u[2:4, i_small, j_small, element] + + # println(temp) + + for v in eachvariable(equations) + cache.mortars.u[1, v, position, i, mortar] = temp[v] + # cache.mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, + # element] + end + i_small += i_small_step + j_small += j_small_step + end + end + + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + + # Copy solution of large element face to buffer in the + # correct orientation + large_indices = node_indices[2, mortar] + + i_large_start, i_large_step = Trixi.index_to_start_step_2d(large_indices[1], + index_range) + j_large_start, j_large_step = Trixi.index_to_start_step_2d(large_indices[2], + index_range) + + i_large = i_large_start + j_large = j_large_start + element = neighbor_ids[3, mortar] + for i in eachnode(dg) + # TODO: need to form the sigma variable from Benov et al. (essentially H = h+b) + # and then save sigma, momenta, and bottom topography into the buffer for projection + temp = zeros(eltype(cache.mortars.u), 4) + if u[1, i_large, j_large, element] > equations.threshold_wet + temp[1] = u[1, i_large, j_large, element] + u[4, i_large, j_large, element] + else + temp[1] = equations.H0 + end + temp[2:4] = u[2:4, i_large, j_large, element] + + for v in eachvariable(equations) + u_buffer[v, i] = temp[v] + # u_buffer[v, i] = u[v, i_large, j_large, element] + end + i_large += i_large_step + j_large += j_large_step + end + + # Interpolate large element face data from buffer to small face locations + Trixi.multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, mortar), + mortar_l2.forward_lower, + u_buffer) + Trixi.multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, mortar), + mortar_l2.forward_upper, + u_buffer) + # # TODO: Force the bottom topography to not be projected? + # cache.mortars.u[2, 4, 1, :, mortar] .= cache.mortars.u[1, 4, 1, :, mortar] + # cache.mortars.u[2, 4, 2, :, mortar] .= cache.mortars.u[1, 4, 2, :, mortar] + end + + return nothing +end + +function Trixi.calc_mortar_flux!(surface_flux_values, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + nonconservative_terms, + equations::ShallowWaterEquationsWetDry2D, + mortar_l2::Trixi.LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + index_range = eachnode(dg) + + # println("in mortar flux computation") + + Trixi.@threaded for mortar in Trixi.eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = (fstar_lower_threaded[Threads.threadid()], + fstar_upper_threaded[Threads.threadid()]) + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + small_direction = Trixi.indices2direction(small_indices) + + i_small_start, i_small_step = Trixi.index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = Trixi.index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for node in eachnode(dg) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = Trixi.get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, element) + + Trixi.calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + node) + + i_small += i_small_step + j_small += j_small_step + end + end + + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] + + # in calc_interface_flux!, the interface flux is computed once over each + # interface using the normal from the "primary" element. The result is then + # passed back to the "secondary" element, flipping the sign to account for the + # change in the normal direction. For mortars, this sign flip occurs in + # "mortar_fluxes_to_elements!" instead. + Trixi.mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer) + end + + # wololo() + + return nothing +end + +# Inlined version of the mortar flux computation on small elements for equations with conservative and +# nonconservative terms +@inline function Trixi.calc_mortar_flux!(fstar, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + nonconservative_terms::True, + equations::ShallowWaterEquationsWetDry2D, + surface_integral, dg::DG, cache, + mortar_index, position_index, normal_direction, + node_index) + @unpack u = cache.mortars + surface_flux, nonconservative_flux = surface_integral.surface_flux + + u_ll, u_rr = Trixi.get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) + # TODO: These surface values are still in terms of sigma, momenta, and bottom topography + # (1) unpack the sigma to create the water height on the mortar from Eq. 41 in Benov et al. + # (2) perform hydrostatic reconstruction on the mortar solution + # (3) use these HR quantities to compute the flux and nonconservative terms + # These last two steps, as far as I recall, occur within the surface flux from the + # provided solution states + u_ll_temp = zeros(eltype(u_ll), 4) + u_rr_temp = zeros(eltype(u_rr), 4) + + # go back to the conservative variables on either side + u_ll_temp[1] = max(u_ll[1] - u_ll[4], equations.threshold_wet) # zero(eltype(u_ll))) + u_ll_temp[2:4] = u_ll[2:4] + + u_rr_temp[1] = max(u_rr[1] - u_rr[4], equations.threshold_wet) # zero(eltype(u_rr))) + u_rr_temp[2:4] = u_rr[2:4] + + # println(u_rr_temp[4] - u_ll_temp[4]) + + # println(u_ll_temp) + # println(u_rr_temp) + # println() + + # Compute conservative flux + flux = surface_flux(u_ll_temp, u_rr_temp, normal_direction, equations) + # flux = surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute nonconservative flux and add it to the conservative flux. + # The nonconservative flux is scaled by a factor of 0.5 based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + noncons = nonconservative_flux(u_ll_temp, u_rr_temp, + normal_direction, normal_direction, + equations) + # noncons = nonconservative_flux(u_rr_temp, u_ll_temp, + # normal_direction, normal_direction, + # equations) + # noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, + # equations) + + flux_plus_noncons = flux + 0.5 * noncons + # println(flux) + # println(surface_flux(u_ll_temp, u_ll_temp, normal_direction, equations)) + # println(nonconservative_flux(u_ll_temp, u_rr_temp, + # normal_direction, normal_direction, + # equations)) + # println(noncons) + # println() + + # Copy to buffer + set_node_vars!(fstar[position_index], flux_plus_noncons, equations, dg, node_index) +end + +@inline function Trixi.mortar_fluxes_to_elements!(surface_flux_values, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + equations::ShallowWaterEquationsWetDry2D, + mortar_l2::Trixi.LobattoLegendreMortarL2, + dg::DGSEM, cache, mortar, fstar, u_buffer) + @unpack neighbor_ids, node_indices = cache.mortars + + # Copy solution small to small + small_indices = node_indices[1, mortar] + small_direction = Trixi.indices2direction(small_indices) + + for position in 1:2 + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, small_direction, element] = fstar[position][v, + i] + end + end + end + + # TODO: Does this back projection need modified, e.g., as a strong form penalty? + + # Project small fluxes to large element. + Trixi.multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, fstar[2], + mortar_l2.reverse_lower, fstar[1]) + + # println(u_buffer[2,:]) + + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are twice as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 2 to obtain the flux of the large element. + u_buffer .*= -2 + + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. + large_element = neighbor_ids[3, mortar] + large_indices = node_indices[2, mortar] + large_direction = Trixi.indices2direction(large_indices) + + if :i_backward in large_indices + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, + i] + end + end + else + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, large_direction, large_element] = u_buffer[v, + i] + end + end + end + + return nothing +end +end # @muladd diff --git a/src/solvers/scratch_tree.jl b/src/solvers/scratch_tree.jl new file mode 100644 index 0000000..ecddb59 --- /dev/null +++ b/src/solvers/scratch_tree.jl @@ -0,0 +1,269 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +function prolong2mortars!(cache, u, + mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @threaded for mortar in eachmortar(dg, cache) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, 1, l, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, 1, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, l, 1, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, l, 1, + lower_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), + lower_element] + end + end + end + end + + # Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + end + end + + return nothing +end + +@inline function element_solutions_to_mortars!(mortars, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 2}) + multiply_dimensionwise!(view(mortars.u_upper, leftright, :, :, mortar), + mortar_l2.forward_upper, u_large) + multiply_dimensionwise!(view(mortars.u_lower, leftright, :, :, mortar), + mortar_l2.forward_lower, u_large) + return nothing +end + +function calc_mortar_flux!(surface_flux_values, + mesh::TreeMesh{2}, + nonconservative_terms::True, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u_lower, u_upper, orientations, large_sides = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + # Add nonconservative fluxes. + # These need to be adapted on the geometry (left/right) since the order of + # the arguments matters, based on the global SBP operator interpretation. + # The same interpretation (global SBP operators coupled discontinuously via + # central fluxes/SATs) explains why we need the factor 0.5. + # Alternatively, you can also follow the argumentation of Bohm et al. 2018 + # ("nonconservative diamond flux") + if large_sides[mortar] == 1 # -> small elements on right side + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_ll, u_upper_rr, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_ll, u_lower_rr, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + else # large_sides[mortar] == 2 -> small elements on the left + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_rr, u_upper_ll, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_rr, u_lower_ll, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + end + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing +end + +@inline function calc_fstar!(destination::AbstractArray{<:Any, 2}, equations, + surface_flux, dg::DGSEM, + u_interfaces, interface, orientation) + for i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations, dg, i) + end + + return nothing +end + +@inline function mortar_fluxes_to_elements!(surface_flux_values, + mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, + dg::DGSEM, cache, + mortar, fstar_upper, fstar_lower) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + end + surface_flux_values[:, :, direction, upper_element] .= fstar_upper + surface_flux_values[:, :, direction, lower_element] .= fstar_lower + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + end + + # TODO: Taal performance + # for v in eachvariable(equations) + # # The code below is semantically equivalent to + # # surface_flux_values[v, :, direction, large_element] .= + # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) + # # but faster and does not allocate. + # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as + # # a universal `one`. Hence, the second `mul!` means "add the matrix-vector + # # product to the current value of the destination". + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_upper, fstar_upper[v, :]) + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) + # end + # The code above could be replaced by the following code. However, the relative efficiency + # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. + # Using StaticArrays for both makes the code above faster for common test cases. + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, large_element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + + return nothing +end +end # @muladd