diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl
index 45f03fba8fe..b0afd02aff8 100644
--- a/src/callbacks_step/amr.jl
+++ b/src/callbacks_step/amr.jl
@@ -228,7 +228,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh,
 
     if mpi_isparallel()
         # Collect lambda for all elements
-        lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache))
+        lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache))
         # Use parent because n_elements_by_rank is an OffsetArray
         recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank))
         MPI.Allgatherv!(lambda, recvbuf, mpi_comm())
@@ -380,7 +380,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh,
         error("MPI has not been verified yet for parabolic AMR")
 
         # Collect lambda for all elements
-        lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache))
+        lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache))
         # Use parent because n_elements_by_rank is an OffsetArray
         recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank))
         MPI.Allgatherv!(lambda, recvbuf, mpi_comm())
diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 7b4a97c2a79..2c8497dc28d 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -308,11 +308,11 @@ function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi)
     mpi_println(" " * " " * " " * " PID: " * @sprintf("%10.8e s", performance_index))
-    mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofs(semi)) *
+    mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofsglobal(semi)) *
                 " " * " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use))
     mpi_println(" #elements: " *
-                @sprintf("% 14d", nelements(mesh, solver, cache)))
+                @sprintf("% 14d", nelementsglobal(mesh, solver, cache)))
 
     # Level information (only show for AMR)
     print_amr_information(integrator.opts.callback, mesh, solver, cache)
@@ -494,88 +494,53 @@ function print_amr_information(callbacks, mesh, solver, cache)
 
     # Return early if there is nothing to print
     uses_amr(callbacks) || return nothing
 
-    levels = Vector{Int}(undef, nelements(solver, cache))
-    min_level = typemax(Int)
-    max_level = typemin(Int)
-    for element in eachelement(solver, cache)
-        current_level = mesh.tree.levels[cache.elements.cell_ids[element]]
-        levels[element] = current_level
-        min_level = min(min_level, current_level)
-        max_level = max(max_level, current_level)
+    # Get global minimum and maximum level from the AMRController
+    min_level = max_level = 0
+    for cb in callbacks.discrete_callbacks
+        if cb.affect! isa AMRCallback
+            min_level = cb.affect!.controller.base_level
+            max_level = cb.affect!.controller.max_level
+        end
     end
 
+    # Get local element count per level
+    elements_per_level = get_elements_per_level(min_level, max_level, mesh, solver,
+                                                cache)
+
+    # Sum up across all ranks
+    MPI.Reduce!(elements_per_level, +, mpi_root(), mpi_comm())
+
+    # Print
     for level in max_level:-1:(min_level + 1)
         mpi_println(" ├── level $level: " *
-                    @sprintf("% 14d", count(==(level), levels)))
+                    @sprintf("% 14d", elements_per_level[level + 1 - min_level]))
     end
     mpi_println(" └── level $min_level: " *
-                @sprintf("% 14d", count(==(min_level), levels)))
+                @sprintf("% 14d", elements_per_level[1]))
 
     return nothing
 end
 
-# Print level information only if AMR is enabled
-function print_amr_information(callbacks, mesh::P4estMesh, solver, cache)
-
-    # Return early if there is nothing to print
-    uses_amr(callbacks) || return nothing
-
+function get_elements_per_level(min_level, max_level, mesh::P4estMesh, solver, cache)
     elements_per_level = zeros(P4EST_MAXLEVEL + 1)
 
     for tree in unsafe_wrap_sc(p4est_tree_t, mesh.p4est.trees)
         elements_per_level .+= tree.quadrants_per_level
     end
 
-    # levels start at zero but Julia's standard indexing starts at 1
-    min_level_1 = findfirst(i -> i > 0, elements_per_level)
-    max_level_1 = findlast(i -> i > 0, elements_per_level)
-
-    # Check if there is at least one level with an element
-    if isnothing(min_level_1) || isnothing(max_level_1)
-        return nothing
-    end
-
-    min_level = min_level_1 - 1
-    max_level = max_level_1 - 1
-
-    for level in max_level:-1:(min_level + 1)
-        mpi_println(" ├── level $level: " *
-                    @sprintf("% 14d", elements_per_level[level + 1]))
-    end
-    mpi_println(" └── level $min_level: " *
-                @sprintf("% 14d", elements_per_level[min_level + 1]))
-
-    return nothing
+    return @view(elements_per_level[(min_level + 1):(max_level + 1)])
 end
 
-# Print level information only if AMR is enabled
-function print_amr_information(callbacks, mesh::T8codeMesh, solver, cache)
-
-    # Return early if there is nothing to print
-    uses_amr(callbacks) || return nothing
-
-    # TODO: Switch to global element levels array when MPI supported or find
-    # another solution.
+function get_elements_per_level(min_level, max_level, mesh::T8codeMesh, solver, cache)
     levels = trixi_t8_get_local_element_levels(mesh.forest)
 
-    min_level = minimum(levels)
-    max_level = maximum(levels)
-
-    mpi_println(" minlevel = $min_level")
-    mpi_println(" maxlevel = $max_level")
-
-    if min_level > 0
-        elements_per_level = [count(==(l), levels) for l in 1:max_level]
-
-        for level in max_level:-1:(min_level + 1)
-            mpi_println(" ├── level $level: " *
-                        @sprintf("% 14d", elements_per_level[level]))
-        end
-        mpi_println(" └── level $min_level: " *
-                    @sprintf("% 14d", elements_per_level[min_level]))
-    end
+    return [count(==(l), levels) for l in min_level:max_level]
 end
 
-    return nothing
+function get_elements_per_level(min_level, max_level, mesh::TreeMesh, solver, cache)
+    levels = [mesh.tree.levels[cache.elements.cell_ids[element]]
+              for element in eachelement(solver, cache)]
+    return [count(==(l), levels) for l in min_level:max_level]
 end
 
 # Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming".
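Note on the `print_amr_information` change above: instead of each mesh type printing its own level statistics, every mesh now provides rank-local per-level element counts via `get_elements_per_level`, and the counts are summed onto the MPI root before printing. A minimal standalone sketch of that reduction pattern (hypothetical `local_levels` data, not Trixi.jl code; it only mirrors the call shapes used in the patch):

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Hypothetical per-element refinement levels on this rank
local_levels = rand(2:4, 10)
min_level, max_level = 2, 4

# One bin per level, analogous to `get_elements_per_level`
elements_per_level = [count(==(l), local_levels) for l in min_level:max_level]

# Sum the bins across all ranks onto rank 0, mirroring
# `MPI.Reduce!(elements_per_level, +, mpi_root(), mpi_comm())` in the patch
MPI.Reduce!(elements_per_level, +, 0, comm)

if MPI.Comm_rank(comm) == 0
    for (i, level) in enumerate(min_level:max_level)
        println("level $level: ", elements_per_level[i])
    end
end
```

Only the root rank holds the summed counts after `MPI.Reduce!`, which is why the patch prints them via `mpi_println` (root-only output in Trixi.jl).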
diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl
index dc294de9e7b..1f0eec2de34 100644
--- a/src/callbacks_step/analysis_dgmulti.jl
+++ b/src/callbacks_step/analysis_dgmulti.jl
@@ -185,6 +185,13 @@ end
 SolutionAnalyzer(rd::RefElemData) = rd
 
 nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements
+function nelementsglobal(mesh::DGMultiMesh, solver::DGMulti, cache)
+    if mpi_isparallel()
+        error("`nelementsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI")
+    else
+        return nelements(mesh, solver, cache)
+    end
+end
 function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache)
     if mpi_isparallel()
         error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI")
diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl
index cddeef77bb2..b83402c5f86 100644
--- a/src/callbacks_step/save_restart_dg.jl
+++ b/src/callbacks_step/save_restart_dg.jl
@@ -126,7 +126,7 @@ function save_restart_file_parallel(u, time, dt, timestep,
         attributes(file)["equations"] = get_name(equations)
         attributes(file)["polydeg"] = polydeg(dg)
         attributes(file)["n_vars"] = nvariables(equations)
-        attributes(file)["n_elements"] = nelementsglobal(dg, cache)
+        attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache)
         attributes(file)["mesh_type"] = get_name(mesh)
         attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2]
         attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar
@@ -239,7 +239,7 @@ function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estM
         if read(attributes(file)["polydeg"]) != polydeg(dg)
             error("restart mismatch: polynomial degree in solver differs from value in restart file")
         end
-        if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache)
+        if read(attributes(file)["n_elements"]) != nelementsglobal(mesh, dg, cache)
             error("restart mismatch: number of elements in solver differs from value in restart file")
         end
diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl
index 7367886ca94..deae8f7c930 100644
--- a/src/callbacks_step/save_solution_dg.jl
+++ b/src/callbacks_step/save_solution_dg.jl
@@ -158,7 +158,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars,
         attributes(file)["equations"] = get_name(equations)
         attributes(file)["polydeg"] = polydeg(dg)
         attributes(file)["n_vars"] = n_vars
-        attributes(file)["n_elements"] = nelementsglobal(dg, cache)
+        attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache)
         attributes(file)["mesh_type"] = get_name(mesh)
         attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2]
         attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar
@@ -183,7 +183,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars,
             # Need to create dataset explicitly in parallel case
             var = create_dataset(file, "/element_variables_$v",
                                  datatype(eltype(element_variable)),
-                                 dataspace((nelementsglobal(dg, cache),)))
+                                 dataspace((nelementsglobal(mesh, dg, cache),)))
 
             # Write data of each process in slices (ranks start with 0)
             slice = (cum_element_counts[mpi_rank() + 1] + 1):cum_element_counts[mpi_rank() + 2]
@@ -230,7 +230,7 @@ function save_solution_file_on_root(data, time, dt, timestep, n_vars,
         attributes(file)["equations"] = get_name(equations)
         attributes(file)["polydeg"] = polydeg(dg)
         attributes(file)["n_vars"] = n_vars
- attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache) attributes(file)["mesh_type"] = get_name(mesh) attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl index 6bb98196231..2046220aeca 100644 --- a/src/meshes/p4est_mesh.jl +++ b/src/meshes/p4est_mesh.jl @@ -94,6 +94,7 @@ end end # returns Int32 by default which causes a weird method error when creating the cache @inline ncells(mesh::P4estMesh) = Int(mesh.p4est.local_num_quadrants[]) +@inline ncellsglobal(mesh::P4estMesh) = Int(mesh.p4est.global_num_quadrants[]) function Base.show(io::IO, mesh::P4estMesh) print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") @@ -105,7 +106,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::P4estMesh) else setup = [ "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), + "current #cells" => ncellsglobal(mesh), "polydeg" => length(mesh.nodes) - 1, ] summary_box(io, diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl index 0af4c6ae023..10a042be3ba 100644 --- a/src/meshes/t8code_mesh.jl +++ b/src/meshes/t8code_mesh.jl @@ -80,6 +80,7 @@ const ParallelT8codeMesh{NDIMS} = T8codeMesh{NDIMS, <:Real, <:True} @inline ntrees(mesh::T8codeMesh) = size(mesh.tree_node_coordinates)[end] @inline ncells(mesh::T8codeMesh) = Int(t8_forest_get_local_num_elements(mesh.forest)) +@inline ncellsglobal(mesh::T8codeMesh) = Int(t8_forest_get_global_num_elements(mesh.forest)) function Base.show(io::IO, mesh::T8codeMesh) print(io, "T8codeMesh{", ndims(mesh), ", ", real(mesh), "}") @@ -91,7 +92,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh) else setup = [ "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), + "current #cells" => ncellsglobal(mesh), "polydeg" => length(mesh.nodes) - 1, ] summary_box(io, diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index 8518cf27fd3..c6b82d5f37b 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -15,6 +15,19 @@ Return the number of degrees of freedom associated with each scalar variable. ndofs(mesh, solver, cache) end +""" + ndofsglobal(semi::AbstractSemidiscretization) + +Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks. +This is the same as [`ndofs`](@ref) for simulations running in serial or +parallelized via threads. It will in general be different for simulations +running in parallel with MPI. +""" +@inline function ndofsglobal(semi::AbstractSemidiscretization) + mesh, _, solver, cache = mesh_equations_solver_cache(semi) + ndofsglobal(mesh, solver, cache) +end + """ integrate_via_indices(func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) @@ -397,6 +410,7 @@ end # TODO: Taal, document interface? 
 # New mesh/solver combinations have to implement
 # - ndofs(mesh, solver, cache)
+# - ndofsglobal(mesh, solver, cache)
 # - ndims(mesh)
 # - nnodes(solver)
 # - real(solver)
diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl
index cc629d1a674..c5c21584dca 100644
--- a/src/semidiscretization/semidiscretization_coupled.jl
+++ b/src/semidiscretization/semidiscretization_coupled.jl
@@ -81,7 +81,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationCoupled)
                          semi.semis[i].source_terms)
             summary_line(increment_indent(io), "solver", solver |> typeof |> nameof)
         end
-        summary_line(io, "total #DOFs per field", ndofs(semi))
+        summary_line(io, "total #DOFs per field", ndofsglobal(semi))
         summary_footer(io)
     end
 end
@@ -123,6 +123,18 @@ end
     sum(ndofs, semi.semis)
 end
 
+"""
+    ndofsglobal(semi::SemidiscretizationCoupled)
+
+Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks, and summed up over all coupled systems.
+This is the same as [`ndofs`](@ref) for simulations running in serial or
+parallelized via threads. It will in general be different for simulations
+running in parallel with MPI.
+"""
+@inline function ndofsglobal(semi::SemidiscretizationCoupled)
+    sum(ndofsglobal, semi.semis)
+end
+
 function compute_coefficients(t, semi::SemidiscretizationCoupled)
     @unpack u_indices = semi
 
diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl
index dcd211671c8..7c82a132a0b 100644
--- a/src/semidiscretization/semidiscretization_hyperbolic.jl
+++ b/src/semidiscretization/semidiscretization_hyperbolic.jl
@@ -315,7 +315,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperboli
         summary_line(io, "source terms", semi.source_terms)
         summary_line(io, "solver", semi.solver |> typeof |> nameof)
-        summary_line(io, "total #DOFs per field", ndofs(semi))
+        summary_line(io, "total #DOFs per field", ndofsglobal(semi))
         summary_footer(io)
     end
 end
diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
index 57724374acb..16f8da21c1e 100644
--- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
+++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
@@ -229,7 +229,7 @@ function Base.show(io::IO, ::MIME"text/plain",
         summary_line(io, "source terms", semi.source_terms)
         summary_line(io, "solver", semi.solver |> typeof |> nameof)
         summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof)
-        summary_line(io, "total #DOFs per field", ndofs(semi))
+        summary_line(io, "total #DOFs per field", ndofsglobal(semi))
         summary_footer(io)
     end
 end
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index 0ab947e697a..fb4c8f182e0 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -459,7 +459,7 @@ In particular, not the nodes themselves are returned.
 # `mesh` for some combinations of mesh/solver.
 @inline nelements(mesh, dg::DG, cache) = nelements(dg, cache)
 @inline function ndofsglobal(mesh, dg::DG, cache)
-    nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh)
+    nelementsglobal(mesh, dg, cache) * nnodes(dg)^ndims(mesh)
 end
 
 """
@@ -517,7 +517,7 @@ In particular, not the mortars themselves are returned.
 @inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache))
 
 @inline nelements(dg::DG, cache) = nelements(cache.elements)
-@inline function nelementsglobal(dg::DG, cache)
+@inline function nelementsglobal(mesh, dg::DG, cache)
     mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache)
 end
 @inline ninterfaces(dg::DG, cache) = ninterfaces(cache.interfaces)
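Taken together, these changes mean that `nelementsglobal` and `ndofsglobal` are now always queried with the mesh as the first argument, and summary output as well as the HDF5 `n_elements` attributes report MPI-global quantities. A hedged usage sketch (not part of the patch; assumes a Trixi.jl build that includes these changes and uses the `Trixi.` prefix since some of these helpers may not be exported):

```julia
using Trixi

equations = LinearScalarAdvectionEquation1D(1.0)
mesh = TreeMesh(-1.0, 1.0, initial_refinement_level = 4, n_cells_max = 30_000)
solver = DGSEM(polydeg = 3)
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_convergence_test, solver)

mesh_, _, solver_, cache = Trixi.mesh_equations_solver_cache(semi)

# Rank-local values
Trixi.ndofs(semi)
Trixi.nelements(mesh_, solver_, cache)

# MPI-global values (what the summary output and restart/solution files now use);
# in serial runs they coincide with the rank-local values
Trixi.ndofsglobal(semi)
Trixi.nelementsglobal(mesh_, solver_, cache)   # mesh is now the first argument
```

For `DGMultiMesh` the global variants currently just error when run in parallel with MPI, as added in `analysis_dgmulti.jl` above.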