diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..4aaf123
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,129 @@
+# SPDX-FileCopyrightText: 2024 Technical University of Munich
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+Language:        Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveMacros: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     100
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Preserve
+IncludeCategories:
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
+    Priority:        3
+  - Regex:           '.*'
+    Priority:        1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+IndentPPDirectives: None
+IndentWidth:     2
+IndentWrappedFunctionNames: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Left
+QualifierAlignment: Left
+ReflowComments:  true
+SortIncludes:    false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        8
+UseTab:          Never
+
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 0000000..839d81b
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,357 @@
+# SPDX-FileCopyrightText: 2024 Technical University of Munich
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+Checks: |
+  -*,
+  clang-diagnostic-*,
+  cppcoreguidelines-*,
+  -cppcoreguidelines-avoid-c-arrays,
+  -cppcoreguidelines-avoid-const-or-ref-data-members,
+  -cppcoreguidelines-avoid-magic-numbers,
+  -cppcoreguidelines-avoid-do-while,
+  -cppcoreguidelines-macro-usage,
+  -cppcoreguidelines-no-malloc,
+  -cppcoreguidelines-non-private-member-variables-in-classes,
+  -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
+  -cppcoreguidelines-pro-bounds-constant-array-index,
+  -cppcoreguidelines-owning-memory,
+  -cppcoreguidelines-pro-bounds-pointer-arithmetic,
+  -cppcoreguidelines-pro-type-reinterpret-cast,
+  -cppcoreguidelines-narrowing-conversions,
+  -cppcoreguidelines-pro-type-member-init,
+  -cppcoreguidelines-pro-type-const-cast
+  llvm-*,
+  -llvm-else-after-return,
+  -llvm-header-guard,
+  -llvm-qualified-auto,
+  misc-*,
+  -misc-non-private-member-variables-in-classes,
+  -misc-unused-parameters,
+  -misc-no-recursion,
+  modernize-*,
+  -modernize-avoid-c-arrays,
+  -modernize-deprecated-headers,
+  -modernize-loop-convert,
+  -modernize-pass-by-value,
+  -modernize-return-braced-init-list,
+  -modernize-use-trailing-return-type,
+  mpi-*,
+  performance-*,
+  -performance-enum-size,
+  -performance-avoid-endl,
+  readability-*,
+  -readability-else-after-return,
+  -readability-identifier-length,
+  -readability-magic-numbers,
+  -readability-math-missing-parentheses,
+  -readability-function-cognitive-complexity,
+  -readability-function-size
+  -readability-suspicious-call-argument,
+  -readability-convert-member-functions-to-static,
+WarningsAsErrors: '*'
+CheckOptions:
+#  - key:             readability-identifier-naming.AbstractClassCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.AbstractClassPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.AbstractClassSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ClassCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassConstantCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassMemberCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassMemberPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassMemberSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ClassMethodCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.ClassMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ClassMethodSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ConstantCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.ConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantMemberCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantMemberPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantMemberSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantParameterCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantParameterSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantPointerParameterCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantPointerParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstantPointerParameterSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ConstexprFunctionCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.ConstexprFunctionPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprFunctionSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprMethodCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprMethodSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ConstexprVariableCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprVariablePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ConstexprVariableSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.EnumCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.EnumPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.EnumSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.EnumConstantCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.EnumConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.EnumConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.FunctionCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.FunctionPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.FunctionSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.GlobalConstantCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalConstantPointerCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalConstantPointerPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalConstantPointerSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalFunctionCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalFunctionPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalFunctionSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalPointerCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalPointerPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalPointerSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalVariableCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalVariablePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.GlobalVariableSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.IgnoreMainLikeFunctions
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.InlineNamespaceCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.InlineNamespacePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.InlineNamespaceSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.LocalConstantCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.LocalConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalConstantPointerCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalConstantPointerPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalConstantPointerSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalPointerCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalPointerPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalPointerSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.LocalVariableCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.LocalVariablePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.LocalVariableSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.MemberCase
+    value:           camelBack
+  - key:             readability-identifier-naming.MemberIgnoredRegexp
+    value:           "^m_|_$" # TODO: temporary! Remove after more refactoring.
+#  - key:             readability-identifier-naming.MemberPrefix
+#    value:           ""
+#  - key:             readability-identifier-naming.MemberSuffix
+#    value:           ""
+#  - key:             readability-identifier-naming.MethodCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.MethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.MethodSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.NamespaceCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.NamespacePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.NamespaceSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.ParameterCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.ParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ParameterSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ParameterPackCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ParameterPackPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ParameterPackSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PointerParameterCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PointerParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PointerParameterSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMemberCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMemberPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMemberSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMethodCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PrivateMethodSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMemberCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMemberPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMemberSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMethodCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ProtectedMethodSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMemberCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMemberPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMemberSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMethodCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.PublicMethodSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.StaticConstantCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.StaticConstantPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.StaticConstantSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.StaticVariableCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.StaticVariablePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.StaticVariableSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.StructCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.StructPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.StructSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.TemplateParameterCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.TemplateParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TemplateParameterSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.TemplateTemplateParameterCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.TemplateTemplateParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TemplateTemplateParameterSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.TypeAliasCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.TypeAliasPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TypeAliasSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.TypedefCase
+    value:           CamelCase
+#  - key:             readability-identifier-naming.TypedefPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TypedefSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TypeTemplateParameterCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TypeTemplateParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.TypeTemplateParameterSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.UnionCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.UnionPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.UnionSuffix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ValueTemplateParameterCase
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ValueTemplateParameterPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.ValueTemplateParameterSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.VariableCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.VariablePrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.VariableSuffix
+#    value:           CamelCase
+  - key:             readability-identifier-naming.VirtualMethodCase
+    value:           camelBack
+#  - key:             readability-identifier-naming.VirtualMethodPrefix
+#    value:           CamelCase
+#  - key:             readability-identifier-naming.VirtualMethodSuffix
+#    value:           CamelCase
+
+    # ignore internal GCC headers (bits/)
+  - key:             misc-include-cleaner.IgnoreHeaders
+    value:           'bits/.*'
+  - key:             cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor
+    value:           True
diff --git a/.gitignore b/.gitignore
index 2db8329..9b6818b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2017 Technical University of Munich
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
 # KDevelop
 .kdev4/
 *.kdev4
diff --git a/.gitmodules b/.gitmodules
index 998f137..01645ed 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2017 Technical University of Munich
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
 [submodule "submodules/utils"]
 	path = submodules/utils
 	url = https://github.com/TUM-I5/utils.git
diff --git a/DownElement.h b/DownElement.h
index b5ba8b1..100f53d 100644
--- a/DownElement.h
+++ b/DownElement.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -15,13 +17,9 @@
 
 #include <algorithm>
 #include <cstring>
-#include <unordered_set>
+#include <functional>
 
-namespace PUML
-{
-
-namespace internal
-{
+namespace PUML::internal {
 
 /**
  * A hashable element defined by the global ids of
@@ -29,49 +27,42 @@ namespace internal
  *
  * @tparam N The number of down elements
  */
-template<unsigned int N>
-struct DownElement
-{
-	/** The global ids of the downward elements */
-	unsigned long down[N];
+template <unsigned int N>
+struct DownElement {
+  /** The global ids of the downward elements */
+  unsigned long down[N]{};
 
-	DownElement(const unsigned long down[N])
-	{
-		memcpy(this->down, down, N*sizeof(unsigned long));
-		std::sort(this->down, this->down+N);
-	}
+  DownElement(const unsigned long down[N]) {
+    memcpy(this->down, down, N * sizeof(unsigned long));
+    std::sort(this->down, this->down + N);
+  }
 
-	bool operator==(const DownElement &other) const
-	{
-		return memcmp(down, other.down, N*sizeof(unsigned long)) == 0;
-	}
+  auto operator==(const DownElement& other) const -> bool {
+    return memcmp(down, other.down, N * sizeof(unsigned long)) == 0;
+  }
 };
 
-template<unsigned int N>
-struct DownElementHash
-{
-	std::size_t operator()(const DownElement<N> &element) const
-	{
-		std::size_t h = std::hash<unsigned long>{}(element.down[0]);
-		for (unsigned int i = 1; i < N; i++)
-			hash_combine(h, element.down[i]);
+template <unsigned int N>
+struct DownElementHash {
+  auto operator()(const DownElement<N>& element) const -> std::size_t {
+    std::size_t h = std::hash<unsigned long>{}(element.down[0]);
+    for (unsigned int i = 1; i < N; i++) {
+      hashCombine(h, element.down[i]);
+    }
 
-		return h;
-	}
+    return h;
+  }
 
-	/**
-	 * Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
-	 */
-	template<typename T>
-	static void hash_combine(std::size_t& seed, const T& v)
-	{
-		std::hash<T> hasher;
-		seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-	}
+  /**
+   * Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
+   */
+  template <typename T>
+  static void hashCombine(std::size_t& seed, const T& v) {
+    std::hash<T> hasher;
+    seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+  }
 };
 
-}
-
-}
+} // namespace PUML::internal
 
-#endif // PUML_DOWNELEMENT_H
\ No newline at end of file
+#endif // PUML_DOWNELEMENT_H
diff --git a/Downward.h b/Downward.h
index 01d38db..98b637e 100644
--- a/Downward.h
+++ b/Downward.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -17,100 +19,103 @@
 #include <cassert>
 #include <cstring>
 
-#include "utils/arrayutils.h"
-
 #include "Element.h"
 #include "Numbering.h"
 #include "PUML.h"
 #include "Topology.h"
 #include "Utils.h"
 
-namespace PUML
-{
-
-class Downward
-{
-public:
-	/**
-	 * Returns all local face ids for a cell
-	 *
-	 * @param puml The PUML mesh
-	 * @param cell The cell for which the faces should be returned
-	 * @param lid The local ids of the faces
-	 */
-	template<TopoType Topo>
-	static void faces(const PUML<Topo> &puml, const typename PUML<Topo>::cell_t &cell, unsigned int* lid)
-	{
-		for (unsigned int i = 0; i < internal::Topology<Topo>::cellfaces(); i++) {
-			unsigned int v[internal::Topology<Topo>::facevertices()];
-			faceVertices(puml, cell, i, v);
-
-			int id = puml.faceByVertices(v);
-			assert(id >= 0);
-			lid[i] = id;
-		}
-	}
-
-	/**
-	 * Returns all local vertex ids for a cell
-	 *
-	 * @param puml The PUML mesh
-	 * @param cell The cell for which the vertices should be returned
-	 * @param lid The local ids of the vertices
-	 */
-	template<TopoType Topo>
-	static void vertices(const PUML<Topo> &puml, const typename PUML<Topo>::cell_t &cell, unsigned int* lid)
-	{
-		memcpy(lid, cell.m_vertices, internal::Topology<Topo>::cellvertices() * sizeof(unsigned int));
-	}
-
-	/**
-	 * Returns all global vertex ids for a cell
-	 *
-	 * @param puml The PUML mesh
-	 * @param cell The cell for which the vertices should be returned
-	 * @param gid The global ids of the vertices
-	 */
-	template<TopoType Topo>
-	static void gvertices(const PUML<Topo> &puml, const typename PUML<Topo>::cell_t &cell, unsigned long* gid)
-	{
-		unsigned int lid[internal::Topology<Topo>::cellvertices()];
-		vertices(puml, cell, lid);
-		internal::Utils::l2g<Topo, typename PUML<Topo>::vertex_t, internal::Topology<Topo>::cellvertices()>(puml, lid, gid);
-	}
-
-	/**
-	 * @param faceId The local id of the face
-	 * @return The side of the cell this face is on or -1 of the face is on no side
-	 */
-	template<TopoType Topo>
-	static int faceSide(const PUML<Topo> &puml, const typename PUML<Topo>::cell_t &cell, unsigned int faceId)
-	{
-		unsigned int faceIds[internal::Topology<Topo>::cellfaces()];
-		faces(puml, cell, faceIds);
-
-		unsigned int* end = faceIds+internal::Topology<Topo>::cellfaces();
-
-		unsigned int* pFaceId = std::find(faceIds, end, faceId);
-		if (pFaceId == end)
-			return -1;
-
-		return pFaceId - faceIds;
-	}
-
-	/**
-	 * @param faceSide The side of the cell
-	 */
-	template<TopoType Topo>
-	static void faceVertices(const PUML<Topo> &puml, const typename PUML<Topo>::cell_t &cell, unsigned int faceSide, unsigned int* lid)
-	{
-		assert(faceSide < internal::Topology<Topo>::cellfaces());
-		for (unsigned int i = 0; i < internal::Topology<Topo>::facevertices(); i++) {
-			lid[i] = cell.m_vertices[internal::Numbering<Topo>::facevertices()[faceSide][i]];
-		}
-	}
+namespace PUML {
+
+class Downward {
+  public:
+  /**
+   * Returns all local face ids for a cell
+   *
+   * @param puml The PUML mesh
+   * @param cell The cell for which the faces should be returned
+   * @param lid The local ids of the faces
+   */
+  template <TopoType Topo>
+  static void
+      faces(const PUML<Topo>& puml, const typename PUML<Topo>::cell_t& cell, unsigned int* lid) {
+    for (unsigned int i = 0; i < internal::Topology<Topo>::cellfaces(); i++) {
+      unsigned int v[internal::Topology<Topo>::facevertices()];
+      faceVertices(puml, cell, i, v);
+
+      int id = puml.faceByVertices(v);
+      assert(id >= 0);
+      lid[i] = id;
+    }
+  }
+
+  /**
+   * Returns all local vertex ids for a cell
+   *
+   * @param puml The PUML mesh
+   * @param cell The cell for which the vertices should be returned
+   * @param lid The local ids of the vertices
+   */
+  template <TopoType Topo>
+  static void
+      vertices(const PUML<Topo>& puml, const typename PUML<Topo>::cell_t& cell, unsigned int* lid) {
+    memcpy(lid, cell.m_vertices, internal::Topology<Topo>::cellvertices() * sizeof(unsigned int));
+  }
+
+  /**
+   * Returns all global vertex ids for a cell
+   *
+   * @param puml The PUML mesh
+   * @param cell The cell for which the vertices should be returned
+   * @param gid The global ids of the vertices
+   */
+  template <TopoType Topo>
+  static void gvertices(const PUML<Topo>& puml,
+                        const typename PUML<Topo>::cell_t& cell,
+                        unsigned long* gid) {
+    unsigned int lid[internal::Topology<Topo>::cellvertices()];
+    vertices(puml, cell, lid);
+    internal::Utils::l2g<Topo,
+                         typename PUML<Topo>::vertex_t,
+                         internal::Topology<Topo>::cellvertices()>(puml, lid, gid);
+  }
+
+  /**
+   * @param faceId The local id of the face
+   * @return The side of the cell this face is on or -1 of the face is on no side
+   */
+  template <TopoType Topo>
+  static auto faceSide(const PUML<Topo>& puml,
+                       const typename PUML<Topo>::cell_t& cell,
+                       unsigned int faceId) -> int {
+    unsigned int faceIds[internal::Topology<Topo>::cellfaces()];
+    faces(puml, cell, faceIds);
+
+    unsigned int* end = faceIds + internal::Topology<Topo>::cellfaces();
+
+    unsigned int* pFaceId = std::find(faceIds, end, faceId);
+    if (pFaceId == end) {
+      return -1;
+    }
+
+    return pFaceId - faceIds;
+  }
+
+  /**
+   * @param faceSide The side of the cell
+   */
+  template <TopoType Topo>
+  static void faceVertices(const PUML<Topo>& puml,
+                           const typename PUML<Topo>::cell_t& cell,
+                           unsigned int faceSide,
+                           unsigned int* lid) {
+    assert(faceSide < internal::Topology<Topo>::cellfaces());
+    for (unsigned int i = 0; i < internal::Topology<Topo>::facevertices(); i++) {
+      lid[i] = cell.m_vertices[internal::Numbering<Topo>::facevertices()[faceSide][i]];
+    }
+  }
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_DOWNWARD_H
diff --git a/Element.h b/Element.h
index c330b9b..7972cff 100644
--- a/Element.h
+++ b/Element.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -17,110 +19,98 @@
 
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML {
 
-namespace internal
-{
+namespace internal {
 
 class Utils;
 
 } // namespace internal
 
-template<TopoType Topo>
+template <TopoType Topo>
 class PUML;
 class Downward;
 class Upward;
 
-template<typename utype>
-class Element
-{
-	template<TopoType Topo>
-	friend class PUML;
-	friend class Upward;
-	friend class internal::Utils;
-
-private:
-	/** The global id */
-	unsigned long m_gid;
-
-	/** The local/global ids of the upper elements */
-	utype m_upward;
-
-public:
-	/**
-	 * @return The global ID of the element
-	 */
-	unsigned long gid() const
-	{ return m_gid; }
+template <typename Utype>
+class Element {
+  template <TopoType Topo>
+  friend class PUML;
+  friend class Upward;
+  friend class internal::Utils;
+
+  private:
+  /** The global id */
+  unsigned long m_gid;
+
+  /** The local/global ids of the upper elements */
+  Utype m_upward;
+
+  public:
+  /**
+   * @return The global ID of the element
+   */
+  [[nodiscard]] auto gid() const -> unsigned long { return m_gid; }
 };
 
 /**
  * Elements that can be on partition boundaries (all except cells)
  */
-template<typename utype>
-class BoundaryElement : public Element<utype>
-{
-	template<TopoType Topo>
-	friend class PUML;
-
-private:
-	/** A listof ranks that contain the same vertex */
-	std::vector<int> m_sharedRanks;
-
-public:
-	/**
-	 * @return <code>True</code> if the element is on a partition boundary
-	 */
-	bool isShared() const
-	{ return m_sharedRanks.size() > 0; }
-
-	/**
-	 * @return A vector of ranks that also has this element
-	 */
-	const std::vector<int>& shared() const
-	{ return m_sharedRanks; }
+template <typename Utype>
+class BoundaryElement : public Element<Utype> {
+  template <TopoType Topo>
+  friend class PUML;
+
+  private:
+  /** A listof ranks that contain the same vertex */
+  std::vector<int> m_sharedRanks;
+
+  public:
+  /**
+   * @return <code>True</code> if the element is on a partition boundary
+   */
+  [[nodiscard]] auto isShared() const -> bool { return !m_sharedRanks.empty(); }
+
+  /**
+   * @return A vector of ranks that also has this element
+   */
+  [[nodiscard]] auto shared() const -> const std::vector<int>& { return m_sharedRanks; }
 };
 
-class Vertex : public BoundaryElement<std::vector<int> >
-{
-	template<TopoType Topo>
-	friend class PUML;
-
-private:
-	double m_coordinate[3];
-
-public:
-	/**
-	 * @return A pointer to an array with 3 components containing
-	 *  x, y and z
-	 */
-	const double* coordinate() const
-	{ return m_coordinate; }
+class Vertex : public BoundaryElement<std::vector<int>> {
+  template <TopoType Topo>
+  friend class PUML;
+
+  private:
+  double m_coordinate[3]{};
+
+  public:
+  /**
+   * @return A pointer to an array with 3 components containing
+   *  x, y and z
+   */
+  [[nodiscard]] auto coordinate() const -> const double* { return m_coordinate; }
 };
 
-class Edge : public BoundaryElement<std::vector<int> >
-{
-	template<TopoType Topo>
-	friend class PUML;
+class Edge : public BoundaryElement<std::vector<int>> {
+  template <TopoType Topo>
+  friend class PUML;
 };
 
-class Face : public BoundaryElement<int[2]>
-{
-	template<TopoType Topo>
-	friend class PUML;
+class Face : public BoundaryElement<int[2]> {
+  template <TopoType Topo>
+  friend class PUML;
 };
 
-template<TopoType Topo>
-class Cell : public Element<std::array<int, 0>>
-{
-	friend class PUML<Topo>;
-	friend class Downward;
+template <TopoType Topo>
+class Cell : public Element<std::array<int, 0>> {
+  friend class PUML<Topo>;
+  friend class Downward;
 
-private:
-	unsigned int m_vertices[internal::Topology<Topo>::cellvertices()];
+  private:
+  unsigned int m_vertices[internal::Topology<Topo>::cellvertices()]{};
 };
 
-}
+} // namespace PUML
 
-#endif // PUML_ELEMENT_H
\ No newline at end of file
+#endif // PUML_ELEMENT_H
diff --git a/FaceIterator.h b/FaceIterator.h
index 0628f12..5c45b4e 100644
--- a/FaceIterator.h
+++ b/FaceIterator.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,471 +9,579 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_FACE_ITERATOR_H
 #define PUML_FACE_ITERATOR_H
 
+#include "Topology.h"
+#include "PUML.h"
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
 
+#include "TypeInference.h"
+#include "Upward.h"
 #include <algorithm>
-#include <vector>
-#include <unordered_map>
 #include <cassert>
-#include <cstddef>
 #include <functional>
 #include <type_traits>
 #include <utility>
-#include "Upward.h"
-#include "TypeInference.h"
+#include <vector>
 
-#include "utils/logger.h"
+namespace PUML {
 
-namespace PUML
-{
+template <TopoType Topo>
+class FaceIterator {
+  private:
+  class Face;
 
-template<TopoType Topo>
-class FaceIterator
-{
-private:
-    class Face;
-public:
-    FaceIterator(const PUML<Topo>& puml, bool sparseComm = true) : m_sparseComm(sparseComm), m_puml(puml)
-    {
-        int rank = 0, commSize = 1;
+  public:
+  FaceIterator(const PUML<Topo>& puml, bool sparseComm = true)
+      : m_sparseComm(sparseComm), m_puml(puml) {
+    int rank = 0;
+    int commSize = 1;
 
 #ifdef USE_MPI
-        MPI_Comm_rank(m_puml.comm(), &rank);
-        MPI_Comm_size(m_puml.comm(), &commSize);
-
-        const auto& faces = puml.faces();
-
-        std::vector<std::vector<int>> transfer(commSize);
-        for (int i = 0; i < faces.size(); ++i) {
-            int lid[2];
-            Upward::cells(puml, faces[i], lid);
-            assert(!(lid[0] == -1 && lid[1] != -1));
-            assert(faces[i].shared().size() <= 1);
-            if (lid[0] != -1 && faces[i].isShared()) {
-                transfer[faces[i].shared()[0]].push_back(i);
-            }
-        }
-
-        m_transferSize = std::vector<int>(commSize);
-        m_transferDisp = std::vector<int>(commSize+1);
-        m_transferDisp[0] = 0;
-        for (int i = 0; i < commSize; ++i) {
-            m_transferSize[i] = transfer[i].size();
-            m_transferDisp[i+1] = m_transferDisp[i]+m_transferSize[i];
-            std::sort(transfer[i].begin(), transfer[i].end(),
-                [&faces] (int a, int b) -> bool {
-                    return faces[a].gid() < faces[b].gid();
-                });
-        }
-
-        m_transferCell = std::vector<int>(m_transferDisp[commSize]);
-        m_transferFace = std::vector<int>(m_transferDisp[commSize]);
-        for (int i = 0, j = 0; i < commSize; ++i) {
-            for (int k = 0; k < m_transferSize[i]; ++j, ++k) {
-                const auto& face = faces[transfer[i][k]];
-                int lid[2];
-                Upward::cells(puml, face, lid);
-                m_transferCell[j] = lid[0];
-                m_transferFace[j] = transfer[i][k];
-            }
-        }
-#endif
-    }
-    
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true>
-    void forEach(const std::vector<T>& cellData,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto cellHandler = [&cellData](int fid, int cid){return cellData[cid];};
-        forEach<T, T>(std::move(cellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const std::vector<T>& cellData,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto cellHandler = [&cellData](int fid, int cid){return cellData[cid];};
-        forEach<T, T>(std::move(cellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    template<typename T, typename S, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true>
-    void forEach(const std::vector<T>& externalCellData,
-                    const std::vector<S>& internalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [&externalCellData](int fid, int cid){return externalCellData[cid];};
-        auto internalCellHandler = [&internalCellData](int fid, int cid){return internalCellData[cid];};
-        forEach<T, S>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename S, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const std::vector<T>& externalCellData,
-                    const std::vector<S>& internalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [&externalCellData](int fid, int cid){return externalCellData[cid];};
-        auto internalCellHandler = [&internalCellData](int fid, int cid){return internalCellData[cid];};
-        forEach<T, S>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
-    void forEach(const std::vector<T>& externalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [&externalCellData](int fid, int cid){return externalCellData[cid];};
-        internalforEach<T>(std::move(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const std::vector<T>& externalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [&externalCellData](int fid, int cid){return externalCellData[cid];};
-        internalforEach<T>(std::move(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true>
-    void forEach(const T* cellData,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto cellHandler = [cellData](int fid, int cid){return cellData[cid];};
-        forEach<T, T>(std::move(cellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const T* cellData,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto cellHandler = [cellData](int fid, int cid){return cellData[cid];};
-        forEach<T, T>(std::move(cellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
+    MPI_Comm_rank(m_puml.comm(), &rank);
+    MPI_Comm_size(m_puml.comm(), &commSize);
+
+    const auto& faces = puml.faces();
+
+    std::vector<std::vector<int>> transfer(commSize);
+    for (int i = 0; i < faces.size(); ++i) {
+      int lid[2];
+      Upward::cells(puml, faces[i], lid);
+      assert(!(lid[0] == -1 && lid[1] != -1));
+      assert(faces[i].shared().size() <= 1);
+      if (lid[0] != -1 && faces[i].isShared()) {
+        transfer[faces[i].shared()[0]].push_back(i);
+      }
     }
 
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    template<typename T, typename S, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true>
-    void forEach(const T* externalCellData,
-                    const S* internalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [externalCellData](int fid, int cid){return externalCellData[cid];};
-        auto internalCellHandler = [internalCellData](int fid, int cid){return internalCellData[cid];};
-        forEach<T, S>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
+    m_transferSize = std::vector<int>(commSize);
+    m_transferDisp = std::vector<int>(commSize + 1);
+    m_transferDisp[0] = 0;
+    for (int i = 0; i < commSize; ++i) {
+      m_transferSize[i] = transfer[i].size();
+      m_transferDisp[i + 1] = m_transferDisp[i] + m_transferSize[i];
+      std::sort(transfer[i].begin(), transfer[i].end(), [&faces](int a, int b) -> bool {
+        return faces[a].gid() < faces[b].gid();
+      });
     }
 
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename S, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const T* externalCellData,
-                    const S* internalCellData,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [externalCellData](int fid, int cid){return externalCellData[cid];};
-        auto internalCellHandler = [internalCellData](int fid, int cid){return internalCellData[cid];};
-        forEach<T, S>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
+    m_transferCell = std::vector<int>(m_transferDisp[commSize]);
+    m_transferFace = std::vector<int>(m_transferDisp[commSize]);
+    for (int i = 0, j = 0; i < commSize; ++i) {
+      for (int k = 0; k < m_transferSize[i]; ++j, ++k) {
+        const auto& face = faces[transfer[i][k]];
+        int lid[2];
+        Upward::cells(puml, face, lid);
+        m_transferCell[j] = lid[0];
+        m_transferFace[j] = transfer[i][k];
+      }
     }
-
-    // FaceHandlerFunc: void(int,int,const T&)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
-    void forEach(const T* externalCellData,
-                        FaceHandlerFunc&& faceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [externalCellData](int fid, int cid){return externalCellData[cid];};
-        internalforEach<T>(std::move(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // FaceHandlerFunc: void(int,int,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(const T* externalCellData,
-                        FaceHandlerFunc&& faceHandler,
-                        BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto externalCellHandler = [externalCellData](int fid, int cid){return externalCellData[cid];};
-        internalforEach<T>(std::move(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // CellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    template<typename T, typename CellHandlerFunc, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true>
-    void forEach(CellHandlerFunc&& cellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        // no direct move/forward possible here for cellHandler
-        auto externalCellHandler = [&cellHandler](int fid, int cid){return std::invoke(cellHandler, fid, cid);};
-        auto internalCellHandler = [&cellHandler](int fid, int cid){return std::invoke(cellHandler, fid, cid);};
-        forEach<T, T>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // CellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename CellHandlerFunc, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(CellHandlerFunc&& cellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        // no direct move/forward possible here for cellHandler
-        auto externalCellHandler = [&cellHandler](int fid, int cid){return std::invoke(cellHandler, fid, cid);};
-        auto internalCellHandler = [&cellHandler](int fid, int cid){return std::invoke(cellHandler, fid, cid);};
-        forEach<T, T>(std::move(externalCellHandler),
-            std::move(internalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // ExternalCellHandlerFunc: T(int,int)
-    // InternalCellHandlerFunc: S(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    template<typename T, typename S, typename ExternalCellHandlerFunc, typename InternalCellHandlerFunc, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_r_v<S, InternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true>
-    void forEach(ExternalCellHandlerFunc&& externalCellHandler,
-                    InternalCellHandlerFunc&& internalCellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler), internalCellHandler = std::forward<InternalCellHandlerFunc>(internalCellHandler)]
-            (int fid, int cid, const T& tv) {
-                std::invoke(faceHandler, fid, cid, tv, std::invoke(internalCellHandler, fid, cid));
-            };
-        forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
-            std::move(realFaceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // ExternalCellHandlerFunc: T(int,int)
-    // InternalCellHandlerFunc: S(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,const S&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename S, typename ExternalCellHandlerFunc, typename InternalCellHandlerFunc, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_r_v<S, InternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(ExternalCellHandlerFunc&& externalCellHandler,
-                    InternalCellHandlerFunc&& internalCellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler), internalCellHandler = std::forward<InternalCellHandlerFunc>(internalCellHandler)]
-            (int fid, int cid, const T& tv) {
-                std::invoke(faceHandler, fid, cid, tv, std::invoke(internalCellHandler, fid, cid));
-            };
-        forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
-            std::move(realFaceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    // ExternalCellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&)
-    template<typename T, typename ExternalCellHandlerFunc, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
-    void forEach(ExternalCellHandlerFunc&& externalCellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        internalforEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::move([](int a, int b){}),
-            mpit);
-    }
-
-    // ExternalCellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename ExternalCellHandlerFunc, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void forEach(ExternalCellHandlerFunc&& externalCellHandler,
-                    FaceHandlerFunc&& faceHandler,
-                    BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                    MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        internalforEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
-            std::forward<FaceHandlerFunc>(faceHandler),
-            std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
-            mpit);
-    }
-
-    const PUML<Topo>& puml() const {
-        return m_puml;
+#endif
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  template <typename T,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true>
+  void forEach(const std::vector<T>& cellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto cellHandler = [&cellData](int fid, int cid) { return cellData[cid]; };
+    forEach<T, T>(std::move(cellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::move([](int a, int b) {}),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const std::vector<T>& cellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto cellHandler = [&cellData](int fid, int cid) { return cellData[cid]; };
+    forEach<T, T>(std::move(cellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  template <typename T,
+            typename S,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>,
+                             bool> = true>
+  void forEach(const std::vector<T>& externalCellData,
+               const std::vector<S>& internalCellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [&externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    auto internalCellHandler = [&internalCellData](int fid, int cid) {
+      return internalCellData[cid];
+    };
+    forEach<T, S>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::move([](int a, int b) {}),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename S,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>,
+                             bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const std::vector<T>& externalCellData,
+               const std::vector<S>& internalCellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [&externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    auto internalCellHandler = [&internalCellData](int fid, int cid) {
+      return internalCellData[cid];
+    };
+    forEach<T, S>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&)
+  template <typename T,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
+  void forEach(const std::vector<T>& externalCellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [&externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    internalforEach<T>(std::move(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::move([](int a, int b) {}),
+                       mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const std::vector<T>& externalCellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [&externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    internalforEach<T>(std::move(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                       mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  template <typename T,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true>
+  void forEach(const T* cellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto cellHandler = [cellData](int fid, int cid) { return cellData[cid]; };
+    forEach<T, T>(std::move(cellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::move([](int a, int b) {}),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const T* cellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto cellHandler = [cellData](int fid, int cid) { return cellData[cid]; };
+    forEach<T, T>(std::move(cellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  template <typename T,
+            typename S,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>,
+                             bool> = true>
+  void forEach(const T* externalCellData,
+               const S* internalCellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    auto internalCellHandler = [internalCellData](int fid, int cid) {
+      return internalCellData[cid];
+    };
+    forEach<T, S>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::move([](int a, int b) {}),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename S,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>,
+                             bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const T* externalCellData,
+               const S* internalCellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    auto internalCellHandler = [internalCellData](int fid, int cid) {
+      return internalCellData[cid];
+    };
+    forEach<T, S>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                  mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&)
+  template <typename T,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
+  void forEach(const T* externalCellData,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    internalforEach<T>(std::move(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::move([](int a, int b) {}),
+                       mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(const T* externalCellData,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto externalCellHandler = [externalCellData](int fid, int cid) {
+      return externalCellData[cid];
+    };
+    internalforEach<T>(std::move(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                       mpit);
+  }
+
+  // CellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  template <typename T,
+            typename CellHandlerFunc,
+            typename FaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true>
+  void forEach(CellHandlerFunc&& cellHandler,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    // no direct move/forward possible here for cellHandler
+    auto externalCellHandler = [&cellHandler](int fid, int cid) {
+      return std::invoke(cellHandler, fid, cid);
+    };
+    auto internalCellHandler = [&cellHandler](int fid, int cid) {
+      return std::invoke(cellHandler, fid, cid);
+    };
+    forEach<T, T>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::move([](int a, int b) {}),
+                  mpit);
+  }
+
+  // CellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <typename T,
+            typename CellHandlerFunc,
+            typename FaceHandlerFunc,
+            typename BoundaryFaceHandlerFunc,
+            std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
+            std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&>,
+                             bool> = true,
+            std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(CellHandlerFunc&& cellHandler,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    // no direct move/forward possible here for cellHandler
+    auto externalCellHandler = [&cellHandler](int fid, int cid) {
+      return std::invoke(cellHandler, fid, cid);
+    };
+    auto internalCellHandler = [&cellHandler](int fid, int cid) {
+      return std::invoke(cellHandler, fid, cid);
+    };
+    forEach<T, T>(std::move(externalCellHandler),
+                  std::move(internalCellHandler),
+                  std::forward<FaceHandlerFunc>(faceHandler),
+                  std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                  mpit);
+  }
+
+  // ExternalCellHandlerFunc: T(int,int)
+  // InternalCellHandlerFunc: S(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  template <
+      typename T,
+      typename S,
+      typename ExternalCellHandlerFunc,
+      typename InternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_r_v<S, InternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> =
+          true>
+  void forEach(ExternalCellHandlerFunc&& externalCellHandler,
+               InternalCellHandlerFunc&& internalCellHandler,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler),
+                            internalCellHandler = std::forward<InternalCellHandlerFunc>(
+                                internalCellHandler)](int fid, int cid, const T& tv) {
+      std::invoke(faceHandler, fid, cid, tv, std::invoke(internalCellHandler, fid, cid));
+    };
+    forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
+               std::move(realFaceHandler),
+               std::move([](int a, int b) {}),
+               mpit);
+  }
+
+  // ExternalCellHandlerFunc: T(int,int)
+  // InternalCellHandlerFunc: S(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,const S&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <
+      typename T,
+      typename S,
+      typename ExternalCellHandlerFunc,
+      typename InternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      typename BoundaryFaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_r_v<S, InternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const S&>, bool> =
+          true,
+      std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(ExternalCellHandlerFunc&& externalCellHandler,
+               InternalCellHandlerFunc&& internalCellHandler,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler),
+                            internalCellHandler = std::forward<InternalCellHandlerFunc>(
+                                internalCellHandler)](int fid, int cid, const T& tv) {
+      std::invoke(faceHandler, fid, cid, tv, std::invoke(internalCellHandler, fid, cid));
+    };
+    forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
+               std::move(realFaceHandler),
+               std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+               mpit);
+  }
+
+  // ExternalCellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&)
+  template <
+      typename T,
+      typename ExternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true>
+  void forEach(ExternalCellHandlerFunc&& externalCellHandler,
+               FaceHandlerFunc&& faceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    internalforEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::move([](int a, int b) {}),
+                       mpit);
+  }
+
+  // ExternalCellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <
+      typename T,
+      typename ExternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      typename BoundaryFaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void forEach(ExternalCellHandlerFunc&& externalCellHandler,
+               FaceHandlerFunc&& faceHandler,
+               BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+               MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    internalforEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
+                       std::forward<FaceHandlerFunc>(faceHandler),
+                       std::forward<BoundaryFaceHandlerFunc>(boundaryFaceHandler),
+                       mpit);
+  }
+
+  auto puml() const -> const PUML<Topo>& { return m_puml; }
+
+  private:
+  // ExternalCellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&)
+  // BoundaryFaceHandlerFunc: void(int,int)
+  template <
+      typename T,
+      typename ExternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      typename BoundaryFaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
+  void internalforEach(ExternalCellHandlerFunc&& externalCellHandler,
+                       FaceHandlerFunc&& faceHandler,
+                       BoundaryFaceHandlerFunc&& boundaryFaceHandler,
+                       MPI_Datatype mpit) {
+    int rank = 0;
+    int commSize = 1;
+
+    for (int i = 0; i < m_puml.faces().size(); ++i) {
+      const auto& face = m_puml.faces()[i];
+      int lid[2];
+      Upward::cells(m_puml, face, lid);
+      assert(!(lid[0] == -1 && lid[1] != -1));
+
+      if (lid[1] != -1) {
+        const auto gd1 = std::invoke(externalCellHandler, i, lid[1]);
+        std::invoke(faceHandler, i, lid[0], gd1);
+
+        const auto gd0 = std::invoke(externalCellHandler, i, lid[0]);
+        std::invoke(faceHandler, i, lid[1], gd0);
+      } else if (lid[1] == -1 && !face.isShared()) {
+        std::invoke(boundaryFaceHandler, i, lid[0]);
+      }
     }
 
-private:
-    // ExternalCellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&)
-    // BoundaryFaceHandlerFunc: void(int,int)
-    template<typename T, typename ExternalCellHandlerFunc, typename FaceHandlerFunc, typename BoundaryFaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<BoundaryFaceHandlerFunc, int, int>, bool> = true>
-    void internalforEach(ExternalCellHandlerFunc&& externalCellHandler,
-                            FaceHandlerFunc&& faceHandler,
-                            BoundaryFaceHandlerFunc&& boundaryFaceHandler,
-                            MPI_Datatype mpit) {
-        int rank = 0, commSize = 1;
-
-        for (int i = 0; i < m_puml.faces().size(); ++i) {
-            const auto& face = m_puml.faces()[i];
-            int lid[2];
-            Upward::cells(m_puml, face, lid);
-            assert(!(lid[0] == -1 && lid[1] != -1));
-
-            if (lid[1] != -1) {
-                const auto gd1 = std::invoke(externalCellHandler, i, lid[1]);
-                std::invoke(faceHandler, i, lid[0], gd1);
-
-                const auto gd0 = std::invoke(externalCellHandler, i, lid[0]);
-                std::invoke(faceHandler, i, lid[1], gd0);
-            }
-            else if (lid[1] == -1 && !face.isShared()) {
-                std::invoke(boundaryFaceHandler, i, lid[0]);
-            }
-        }
-
 #ifdef USE_MPI
-        MPI_Comm_rank(m_puml.comm(), &rank);
-        MPI_Comm_size(m_puml.comm(), &commSize);
+    MPI_Comm_rank(m_puml.comm(), &rank);
+    MPI_Comm_size(m_puml.comm(), &commSize);
 
-        std::vector<T> transferSend(m_transferDisp[commSize]);
-        std::vector<T> transferReceive(m_transferDisp[commSize]);
-        for (int i = 0; i < transferSend.size(); ++i) {
-            transferSend[i] = std::invoke(externalCellHandler, m_transferFace[i], m_transferCell[i]);
-        }
+    std::vector<T> transferSend(m_transferDisp[commSize]);
+    std::vector<T> transferReceive(m_transferDisp[commSize]);
+    for (int i = 0; i < transferSend.size(); ++i) {
+      transferSend[i] = std::invoke(externalCellHandler, m_transferFace[i], m_transferCell[i]);
+    }
 
-        if (m_sparseComm) {
-            int transfer_ranks = 0;
-            for (int i = 0, j = 0; i < commSize; ++i) {
-                if (m_transferDisp[i+1] > m_transferDisp[i]) {
-                    ++transfer_ranks;
-                }
-            }
-            std::vector<MPI_Request> requests(transfer_ranks*2);
-            for (int i = 0, j = 0; i < commSize; ++i) {
-                if (m_transferDisp[i+1] > m_transferDisp[i]) {
-                    MPI_Isend(static_cast<T*>(transferSend.data()) + m_transferDisp[i], m_transferSize[i], mpit, i, 0, m_puml.comm(), &requests[2*j]);
-                    MPI_Irecv(static_cast<T*>(transferReceive.data()) + m_transferDisp[i], m_transferSize[i], mpit, i, 0, m_puml.comm(), &requests[2*j+1]);
-                    ++j;
-                }
-            }
-            MPI_Waitall(requests.size(), requests.data(), MPI_STATUS_IGNORE);
+    if (m_sparseComm) {
+      int transferRanks = 0;
+      for (int i = 0, j = 0; i < commSize; ++i) {
+        if (m_transferDisp[i + 1] > m_transferDisp[i]) {
+          ++transferRanks;
         }
-        else {
-            MPI_Alltoallv(transferSend.data(), m_transferSize.data(), m_transferDisp.data(), mpit, transferReceive.data(), m_transferSize.data(), m_transferDisp.data(), mpit, m_puml.comm());
+      }
+      std::vector<MPI_Request> requests(transferRanks * 2);
+      for (int i = 0, j = 0; i < commSize; ++i) {
+        if (m_transferDisp[i + 1] > m_transferDisp[i]) {
+          MPI_Isend(static_cast<T*>(transferSend.data()) + m_transferDisp[i],
+                    m_transferSize[i],
+                    mpit,
+                    i,
+                    0,
+                    m_puml.comm(),
+                    &requests[2 * j]);
+          MPI_Irecv(static_cast<T*>(transferReceive.data()) + m_transferDisp[i],
+                    m_transferSize[i],
+                    mpit,
+                    i,
+                    0,
+                    m_puml.comm(),
+                    &requests[(2 * j) + 1]);
+          ++j;
         }
+      }
+      MPI_Waitall(requests.size(), requests.data(), MPI_STATUS_IGNORE);
+    } else {
+      MPI_Alltoallv(transferSend.data(),
+                    m_transferSize.data(),
+                    m_transferDisp.data(),
+                    mpit,
+                    transferReceive.data(),
+                    m_transferSize.data(),
+                    m_transferDisp.data(),
+                    mpit,
+                    m_puml.comm());
+    }
 
-        for (int i = 0; i < m_transferFace.size(); ++i) {
-            const auto& gd1 = transferReceive[i];
-            std::invoke(faceHandler, m_transferFace[i], m_transferCell[i], gd1);
-        }
-#endif
+    for (int i = 0; i < m_transferFace.size(); ++i) {
+      const auto& gd1 = transferReceive[i];
+      std::invoke(faceHandler, m_transferFace[i], m_transferCell[i], gd1);
     }
+#endif
+  }
 
-    const PUML<Topo>& m_puml;
+  const PUML<Topo>& m_puml;
 #ifdef USE_MPI
-    std::vector<int> m_transferSize;
-    std::vector<int> m_transferDisp;
-    std::vector<int> m_transferCell;
-    std::vector<int> m_transferFace;
-    bool m_sparseComm;
+  std::vector<int> m_transferSize;
+  std::vector<int> m_transferDisp;
+  std::vector<int> m_transferCell;
+  std::vector<int> m_transferFace;
+  bool m_sparseComm;
 #endif
 };
 
-}
+} // namespace PUML
 
 #endif
diff --git a/LICENSES/BSD-2-Clause.txt b/LICENSES/BSD-2-Clause.txt
new file mode 100644
index 0000000..5f662b3
--- /dev/null
+++ b/LICENSES/BSD-2-Clause.txt
@@ -0,0 +1,9 @@
+Copyright (c) <year> <owner> 
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSES/BSD-3-Clause.txt b/LICENSES/BSD-3-Clause.txt
new file mode 100644
index 0000000..ea890af
--- /dev/null
+++ b/LICENSES/BSD-3-Clause.txt
@@ -0,0 +1,11 @@
+Copyright (c) <year> <owner>. 
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSES/CECILL-C.txt b/LICENSES/CECILL-C.txt
new file mode 100644
index 0000000..2bb09b4
--- /dev/null
+++ b/LICENSES/CECILL-C.txt
@@ -0,0 +1,517 @@
+
+CeCILL-C FREE SOFTWARE LICENSE AGREEMENT
+
+
+    Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+    * firstly, compliance with the principles governing the distribution
+      of Free Software: access to source code, broad rights granted to
+      users,
+    * secondly, the election of a governing law, French law, with which
+      it is conformant, both as regards the law of torts and
+      intellectual property law, and the protection that it offers to
+      both authors and holders of the economic rights over software.
+
+The authors of the CeCILL-C (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+    Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and re-use the software governed by this license.
+
+The exercising of this right is conditional upon the obligation to make
+available to the community the modifications made to the source code of
+the software so as to contribute to its evolution.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+    Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Integrated Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Integrated
+Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Integrated Contribution: means any or all modifications, corrections,
+translations, adaptations and/or new functions integrated into the
+Source Code by any or all Contributors.
+
+Related Module: means a set of sources files including their
+documentation that, without modification to the Source Code, enables
+supplementary functions or services in addition to those offered by the
+Software.
+
+Derivative Software: means any combination of the Software, modified or
+not, and of a Related Module.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+    Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+    Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+    * (i) loading the Software by any or all means, notably, by
+      downloading from a remote server, or by loading from a physical
+      medium;
+    * (ii) the first time the Licensee exercises any of the rights
+      granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+    Article 4 - EFFECTIVE DATE AND TERM
+
+
+      4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+      4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+    Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+      5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+   1. permanent or temporary reproduction of all or part of the Software
+      by any or all means and in any or all form.
+
+   2. loading, displaying, running, or storing the Software on any or
+      all medium.
+
+   3. entitlement to observe, study or test its operation so as to
+      determine the ideas and principles behind any or all constituent
+      elements of said Software. This shall apply when the Licensee
+      carries out any or all loading, displaying, running, transmission
+      or storage operation as regards the Software, that it is entitled
+      to carry out hereunder.
+
+
+      5.2 RIGHT OF MODIFICATION
+
+The right of modification includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software. It includes, in particular, the
+right to create a Derivative Software.
+
+The Licensee is authorized to make any or all modification to the
+Software provided that it includes an explicit notice that it is the
+author of said modification and indicates the date of the creation thereof.
+
+
+      5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+        5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+   1. a copy of the Agreement,
+
+   2. a notice relating to the limitation of both the Licensor's
+      warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows effective access to the full Source
+Code of the Software at a minimum during the entire period of its
+distribution of the Software, it being understood that the additional
+cost of acquiring the Source Code shall not exceed the cost of
+transferring the data.
+
+
+        5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes an Integrated Contribution to the Software, the
+terms and conditions for the distribution of the resulting Modified
+Software become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+   1. a copy of the Agreement,
+
+   2. a notice relating to the limitation of both the Licensor's
+      warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows effective access to the
+full source code of the Modified Software at a minimum during the entire
+period of its distribution of the Modified Software, it being understood
+that the additional cost of acquiring the source code shall not exceed
+the cost of transferring the data.
+
+
+        5.3.3 DISTRIBUTION OF DERIVATIVE SOFTWARE
+
+When the Licensee creates Derivative Software, this Derivative Software
+may be distributed under a license agreement other than this Agreement,
+subject to compliance with the requirement to include a notice
+concerning the rights over the Software as defined in Article 6.4.
+In the event the creation of the Derivative Software required modification
+of the Source Code, the Licensee undertakes that:
+
+   1. the resulting Modified Software will be governed by this Agreement,
+   2. the Integrated Contributions in the resulting Modified Software
+      will be clearly identified and documented,
+   3. the Licensee will allow effective access to the source code of the
+      Modified Software, at a minimum during the entire period of
+      distribution of the Derivative Software, such that such
+      modifications may be carried over in a subsequent version of the
+      Software; it being understood that the additional cost of
+      purchasing the source code of the Modified Software shall not
+      exceed the cost of transferring the data.
+
+
+        5.3.4 COMPATIBILITY WITH THE CeCILL LICENSE
+
+When a Modified Software contains an Integrated Contribution subject to
+the CeCILL license agreement, or when a Derivative Software contains a
+Related Module subject to the CeCILL license agreement, the provisions
+set forth in the third item of Article 6.4 are optional.
+
+
+    Article 6 - INTELLECTUAL PROPERTY
+
+
+      6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+      6.2 OVER THE INTEGRATED CONTRIBUTIONS
+
+The Licensee who develops an Integrated Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+      6.3 OVER THE RELATED MODULES
+
+The Licensee who develops a Related Module is the owner of the
+intellectual property rights over this Related Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution under the conditions defined in Article 5.3.3.
+
+
+      6.4 NOTICE OF RIGHTS
+
+The Licensee expressly undertakes:
+
+   1. not to remove, or modify, in any manner, the intellectual property
+      notices attached to the Software;
+
+   2. to reproduce said notices, in an identical manner, in the copies
+      of the Software modified or not;
+
+   3. to ensure that use of the Software, its intellectual property
+      notices and the fact that it is governed by the Agreement is
+      indicated in a text that is easily accessible, specifically from
+      the interface of any Derivative Software.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+    Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+    Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+    Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+    Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+    Article 11 - MISCELLANEOUS
+
+
+      11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+      11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+    Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version.
+
+
+    Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 1.0 dated 2006-09-05.
diff --git a/Neighbor.h b/Neighbor.h
index fa7168c..ebf85bd 100644
--- a/Neighbor.h
+++ b/Neighbor.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -20,38 +22,36 @@
 #include "Topology.h"
 #include "Upward.h"
 
-namespace PUML
-{
-
-class Neighbor
-{
-public:
-	/**
-	 * Returns all local neighor ids for a cell
-	 *
-	 * @param puml The PUML mesh
-	 * @param clid The local id of the cell for which the neighbors should be returned
-	 * @param flid The local ids of the neighbors or -1 if there is no local neighbor
-	 */
-	template<TopoType Topo>
-	static void face(const PUML<Topo> &puml, unsigned int clid, int* flid)
-	{
-		unsigned int faces[internal::Topology<Topo>::cellfaces()];
-		assert(clid < puml.cells().size());
-		Downward::faces(puml, puml.cells()[clid], faces);
-
-		for (unsigned int i = 0; i < internal::Topology<Topo>::cellfaces(); i++) {
-			int neighbors[2];
-			Upward::cells(puml, puml.faces()[faces[i]], neighbors);
-
-			if (static_cast<unsigned int>(neighbors[0]) == clid)
-				flid[i] = neighbors[1];
-			else
-				flid[i] = neighbors[0];
-		}
-	}
+namespace PUML {
+
+class Neighbor {
+  public:
+  /**
+   * Returns all local neighor ids for a cell
+   *
+   * @param puml The PUML mesh
+   * @param clid The local id of the cell for which the neighbors should be returned
+   * @param flid The local ids of the neighbors or -1 if there is no local neighbor
+   */
+  template <TopoType Topo>
+  static void face(const PUML<Topo>& puml, unsigned int clid, int* flid) {
+    unsigned int faces[internal::Topology<Topo>::cellfaces()];
+    assert(clid < puml.cells().size());
+    Downward::faces(puml, puml.cells()[clid], faces);
+
+    for (unsigned int i = 0; i < internal::Topology<Topo>::cellfaces(); i++) {
+      int neighbors[2];
+      Upward::cells(puml, puml.faces()[faces[i]], neighbors);
+
+      if (static_cast<unsigned int>(neighbors[0]) == clid) {
+        flid[i] = neighbors[1];
+      } else {
+        flid[i] = neighbors[0];
+      }
+    }
+  }
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_NEIGHBOR_H
diff --git a/Numbering.h b/Numbering.h
index 25cb469..158da50 100644
--- a/Numbering.h
+++ b/Numbering.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017-2024 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,8 +9,8 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
+ * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_NUMBERING_H
@@ -15,41 +18,46 @@
 
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML::internal {
 
-namespace internal
-{
+template <TopoType Topo>
+class Numbering {
+  public:
+  typedef unsigned int face_t[Topology<Topo>::facevertices()];
+  typedef unsigned int edge_t[2];
+  typedef unsigned int faceadj_t[2];
 
-template<TopoType Topo>
-class Numbering
-{
-public:
-	typedef unsigned int face_t[Topology<Topo>::facevertices()];
-
-	static const face_t* facevertices();
+  static auto facevertices() -> const face_t*;
+  static auto edgevertices() -> const edge_t*;
+  static auto edgefaces() -> const faceadj_t*;
 };
 
-template<>
-class Numbering<TETRAHEDRON>
-{
-public:
-	typedef unsigned int face_t[Topology<TETRAHEDRON>::facevertices()];
-
-	static const face_t* facevertices()
-	{
-		static const face_t vertices[4] = {
-			{ 1, 0, 2 },
-			{ 0, 1, 3 },
-			{ 1, 2, 3 },
-			{ 2, 0, 3 } };
-
-		return vertices;
-	}
+template <>
+class Numbering<TETRAHEDRON> {
+  public:
+  typedef unsigned int face_t[Topology<TETRAHEDRON>::facevertices()];
+  typedef unsigned int edge_t[2];
+  typedef unsigned int faceadj_t[2];
+
+  static auto facevertices() -> const face_t* {
+    static const face_t Vertices[4] = {{1, 0, 2}, {0, 1, 3}, {1, 2, 3}, {2, 0, 3}};
+
+    return Vertices;
+  }
+
+  static auto edgevertices() -> const edge_t* {
+    static const edge_t Vertices[6] = {{0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3}};
+    return Vertices;
+  }
+
+  static auto edgefaces() -> const faceadj_t* {
+    static const faceadj_t Faces[6] = {{0, 1}, {0, 2}, {0, 3}, {1, 3}, {1, 2}, {2, 3}};
+    return Faces;
+  }
 };
 
-}
+// TODO(someone): adapt for HEXAHEDRON. Maybe edgefaces() can be removed or inferred
 
-}
+} // namespace PUML::internal
 
-#endif // PUML_NUMBERING_H
\ No newline at end of file
+#endif // PUML_NUMBERING_H
diff --git a/PUML.h b/PUML.h
index 565d539..5f05d03 100644
--- a/PUML.h
+++ b/PUML.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017-2024 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -15,38 +17,37 @@
 
 #include "TypeInference.h"
 #include <cstddef>
+#include <cstring>
+#include <hdf5.h>
+#include <iterator>
+#include <string>
 #include <type_traits>
+#include <utility>
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
 
 #include <algorithm>
 #include <cassert>
+#include <cstdlib>
 #include <limits>
 #include <set>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
-#include <cstdlib>
-
-#include <hdf5.h>
 
 #include "utils/logger.h"
 #include "utils/stringutils.h"
 
 #include "DownElement.h"
 #include "Element.h"
+#include "Numbering.h"
 #include "Topology.h"
 #include "VertexElementMap.h"
 
-namespace PUML
-{
+namespace PUML {
 
-enum DataType
-{
-	CELL = 0,
-	VERTEX = 1
-};
+enum DataType { CELL = 0, VERTEX = 1 };
 
 /**
  * Distributes a number of mesh entities (i.e. elements or vertices) to  a given number of ranks
@@ -54,19 +55,18 @@ enum DataType
  * The remaining ranks will read E/R entities.
  */
 class Distributor {
-  public: 
-  Distributor() = default;
-  Distributor(unsigned long newNumEntities, unsigned long newNumRanks) : numEntities(newNumEntities),
-    numRanks(newNumRanks),
-    entitiesPerRank(numEntities / numRanks),
-    missingEntities(numEntities % numRanks) {
-    assert(numEntities > numRanks); 
+  public:
+  Distributor() = delete;
+  Distributor(unsigned long newNumEntities, unsigned long newNumRanks)
+      : numEntities(newNumEntities), numRanks(newNumRanks), entitiesPerRank(numEntities / numRanks),
+        missingEntities(numEntities % numRanks) {
+    assert(numEntities > numRanks);
   }
 
   /**
    * Gives the offset and size of data where the rank should read data.
    */
-  std::pair<unsigned long, unsigned long> offsetAndSize(unsigned long rank) {
+  auto offsetAndSize(unsigned long rank) -> std::pair<unsigned long, unsigned long> {
     assert(rank < numRanks);
     unsigned long offset = 0;
     unsigned long size = 0;
@@ -84,13 +84,14 @@ class Distributor {
   /**
    * Gives the rank, which has read the entity with the given globalId.
    */
-  unsigned long rankOfEntity(unsigned long globalId) {
+  auto rankOfEntity(unsigned long globalId) -> const unsigned long {
     assert(globalId < numEntities);
     unsigned long rank = 0;
     if (globalId < missingEntities * (entitiesPerRank + 1)) {
       rank = globalId / (entitiesPerRank + 1);
     } else {
-      rank = (globalId - missingEntities * (entitiesPerRank + 1)) / entitiesPerRank + missingEntities;
+      rank =
+          (globalId - missingEntities * (entitiesPerRank + 1)) / entitiesPerRank + missingEntities;
     }
     assert(rank < numRanks);
     return rank;
@@ -99,11 +100,11 @@ class Distributor {
   /**
    * Gives the local id of a mesh entity for the given globalId.
    */
-  unsigned long globalToLocalId(unsigned long rank, unsigned long globalId) {
+  auto globalToLocalId(unsigned long rank, unsigned long globalId) -> unsigned long {
     assert(globalId < numEntities);
-    auto[offset, size] = offsetAndSize(rank);
-    assert (globalId >= offset);
-    assert (globalId < offset + size);
+    auto [offset, size] = offsetAndSize(rank);
+    assert(globalId >= offset);
+    assert(globalId < offset + size);
     return globalId - offset;
   }
 
@@ -114,1378 +115,1365 @@ class Distributor {
   const unsigned long missingEntities;
 };
 
-#define checkH5Err(...) _checkH5Err(__VA_ARGS__, __FILE__, __LINE__, rank)
+#define checkH5Err(...) checkH5ErrImpl(__VA_ARGS__, __FILE__, __LINE__, rank)
 
 /**
  * @todo Handle non-MPI case correct
  */
-template<TopoType Topo>
-class PUML
-{
-public:
-	/** The cell type from the file */
-	typedef unsigned long ocell_t[internal::Topology<Topo>::cellvertices()];
+template <TopoType Topo>
+class PUML {
+  public:
+  /** The cell type from the file */
+  typedef unsigned long ocell_t[internal::Topology<Topo>::cellvertices()];
+
+  /** The vertex type from the file */
+  typedef double overtex_t[3];
 
-	/** The vertex type from the file */
-	typedef double overtex_t[3];
+  /** Internal cell type */
+  using cell_t = Cell<Topo>;
 
-	/** Internal cell type */
-	typedef Cell<Topo> cell_t;
+  /** Internal face type */
+  using face_t = Face;
 
-	/** Internal face type */
-	typedef Face face_t;
+  /** Internal edge type */
+  using edge_t = Edge;
 
-	/** Internal edge type */
-	typedef Edge edge_t;
+  /** Internal vertex type */
+  using vertex_t = Vertex;
 
-	/** Internal vertex type */
-	typedef Vertex vertex_t;
-private:
+  private:
 #ifdef USE_MPI
-	MPI_Comm m_comm;
+  MPI_Comm m_comm;
 #endif // USE_MPI
 
-	/** The original cells from the file */
-	ocell_t* m_originalCells;
+  /** The original cells from the file */
+  ocell_t* m_originalCells;
 
-	/** The original vertices from the file */
-	overtex_t* m_originalVertices;
+  /** The original vertices from the file */
+  overtex_t* m_originalVertices{0L};
 
-	/** The original number of cells/vertices on each node */
-	unsigned int m_originalSize[2];
+  /** The original number of cells/vertices on each node */
+  unsigned int m_originalSize[2]{};
 
-	/** The original number of total cells/vertices */
-	unsigned long m_originalTotalSize[2];
+  /** The original number of total cells/vertices */
+  unsigned long m_originalTotalSize[2]{};
 
-	typedef std::unordered_map<unsigned long, unsigned int> g2l_t;
+  using g2l_t = std::unordered_map<unsigned long, unsigned int>;
 
-	/** The list of all local cells */
-	std::vector<cell_t> m_cells;
+  /** The list of all local cells */
+  std::vector<cell_t> m_cells;
 
-	/** The list of all local faces */
-	std::vector<face_t> m_faces;
+  /** The list of all local faces */
+  std::vector<face_t> m_faces;
 
-	/** Mapping from global face ids to local face ids */
-	g2l_t m_facesg2l;
+  /** Mapping from global face ids to local face ids */
+  g2l_t m_facesg2l;
 
-	/** List of all local edges */
-	std::vector<edge_t> m_edges;
+  /** List of all local edges */
+  std::vector<edge_t> m_edges;
 
-	/** Mapping from global edge ids to local edge ids */
-	g2l_t m_edgesg2l;
+  /** Mapping from global edge ids to local edge ids */
+  g2l_t m_edgesg2l;
 
-	/** List of all local vertices */
-	std::vector<vertex_t> m_vertices;
+  /** List of all local vertices */
+  std::vector<vertex_t> m_vertices;
 
-	/** Mapping from global vertex ids to locl vertex ids */
-	g2l_t m_verticesg2l;
+  /** Mapping from global vertex ids to locl vertex ids */
+  g2l_t m_verticesg2l;
 
-	/** Maps from local vertex ids to to a local face ids */
-	internal::VertexElementMap<internal::Topology<Topo>::facevertices()> m_v2f;
+  /** Maps from local vertex ids to to a local face ids */
+  internal::VertexElementMap<internal::Topology<Topo>::facevertices()> m_v2f;
 
-	/** Maps from local vertex ids to local edge ids */
-	internal::VertexElementMap<2> m_v2e;
+  /** Maps from local vertex ids to local edge ids */
+  internal::VertexElementMap<2> m_v2e;
 
-	/** User cell data */
-	std::vector<void*> m_cellData;
+  /** User cell data */
+  std::vector<void*> m_cellData;
 
-	/** User vertex data */
-	std::vector<void*> m_vertexData;
+  /** User vertex data */
+  std::vector<void*> m_vertexData;
 
-	/** Original user vertex data */
-	std::vector<void*> m_originalVertexData;
+  /** Original user vertex data */
+  std::vector<void*> m_originalVertexData;
 
-	std::vector<std::size_t> m_cellDataSize;
+  std::vector<std::size_t> m_cellDataSize;
 
-	std::vector<std::size_t> m_vertexDataSize;
+  std::vector<std::size_t> m_vertexDataSize;
 
 #ifdef USE_MPI
 
-	std::vector<MPI_Datatype> m_cellDataType;
-	std::vector<bool> m_cellDataTypeDerived;
+  std::vector<MPI_Datatype> m_cellDataType;
+  std::vector<bool> m_cellDataTypeDerived;
 
-	std::vector<MPI_Datatype> m_vertexDataType;
-	std::vector<bool> m_vertexDataTypeDerived;
+  std::vector<MPI_Datatype> m_vertexDataType;
+  std::vector<bool> m_vertexDataTypeDerived;
 #endif
 
-	std::pair<MPI_Datatype, bool> createDatatypeArray(MPI_Datatype baseType, std::size_t elemSize) {
-		if (elemSize == 1) {
-			return {baseType, false};
-		}
-		MPI_Datatype newType;
-		MPI_Type_contiguous(elemSize, baseType, &newType);
-		MPI_Type_commit(&newType);
-		return {newType, true};
-	}
-
-public:
-	PUML() :
+  auto createDatatypeArray(MPI_Datatype baseType, std::size_t elemSize)
+      -> std::pair<MPI_Datatype, bool> {
+    if (elemSize == 1) {
+      return {baseType, false};
+    }
+    MPI_Datatype newType = MPI_DATATYPE_NULL;
+    MPI_Type_contiguous(elemSize, baseType, &newType);
+    MPI_Type_commit(&newType);
+    return {newType, true};
+  }
+
+  public:
+  PUML()
+      :
 #ifdef USE_MPI
-		m_comm(MPI_COMM_WORLD),
+        m_comm(MPI_COMM_WORLD),
 #endif // USE_MPI
-		m_originalCells(0L),
-		m_originalVertices(0L)
-	{ }
+        m_originalCells(0L) {
+  }
 
-	virtual ~PUML()
-	{
-		delete [] m_originalCells;
-		delete [] m_originalVertices;
+  virtual ~PUML() {
+    delete[] m_originalCells;
+    delete[] m_originalVertices;
 
-		for (const auto& i : m_cellData) {
-			std::free(i);
-		}
+    for (const auto& i : m_cellData) {
+      std::free(i);
+    }
 
-		for (const auto& i : m_vertexData) {
-			std::free(i);
-		}
+    for (const auto& i : m_vertexData) {
+      std::free(i);
+    }
 
-		for (const auto& i : m_originalVertexData) {
-			std::free(i);
-		}
+    for (const auto& i : m_originalVertexData) {
+      std::free(i);
+    }
 
 #ifdef USE_MPI
-		for (size_t i = 0; i < m_cellDataType.size(); ++i) {
-			if (m_cellDataTypeDerived[i]) {
-				MPI_Type_free(&m_cellDataType[i]);
-			}
-		}
-
-		for (size_t i = 0; i < m_vertexDataType.size(); ++i) {
-			if (m_vertexDataTypeDerived[i]) {
-				MPI_Type_free(&m_vertexDataType[i]);
-			}
-		}
+    for (size_t i = 0; i < m_cellDataType.size(); ++i) {
+      if (m_cellDataTypeDerived[i]) {
+        MPI_Type_free(&m_cellDataType[i]);
+      }
+    }
+
+    for (size_t i = 0; i < m_vertexDataType.size(); ++i) {
+      if (m_vertexDataTypeDerived[i]) {
+        MPI_Type_free(&m_vertexDataType[i]);
+      }
+    }
 #endif
-	}
+  }
 
 #ifdef USE_MPI
-	void setComm(MPI_Comm comm)
-	{
-		m_comm = comm;
-	}
+  void setComm(MPI_Comm comm) { m_comm = comm; }
 #endif // USE_MPI
 
-	void open(const char* cellName, const char* vertexName)
-	{
-		int rank = 0;
-		int procs = 1;
+  void open(const char* cellName, const char* vertexName) {
+    int rank = 0;
+    int procs = 1;
 #ifdef USE_MPI
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
 #endif // USE_MPI
 
-		std::vector<std::string> cellNames = utils::StringUtils::split(cellName, ':');
-		if (cellNames.size() != 2) {
-			logError() << "Cells name must have the form \"filename:/dataset\"";
-		}
+    std::vector<std::string> cellNames = utils::StringUtils::split(cellName, ':');
+    if (cellNames.size() != 2) {
+      logError() << "Cells name must have the form \"filename:/dataset\"";
+    }
 
-		std::vector<std::string> vertexNames = utils::StringUtils::split(vertexName, ':');
-		if (vertexNames.size() != 2) {
-			logError() << "Vertices name must have the form \"filename:/dataset\"";
-		}
+    std::vector<std::string> vertexNames = utils::StringUtils::split(vertexName, ':');
+    if (vertexNames.size() != 2) {
+      logError() << "Vertices name must have the form \"filename:/dataset\"";
+    }
 
-		// Open the cell file
-		hid_t h5plist = H5Pcreate(H5P_FILE_ACCESS);
-		checkH5Err(h5plist);
+    // Open the cell file
+    hid_t h5plist = H5Pcreate(H5P_FILE_ACCESS);
+    checkH5Err(h5plist);
 #ifdef USE_MPI
-		checkH5Err(H5Pset_fapl_mpio(h5plist, m_comm, MPI_INFO_NULL));
+    checkH5Err(H5Pset_fapl_mpio(h5plist, m_comm, MPI_INFO_NULL));
 #endif // USE_MPI
 
-		hid_t h5file = H5Fopen(cellNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
-		checkH5Err(h5file);
+    hid_t h5file = H5Fopen(cellNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
+    checkH5Err(h5file);
 
-		// Get cell dataset
-		hid_t h5dataset = H5Dopen(h5file, cellNames[1].c_str(), H5P_DEFAULT);
-		checkH5Err(h5dataset);
+    // Get cell dataset
+    hid_t h5dataset = H5Dopen(h5file, cellNames[1].c_str(), H5P_DEFAULT);
+    checkH5Err(h5dataset);
 
-		// Check the size of cell dataset
-		hid_t h5space = H5Dget_space(h5dataset);
-		checkH5Err(h5space);
-		if (H5Sget_simple_extent_ndims(h5space) != 2) {
-			logError() << "Cell dataset must have 2 dimensions";
-		}
-		hsize_t dims[2];
-		checkH5Err(H5Sget_simple_extent_dims(h5space, dims, 0L));
-		if (dims[1] != internal::Topology<Topo>::cellvertices()) {
-			logError() << "Each cell must have" << internal::Topology<Topo>::cellvertices() << "vertices";
-		}
+    // Check the size of cell dataset
+    hid_t h5space = H5Dget_space(h5dataset);
+    checkH5Err(h5space);
+    if (H5Sget_simple_extent_ndims(h5space) != 2) {
+      logError() << "Cell dataset must have 2 dimensions";
+    }
+    hsize_t dims[2];
+    checkH5Err(H5Sget_simple_extent_dims(h5space, dims, nullptr));
+    if (dims[1] != internal::Topology<Topo>::cellvertices()) {
+      logError() << "Each cell must have" << internal::Topology<Topo>::cellvertices() << "vertices";
+    }
 
-		logInfo(rank) << "Found" << dims[0] << "cells";
-		auto cellDistributor = Distributor(dims[0], procs);
+    logInfo(rank) << "Found" << dims[0] << "cells";
+    auto cellDistributor = Distributor(dims[0], procs);
 
-		// Read the cells
-		m_originalTotalSize[0] = dims[0];
-		auto[offsetCells, sizeCells] = cellDistributor.offsetAndSize(rank);
-		m_originalSize[0] = sizeCells;
+    // Read the cells
+    m_originalTotalSize[0] = dims[0];
+    auto [offsetCells, sizeCells] = cellDistributor.offsetAndSize(rank);
+    m_originalSize[0] = sizeCells;
 
-		hsize_t start[2] = {offsetCells, 0};
-		hsize_t count[2] = {m_originalSize[0], internal::Topology<Topo>::cellvertices()};
+    hsize_t start[2] = {offsetCells, 0};
+    hsize_t count[2] = {m_originalSize[0], internal::Topology<Topo>::cellvertices()};
 
-		checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L));
+    checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, nullptr, count, nullptr));
 
-		hid_t h5memspace = H5Screate_simple(2, count, 0L);
-		checkH5Err(h5memspace);
+    hid_t h5memspace = H5Screate_simple(2, count, nullptr);
+    checkH5Err(h5memspace);
 
-		hid_t h5alist = H5Pcreate(H5P_DATASET_XFER);
-		checkH5Err(h5alist);
+    hid_t h5alist = H5Pcreate(H5P_DATASET_XFER);
+    checkH5Err(h5alist);
 #ifdef USE_MPI
-		checkH5Err(H5Pset_dxpl_mpio(h5alist, H5FD_MPIO_COLLECTIVE));
+    checkH5Err(H5Pset_dxpl_mpio(h5alist, H5FD_MPIO_COLLECTIVE));
 #endif // USE_MPI
 
-		m_originalCells = new ocell_t[m_originalSize[0]];
-		checkH5Err(H5Dread(h5dataset, H5T_NATIVE_ULONG, h5memspace, h5space, h5alist, m_originalCells));
-
-		// Close cells
-		checkH5Err(H5Sclose(h5space));
-		checkH5Err(H5Sclose(h5memspace));
-		checkH5Err(H5Dclose(h5dataset));
-		checkH5Err(H5Fclose(h5file));
-
-		// Open the vertex file
-		h5file = H5Fopen(vertexNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
-		checkH5Err(h5file);
-
-		// Get vertex dataset
-		h5dataset = H5Dopen(h5file, vertexNames[1].c_str(), H5P_DEFAULT);
-		checkH5Err(h5dataset);
-
-		// Check the size of vertex dataset
-		h5space = H5Dget_space(h5dataset);
-		checkH5Err(h5space);
-		if (H5Sget_simple_extent_ndims(h5space) != 2) {
-			logError() << "Vertex dataset must have 2 dimensions";
-		}
-		checkH5Err(H5Sget_simple_extent_dims(h5space, dims, 0L));
-		if (dims[1] != 3) {
-			logError() << "Each vertex must have xyz coordinate";
-		}
-
-		logInfo(rank) << "Found" << dims[0] << "vertices";
-		auto vertexDistributor = Distributor(dims[0], procs);
-
-		// Read the vertices
-		m_originalTotalSize[1] = dims[0];
-		auto[offsetVertices, sizeVertices] = vertexDistributor.offsetAndSize(rank);
-		m_originalSize[1] = sizeVertices;
-
-		start[0] = offsetVertices;
-		count[0] = m_originalSize[1]; count[1] = 3;
-
-		checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L));
-
-		h5memspace = H5Screate_simple(2, count, 0L);
-		checkH5Err(h5memspace);
-
-		m_originalVertices = new overtex_t[m_originalSize[1]];
-		checkH5Err(H5Dread(h5dataset, H5T_NATIVE_DOUBLE, h5memspace, h5space, h5alist, m_originalVertices));
-
-		// Close vertices
-		checkH5Err(H5Sclose(h5space));
-		checkH5Err(H5Sclose(h5memspace));
-		checkH5Err(H5Dclose(h5dataset));
-		checkH5Err(H5Fclose(h5file));
-
-		// Close other H5 stuff
-		checkH5Err(H5Pclose(h5plist));
-		checkH5Err(H5Pclose(h5alist));
-	}
-
-    template<typename T>
-	void addDataArray(const T* rawData, DataType type, const std::vector<size_t>& sizes
+    m_originalCells = new ocell_t[m_originalSize[0]];
+    checkH5Err(H5Dread(h5dataset, H5T_NATIVE_ULONG, h5memspace, h5space, h5alist, m_originalCells));
+
+    // Close cells
+    checkH5Err(H5Sclose(h5space));
+    checkH5Err(H5Sclose(h5memspace));
+    checkH5Err(H5Dclose(h5dataset));
+    checkH5Err(H5Fclose(h5file));
+
+    // Open the vertex file
+    h5file = H5Fopen(vertexNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
+    checkH5Err(h5file);
+
+    // Get vertex dataset
+    h5dataset = H5Dopen(h5file, vertexNames[1].c_str(), H5P_DEFAULT);
+    checkH5Err(h5dataset);
+
+    // Check the size of vertex dataset
+    h5space = H5Dget_space(h5dataset);
+    checkH5Err(h5space);
+    if (H5Sget_simple_extent_ndims(h5space) != 2) {
+      logError() << "Vertex dataset must have 2 dimensions";
+    }
+    checkH5Err(H5Sget_simple_extent_dims(h5space, dims, nullptr));
+    if (dims[1] != 3) {
+      logError() << "Each vertex must have xyz coordinate";
+    }
+
+    logInfo(rank) << "Found" << dims[0] << "vertices";
+    auto vertexDistributor = Distributor(dims[0], procs);
+
+    // Read the vertices
+    m_originalTotalSize[1] = dims[0];
+    auto [offsetVertices, sizeVertices] = vertexDistributor.offsetAndSize(rank);
+    m_originalSize[1] = sizeVertices;
+
+    start[0] = offsetVertices;
+    count[0] = m_originalSize[1];
+    count[1] = 3;
+
+    checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, nullptr, count, nullptr));
+
+    h5memspace = H5Screate_simple(2, count, nullptr);
+    checkH5Err(h5memspace);
+
+    m_originalVertices = new overtex_t[m_originalSize[1]];
+    checkH5Err(
+        H5Dread(h5dataset, H5T_NATIVE_DOUBLE, h5memspace, h5space, h5alist, m_originalVertices));
+
+    // Close vertices
+    checkH5Err(H5Sclose(h5space));
+    checkH5Err(H5Sclose(h5memspace));
+    checkH5Err(H5Dclose(h5dataset));
+    checkH5Err(H5Fclose(h5file));
+
+    // Close other H5 stuff
+    checkH5Err(H5Pclose(h5plist));
+    checkH5Err(H5Pclose(h5alist));
+  }
+
+  template <typename T>
+  void addDataArray(const T* rawData,
+                    DataType type,
+                    const std::vector<size_t>& sizes
 #ifdef USE_MPI
-		, MPI_Datatype mpiType = MPITypeInfer<T>::type()
+                    ,
+                    MPI_Datatype mpiType = MPITypeInfer<T>::type()
 #endif
-	)
-	{
-		static_assert(std::is_trivially_copyable_v<T>, "T needs to be trivially copyable");
-		static_assert(std::is_trivially_default_constructible_v<T>, "T needs to be trivially default constructible");
-		int rank = 0;
-		int procs = 1;
+  ) {
+    static_assert(std::is_trivially_copyable_v<T>, "T needs to be trivially copyable");
+    static_assert(std::is_trivially_default_constructible_v<T>,
+                  "T needs to be trivially default constructible");
+    int rank = 0;
+    int procs = 1;
 #ifdef USE_MPI
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
 #endif // USE_MPI
 
-		auto cellDistributor = Distributor(m_originalTotalSize[type], procs);
-		auto[offset, localSize] = cellDistributor.offsetAndSize(rank);
-
-		size_t elemSize = 1;
-		for (auto size : sizes) {
-			elemSize *= size;
-		}
-
-		void* data = std::malloc(sizeof(T) * localSize * elemSize);
-        std::memcpy(data, rawData, sizeof(T) * localSize * elemSize);
-		switch (type) {
-		case CELL:
-		{
-			m_cellData.push_back(data);
-			m_cellDataSize.push_back(sizeof(T) * elemSize);
+    auto cellDistributor = Distributor(m_originalTotalSize[type], procs);
+    auto [offset, localSize] = cellDistributor.offsetAndSize(rank);
+
+    size_t elemSize = 1;
+    for (auto size : sizes) {
+      elemSize *= size;
+    }
+
+    void* data = std::malloc(sizeof(T) * localSize * elemSize);
+    std::memcpy(data, rawData, sizeof(T) * localSize * elemSize);
+    switch (type) {
+    case CELL: {
+      m_cellData.push_back(data);
+      m_cellDataSize.push_back(sizeof(T) * elemSize);
 #ifdef USE_MPI
-			auto [type, derived] = createDatatypeArray(mpiType, elemSize);
-			m_cellDataType.push_back(type);
-			m_cellDataTypeDerived.push_back(derived);
+      auto [type, derived] = createDatatypeArray(mpiType, elemSize);
+      m_cellDataType.push_back(type);
+      m_cellDataTypeDerived.push_back(derived);
 #endif
-		}
-			break;
-		case VERTEX:
-		{
-			m_originalVertexData.push_back(data);
-			m_vertexDataSize.push_back(sizeof(T) * elemSize);
+    } break;
+    case VERTEX: {
+      m_originalVertexData.push_back(data);
+      m_vertexDataSize.push_back(sizeof(T) * elemSize);
 #ifdef USE_MPI
-			auto [type, derived] = createDatatypeArray(mpiType, elemSize);
-			m_vertexDataType.push_back(type);
-			m_vertexDataTypeDerived.push_back(derived);
+      auto [type, derived] = createDatatypeArray(mpiType, elemSize);
+      m_vertexDataType.push_back(type);
+      m_vertexDataTypeDerived.push_back(derived);
 #endif
-		}
-			break;
-		}
-	}
+    } break;
+    }
+  }
 
-	template<typename T = int>
-	void addData(const char* dataName, DataType type, const std::vector<size_t>& sizes
+  template <typename T = int>
+  void addData(const char* dataName,
+               DataType type,
+               const std::vector<size_t>& sizes
 #ifdef USE_MPI
-		, MPI_Datatype mpiType = MPITypeInfer<T>::type()
+               ,
+               MPI_Datatype mpiType = MPITypeInfer<T>::type()
 #endif
-		, hid_t hdf5Type = HDF5TypeInfer<T>::type()
-	)
-	{
-		static_assert(std::is_trivially_copyable_v<T>, "T needs to be trivially copyable");
-		static_assert(std::is_trivially_default_constructible_v<T>, "T needs to be trivially default constructible");
-		int rank = 0;
-		int procs = 1;
+                   ,
+               hid_t hdf5Type = HDF5TypeInfer<T>::type()) {
+    static_assert(std::is_trivially_copyable_v<T>, "T needs to be trivially copyable");
+    static_assert(std::is_trivially_default_constructible_v<T>,
+                  "T needs to be trivially default constructible");
+    int rank = 0;
+    int procs = 1;
 #ifdef USE_MPI
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
 #endif // USE_MPI
 
-		auto cellDistributor = Distributor(m_originalTotalSize[0], procs);
-		std::vector<std::string> dataNames = utils::StringUtils::split(dataName, ':');
-		if (dataNames.size() != 2) {
-			logError() << "Data name must have the form \"filename:/dataset\"";
-		}
+    auto cellDistributor = Distributor(m_originalTotalSize[0], procs);
+    std::vector<std::string> dataNames = utils::StringUtils::split(dataName, ':');
+    if (dataNames.size() != 2) {
+      logError() << "Data name must have the form \"filename:/dataset\"";
+    }
 
-		// Open the cell file
-		hid_t h5plist = H5Pcreate(H5P_FILE_ACCESS);
-		checkH5Err(h5plist);
+    // Open the cell file
+    hid_t h5plist = H5Pcreate(H5P_FILE_ACCESS);
+    checkH5Err(h5plist);
 #ifdef USE_MPI
-		checkH5Err(H5Pset_fapl_mpio(h5plist, m_comm, MPI_INFO_NULL));
+    checkH5Err(H5Pset_fapl_mpio(h5plist, m_comm, MPI_INFO_NULL));
 #endif // USE_MPI
 
-		hid_t h5file = H5Fopen(dataNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
-		checkH5Err(h5file);
+    hid_t h5file = H5Fopen(dataNames[0].c_str(), H5F_ACC_RDONLY, h5plist);
+    checkH5Err(h5file);
 
-		unsigned long totalSize = m_originalTotalSize[type];
+    unsigned long totalSize = m_originalTotalSize[type];
 
-		// Get cell dataset
-		hid_t h5dataset = H5Dopen(h5file, dataNames[1].c_str(), H5P_DEFAULT);
-		checkH5Err(h5dataset);
+    // Get cell dataset
+    hid_t h5dataset = H5Dopen(h5file, dataNames[1].c_str(), H5P_DEFAULT);
+    checkH5Err(h5dataset);
 
-		// Check the size of cell dataset
-		hid_t h5space = H5Dget_space(h5dataset);
-		checkH5Err(h5space);
-		if (H5Sget_simple_extent_ndims(h5space) != 1) {
-			logError() << "Dataset must have 1 dimension";
-		}
-		hsize_t dim;
-		checkH5Err(H5Sget_simple_extent_dims(h5space, &dim, 0L));
-		if (dim != totalSize) {
-			logError() << "Dataset has the wrong size";
-		}
+    // Check the size of cell dataset
+    hid_t h5space = H5Dget_space(h5dataset);
+    checkH5Err(h5space);
+    const auto dimcount = H5Sget_simple_extent_ndims(h5space);
+    if (dimcount != 1 + sizes.size()) {
+      logError() << "Dataset must have" << 1 + sizes.size() << "dimension(s), but it has"
+                 << dimcount;
+    }
+    std::vector<hsize_t> dim(1 + sizes.size());
+    checkH5Err(H5Sget_simple_extent_dims(h5space, dim.data(), nullptr));
+    if (dim[0] != totalSize) {
+      logError() << "Dataset has the wrong size:" << dim[0] << "vs." << totalSize;
+    }
+    for (std::size_t i = 0; i < sizes.size(); ++i) {
+      if (dim[i + 1] != sizes[i]) {
+        const std::vector<hsize_t> subdims(dim.begin() + 1, dim.end());
+        logError() << "Dataset has the wrong subsize:" << subdims << "vs." << sizes;
+      }
+    }
 
-		// Read the cells
-		auto[offset, localSize] = cellDistributor.offsetAndSize(rank);
+    // Read the cells
+    auto [offset, localSize] = cellDistributor.offsetAndSize(rank);
 
-		size_t elemSize = 1;
-		for (auto size : sizes) {
-			elemSize *= size;
-		}
+    size_t elemSize = 1;
+    for (auto size : sizes) {
+      elemSize *= size;
+    }
 
-		std::vector<hsize_t> start = {offset};
-		std::vector<hsize_t> count = {localSize};
+    std::vector<hsize_t> start = {offset};
+    std::vector<hsize_t> count = {localSize};
 
-		for (auto size : sizes) {
-			start.push_back(0);
-			count.push_back(size);
-		}
+    for (auto size : sizes) {
+      start.push_back(0);
+      count.push_back(size);
+    }
 
-		checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start.data(), 0L, count.data(), 0L));
+    checkH5Err(
+        H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start.data(), nullptr, count.data(), nullptr));
 
-		hid_t h5memspace = H5Screate_simple(count.size(), count.data(), 0L);
-		checkH5Err(h5memspace);
+    hid_t h5memspace = H5Screate_simple(count.size(), count.data(), nullptr);
+    checkH5Err(h5memspace);
 
-		hid_t h5alist = H5Pcreate(H5P_DATASET_XFER);
-		checkH5Err(h5alist);
+    hid_t h5alist = H5Pcreate(H5P_DATASET_XFER);
+    checkH5Err(h5alist);
 #ifdef USE_MPI
-		checkH5Err(H5Pset_dxpl_mpio(h5alist, H5FD_MPIO_COLLECTIVE));
+    checkH5Err(H5Pset_dxpl_mpio(h5alist, H5FD_MPIO_COLLECTIVE));
 #endif // USE_MPI
 
-		void* data = std::malloc(sizeof(T) * localSize * elemSize);
-		checkH5Err(H5Dread(h5dataset, hdf5Type, h5memspace, h5space, h5alist, data));
+    void* data = std::malloc(sizeof(T) * localSize * elemSize);
+    checkH5Err(H5Dread(h5dataset, hdf5Type, h5memspace, h5space, h5alist, data));
 
-		// Close data
-		checkH5Err(H5Sclose(h5space));
-		checkH5Err(H5Sclose(h5memspace));
-		checkH5Err(H5Dclose(h5dataset));
-		checkH5Err(H5Fclose(h5file));
+    // Close data
+    checkH5Err(H5Sclose(h5space));
+    checkH5Err(H5Sclose(h5memspace));
+    checkH5Err(H5Dclose(h5dataset));
+    checkH5Err(H5Fclose(h5file));
 
-		// Close other H5 stuff
-		checkH5Err(H5Pclose(h5plist));
-		checkH5Err(H5Pclose(h5alist));
+    // Close other H5 stuff
+    checkH5Err(H5Pclose(h5plist));
+    checkH5Err(H5Pclose(h5alist));
 
-		switch (type) {
-		case CELL:
-		{
-			m_cellData.push_back(data);
-			m_cellDataSize.push_back(sizeof(T) * elemSize);
+    switch (type) {
+    case CELL: {
+      m_cellData.push_back(data);
+      m_cellDataSize.push_back(sizeof(T) * elemSize);
 #ifdef USE_MPI
-			auto [type, derived] = createDatatypeArray(mpiType, elemSize);
-			m_cellDataType.push_back(type);
-			m_cellDataTypeDerived.push_back(derived);
+      auto [type, derived] = createDatatypeArray(mpiType, elemSize);
+      m_cellDataType.push_back(type);
+      m_cellDataTypeDerived.push_back(derived);
 #endif
-		}
-			break;
-		case VERTEX:
-		{
-			m_originalVertexData.push_back(data);
-			m_vertexDataSize.push_back(sizeof(T) * elemSize);
+    } break;
+    case VERTEX: {
+      m_originalVertexData.push_back(data);
+      m_vertexDataSize.push_back(sizeof(T) * elemSize);
 #ifdef USE_MPI
-			auto [type, derived] = createDatatypeArray(mpiType, elemSize);
-			m_vertexDataType.push_back(type);
-			m_vertexDataTypeDerived.push_back(derived);
+      auto [type, derived] = createDatatypeArray(mpiType, elemSize);
+      m_vertexDataType.push_back(type);
+      m_vertexDataTypeDerived.push_back(derived);
 #endif
-		}
-			break;
-		}
-	}
-
-	void partition(int* partition)
-	{
-		int rank = 0;
-		int procs = 1;
-#ifdef USE_MPI
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
-#endif // USE_MPI
+    } break;
+    }
+  }
 
-		// Create sorting indices
-		unsigned int* indices = new unsigned int[m_originalSize[0]];
-		for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-			indices[i] = i;
-		}
-
-		std::sort(indices, indices+m_originalSize[0], [&](unsigned int i1, unsigned int i2)
-			{
-				return partition[i1] < partition[i2];
-			});
-
-		// Sort cells
-		ocell_t* newCells = new ocell_t[m_originalSize[0]];
-		for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-			std::memcpy(newCells[i], m_originalCells[indices[i]], sizeof(ocell_t));
-		}
-		delete [] m_originalCells;
-		m_originalCells = newCells;
-
-		// Sort other data
-		for (std::size_t j = 0; j < m_cellData.size(); ++j) {
-			void* newData = std::malloc(m_originalSize[0] * m_cellDataSize[j]);
-			for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-				std::memcpy(reinterpret_cast<char*>(newData) + m_cellDataSize[j] * i, reinterpret_cast<char*>(m_cellData[j]) + m_cellDataSize[j] * indices[i], m_cellDataSize[j]);
-			}
-
-			std::free(m_cellData[j]);
-			m_cellData[j] = newData;
-		}
-
-		delete [] indices;
-
-		// Compute exchange info
-		int* sendCount = new int[procs];
-		memset(sendCount, 0, procs * sizeof(int));
-
-		for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-			assert(partition[i] < procs);
-			sendCount[partition[i]]++;
-		}
-
-		int* recvCount = new int[procs];
+  void partition(const int* partition) {
+    int rank = 0;
+    int procs = 1;
 #ifdef USE_MPI
-		MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1, MPI_INT, m_comm);
-#else // USE_MPI
-		recvCount[0] = sendCount[0];
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
 #endif // USE_MPI
 
-		int *sDispls = new int[procs];
-		int *rDispls = new int[procs];
-		sDispls[0] = 0;
-		rDispls[0] = 0;
-		for (int i = 1; i < procs; i++) {
-			sDispls[i] = sDispls[i-1] + sendCount[i-1];
-			rDispls[i] = rDispls[i-1] + recvCount[i-1];
-		}
+    // Create sorting indices
+    auto* indices = new unsigned int[m_originalSize[0]];
+    for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+      indices[i] = i;
+    }
 
-		m_originalSize[0] = rDispls[procs-1] + recvCount[procs-1];
+    std::sort(indices, indices + m_originalSize[0], [&](unsigned int i1, unsigned int i2) {
+      return partition[i1] < partition[i2];
+    });
 
+    // Sort cells
+    auto* newCells = new ocell_t[m_originalSize[0]];
+    for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+      std::memcpy(newCells[i], m_originalCells[indices[i]], sizeof(ocell_t));
+    }
+    delete[] m_originalCells;
+    m_originalCells = newCells;
+
+    // Sort other data
+    for (std::size_t j = 0; j < m_cellData.size(); ++j) {
+      void* newData = std::malloc(m_originalSize[0] * m_cellDataSize[j]);
+      for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+        std::memcpy(reinterpret_cast<char*>(newData) + (m_cellDataSize[j] * i),
+                    reinterpret_cast<char*>(m_cellData[j]) + (m_cellDataSize[j] * indices[i]),
+                    m_cellDataSize[j]);
+      }
+
+      std::free(m_cellData[j]);
+      m_cellData[j] = newData;
+    }
+
+    delete[] indices;
+
+    // Compute exchange info
+    int* sendCount = new int[procs];
+    memset(sendCount, 0, procs * sizeof(int));
+
+    for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+      assert(partition[i] < procs);
+      sendCount[partition[i]]++;
+    }
+
+    int* recvCount = new int[procs];
 #ifdef USE_MPI
-		// Exchange the cells
-		MPI_Datatype cellType;
-		MPI_Type_contiguous(internal::Topology<Topo>::cellvertices(), MPI_UNSIGNED_LONG, &cellType);
-		MPI_Type_commit(&cellType);
-
-		newCells = new ocell_t[m_originalSize[0]];
-		MPI_Alltoallv(m_originalCells, sendCount, sDispls, cellType,
-			newCells, recvCount, rDispls, cellType,
-			m_comm);
-
-		delete [] m_originalCells;
-		m_originalCells = newCells;
-
-		MPI_Type_free(&cellType);
-
-		// Exchange cell data
-		for (std::size_t j = 0; j < m_cellData.size(); ++j) {
-			void* newData = std::malloc(m_originalSize[0] * m_cellDataSize[j]);
-			MPI_Alltoallv(m_cellData[j], sendCount, sDispls, m_cellDataType[j],
-				newData, recvCount, rDispls, m_cellDataType[j],
-				m_comm);
-
-			std::free(m_cellData[j]);
-			m_cellData[j] = newData;
-		}
+    MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1, MPI_INT, m_comm);
+#else  // USE_MPI
+    recvCount[0] = sendCount[0];
 #endif // USE_MPI
 
-		delete [] sendCount;
-		delete [] recvCount;
-		delete [] sDispls;
-		delete [] rDispls;
-	}
+    int* sDispls = new int[procs];
+    int* rDispls = new int[procs];
+    sDispls[0] = 0;
+    rDispls[0] = 0;
+    for (int i = 1; i < procs; i++) {
+      sDispls[i] = sDispls[i - 1] + sendCount[i - 1];
+      rDispls[i] = rDispls[i - 1] + recvCount[i - 1];
+    }
+
+    m_originalSize[0] = rDispls[procs - 1] + recvCount[procs - 1];
 
-	void generateMesh()
-	{
-		int rank = 0;
-		int procs = 1;
 #ifdef USE_MPI
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
+    // Exchange the cells
+    MPI_Datatype cellType = MPI_DATATYPE_NULL;
+    MPI_Type_contiguous(internal::Topology<Topo>::cellvertices(), MPI_UNSIGNED_LONG, &cellType);
+    MPI_Type_commit(&cellType);
+
+    newCells = new ocell_t[m_originalSize[0]];
+    MPI_Alltoallv(m_originalCells,
+                  sendCount,
+                  sDispls,
+                  cellType,
+                  newCells,
+                  recvCount,
+                  rDispls,
+                  cellType,
+                  m_comm);
+
+    delete[] m_originalCells;
+    m_originalCells = newCells;
+
+    MPI_Type_free(&cellType);
+
+    // Exchange cell data
+    for (std::size_t j = 0; j < m_cellData.size(); ++j) {
+      void* newData = std::malloc(m_originalSize[0] * m_cellDataSize[j]);
+      MPI_Alltoallv(m_cellData[j],
+                    sendCount,
+                    sDispls,
+                    m_cellDataType[j],
+                    newData,
+                    recvCount,
+                    rDispls,
+                    m_cellDataType[j],
+                    m_comm);
+
+      std::free(m_cellData[j]);
+      m_cellData[j] = newData;
+    }
 #endif // USE_MPI
 
-		auto vertexDistributor = Distributor(m_originalTotalSize[1], procs);
-		// Generate a list of vertices we need from other processors
-		std::unordered_set<unsigned long>* requiredVertexSets = new std::unordered_set<unsigned long>[procs];
-		for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-			for (unsigned int j = 0; j < internal::Topology<Topo>::cellvertices(); j++) {
-				int proc = vertexDistributor.rankOfEntity(m_originalCells[i][j]);
-				assert(proc < procs);
-
-				requiredVertexSets[proc].insert(m_originalCells[i][j]); // Convert to local vid
-			}
-		}
-
-		// Generate information for requesting vertices
-		unsigned int totalVertices = requiredVertexSets[0].size();
-		for (int i = 1; i < procs; i++)
-			totalVertices += requiredVertexSets[i].size();
-
-		int* sendCount = new int[procs];
-
-		unsigned long* requiredVertices = new unsigned long[totalVertices];
-		unsigned int k = 0;
-		for (int i = 0; i < procs; i++) {
-			sendCount[i] = requiredVertexSets[i].size();
-
-			for (std::unordered_set<unsigned long>::const_iterator it = requiredVertexSets[i].begin();
-					it != requiredVertexSets[i].end(); ++it) {
-				assert(k < totalVertices);
-				requiredVertices[k++] = *it;
-			}
-		}
-
-		delete [] requiredVertexSets;
-
-		// Exchange required vertex information
-		int* recvCount = new int[procs];
+    delete[] sendCount;
+    delete[] recvCount;
+    delete[] sDispls;
+    delete[] rDispls;
+  }
+
+  void generateMesh() {
+    int rank = 0;
+    int procs = 1;
 #ifdef USE_MPI
-		MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1, MPI_INT, m_comm);
-#else // USE_MPI
-		recvCount[0] = sendCount[0];
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
 #endif // USE_MPI
 
-		int *sDispls = new int[procs];
-		int *rDispls = new int[procs];
-		sDispls[0] = 0;
-		rDispls[0] = 0;
-		for (int i = 1; i < procs; i++) {
-			sDispls[i] = sDispls[i-1] + sendCount[i-1];
-			rDispls[i] = rDispls[i-1] + recvCount[i-1];
-		}
+    auto vertexDistributor = Distributor(m_originalTotalSize[1], procs);
+    // Generate a list of vertices we need from other processors
+    auto* requiredVertexSets = new std::unordered_set<unsigned long>[procs];
+    for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+      for (unsigned int j = 0; j < internal::Topology<Topo>::cellvertices(); j++) {
+        int proc = vertexDistributor.rankOfEntity(m_originalCells[i][j]);
+        assert(proc < procs);
 
-		unsigned int totalRecv = rDispls[procs-1] + recvCount[procs-1];
+        requiredVertexSets[proc].insert(m_originalCells[i][j]); // Convert to local vid
+      }
+    }
 
-		unsigned long* distribVertexIds = new unsigned long[totalRecv];
+    // Generate information for requesting vertices
+    unsigned int totalVertices = requiredVertexSets[0].size();
+    for (int i = 1; i < procs; i++) {
+      totalVertices += requiredVertexSets[i].size();
+    }
+
+    int* sendCount = new int[procs];
+
+    auto* requiredVertices = new unsigned long[totalVertices];
+    unsigned int k = 0;
+    for (int i = 0; i < procs; i++) {
+      sendCount[i] = requiredVertexSets[i].size();
+
+      for (unsigned long it : requiredVertexSets[i]) {
+        assert(k < totalVertices);
+        requiredVertices[k++] = it;
+      }
+    }
+
+    delete[] requiredVertexSets;
+
+    // Exchange required vertex information
+    int* recvCount = new int[procs];
 #ifdef USE_MPI
-		MPI_Alltoallv(requiredVertices, sendCount, sDispls, MPI_UNSIGNED_LONG,
-			distribVertexIds, recvCount, rDispls, MPI_UNSIGNED_LONG,
-			m_comm);
+    MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1, MPI_INT, m_comm);
+#else  // USE_MPI
+    recvCount[0] = sendCount[0];
 #endif // USE_MPI
 
-		// Send back vertex coordinates (an other data)
-		overtex_t* distribVertices = new overtex_t[totalRecv];
-		std::vector<void*> distribData;
-		distribData.resize(m_originalVertexData.size());
-		for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
-			distribData[i] = std::malloc(totalRecv * m_vertexDataSize[i]);
-		}
-		std::vector<int>* sharedRanks = new std::vector<int>[m_originalSize[1]];
-		k = 0;
-		for (int i = 0; i < procs; i++) {
-			for (int j = 0; j < recvCount[i]; j++) {
-				assert(k < totalRecv);
-				distribVertexIds[k] = vertexDistributor.globalToLocalId(rank, distribVertexIds[k]);
-
-				assert(distribVertexIds[k] < m_originalSize[1]);
-				std::memcpy(distribVertices[k], m_originalVertices[distribVertexIds[k]], sizeof(overtex_t));
-
-				// Handle other vertex data
-				for (unsigned int l = 0; l < m_originalVertexData.size(); l++) {
-					std::memcpy(reinterpret_cast<char*>(distribData[l]) + m_vertexDataSize[l] * k, reinterpret_cast<char*>(m_originalVertexData[l]) + m_vertexDataSize[l] * distribVertexIds[k], m_vertexDataSize[l]);
-				}
-
-				// Save all ranks for each vertex
-				sharedRanks[distribVertexIds[k]].push_back(i);
-
-				k++;
-			}
-		}
-
-		overtex_t* recvVertices = new overtex_t[totalVertices];
-
-		for (auto& it : m_vertexData) {
-			std::free(it);
-		}
-		m_vertexData.resize(m_originalVertexData.size());
-		for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
-			m_vertexData[i] = std::malloc(totalVertices * m_vertexDataSize[i]);
-		}
+    int* sDispls = new int[procs];
+    int* rDispls = new int[procs];
+    sDispls[0] = 0;
+    rDispls[0] = 0;
+    for (int i = 1; i < procs; i++) {
+      sDispls[i] = sDispls[i - 1] + sendCount[i - 1];
+      rDispls[i] = rDispls[i - 1] + recvCount[i - 1];
+    }
+
+    const unsigned int totalRecv = rDispls[procs - 1] + recvCount[procs - 1];
+
+    auto* distribVertexIds = new unsigned long[totalRecv];
 #ifdef USE_MPI
-		MPI_Datatype vertexType;
-		MPI_Type_contiguous(3, MPI_DOUBLE, &vertexType);
-		MPI_Type_commit(&vertexType);
+    MPI_Alltoallv(requiredVertices,
+                  sendCount,
+                  sDispls,
+                  MPI_UNSIGNED_LONG,
+                  distribVertexIds,
+                  recvCount,
+                  rDispls,
+                  MPI_UNSIGNED_LONG,
+                  m_comm);
+#endif // USE_MPI
 
-		MPI_Alltoallv(distribVertices, recvCount, rDispls, vertexType,
-			recvVertices, sendCount, sDispls, vertexType,
-			m_comm);
+    // Send back vertex coordinates (and other data)
+    auto* distribVertices = new overtex_t[totalRecv];
+    std::vector<void*> distribData;
+    distribData.resize(m_originalVertexData.size());
+    for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
+      distribData[i] = std::malloc(totalRecv * m_vertexDataSize[i]);
+    }
+    auto* sharedRanks = new std::vector<int>[m_originalSize[1]];
+    k = 0;
+    for (int i = 0; i < procs; i++) {
+      for (int j = 0; j < recvCount[i]; j++) {
+        assert(k < totalRecv);
+        distribVertexIds[k] = vertexDistributor.globalToLocalId(rank, distribVertexIds[k]);
+
+        assert(distribVertexIds[k] < m_originalSize[1]);
+        std::memcpy(distribVertices[k], m_originalVertices[distribVertexIds[k]], sizeof(overtex_t));
+
+        // Handle other vertex data
+        for (unsigned int l = 0; l < m_originalVertexData.size(); l++) {
+          std::memcpy(reinterpret_cast<char*>(distribData[l]) + (m_vertexDataSize[l] * k),
+                      reinterpret_cast<char*>(m_originalVertexData[l]) +
+                          (m_vertexDataSize[l] * distribVertexIds[k]),
+                      m_vertexDataSize[l]);
+        }
+
+        // Save all ranks for each vertex
+        sharedRanks[distribVertexIds[k]].push_back(i);
+
+        k++;
+      }
+    }
 
-		MPI_Type_free(&vertexType);
+    auto* recvVertices = new overtex_t[totalVertices];
 
-		for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
-			MPI_Alltoallv(distribData[i], recvCount, rDispls, m_vertexDataType[i],
-				m_vertexData[i], sendCount, sDispls, m_vertexDataType[i],
-				m_comm);
-		}
+    for (auto& it : m_vertexData) {
+      std::free(it);
+    }
+    m_vertexData.resize(m_originalVertexData.size());
+    for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
+      m_vertexData[i] = std::malloc(totalVertices * m_vertexDataSize[i]);
+    }
+#ifdef USE_MPI
+    MPI_Datatype vertexType = MPI_DATATYPE_NULL;
+    MPI_Type_contiguous(3, MPI_DOUBLE, &vertexType);
+    MPI_Type_commit(&vertexType);
+
+    MPI_Alltoallv(distribVertices,
+                  recvCount,
+                  rDispls,
+                  vertexType,
+                  recvVertices,
+                  sendCount,
+                  sDispls,
+                  vertexType,
+                  m_comm);
+
+    MPI_Type_free(&vertexType);
+
+    for (unsigned int i = 0; i < m_originalVertexData.size(); i++) {
+      MPI_Alltoallv(distribData[i],
+                    recvCount,
+                    rDispls,
+                    m_vertexDataType[i],
+                    m_vertexData[i],
+                    sendCount,
+                    sDispls,
+                    m_vertexDataType[i],
+                    m_comm);
+    }
 #endif // USE_MPI
 
-		delete [] distribVertices;
-		for (auto& it : distribData) {
-			std::free(it);
-		}
-		distribData.clear();
-
-		// Send back the number of shared ranks for each vertex
-		unsigned int* distNsharedRanks = new unsigned int[totalRecv];
-		unsigned int distTotalSharedRanks = 0;
-		for (unsigned int i = 0; i < totalRecv; i++) {
-			assert(distribVertexIds[i] < m_originalSize[1]);
-			distNsharedRanks[i] = sharedRanks[distribVertexIds[i]].size();
-			distTotalSharedRanks += distNsharedRanks[i];
-		}
-
-		unsigned int* recvNsharedRanks = new unsigned int[totalVertices];
+    delete[] distribVertices;
+    for (auto& it : distribData) {
+      std::free(it);
+    }
+    distribData.clear();
+
+    // Send back the number of shared ranks for each vertex
+    auto* distNsharedRanks = new unsigned int[totalRecv];
+    unsigned int distTotalSharedRanks = 0;
+    for (unsigned int i = 0; i < totalRecv; i++) {
+      assert(distribVertexIds[i] < m_originalSize[1]);
+      distNsharedRanks[i] = sharedRanks[distribVertexIds[i]].size();
+      distTotalSharedRanks += distNsharedRanks[i];
+    }
+
+    auto* recvNsharedRanks = new unsigned int[totalVertices];
 #ifdef USE_MPI
-		MPI_Alltoallv(distNsharedRanks, recvCount, rDispls, MPI_UNSIGNED,
-			recvNsharedRanks, sendCount, sDispls, MPI_UNSIGNED,
-			m_comm);
+    MPI_Alltoallv(distNsharedRanks,
+                  recvCount,
+                  rDispls,
+                  MPI_UNSIGNED,
+                  recvNsharedRanks,
+                  sendCount,
+                  sDispls,
+                  MPI_UNSIGNED,
+                  m_comm);
 #endif // USE_MPI
 
-		delete [] distNsharedRanks;
+    delete[] distNsharedRanks;
 
-		// Setup buffers for exchanging shared ranks
-		int* sharedSendCount = new int[procs];
-		memset(sharedSendCount, 0, procs * sizeof(int));
+    // Setup buffers for exchanging shared ranks
+    int* sharedSendCount = new int[procs];
+    memset(sharedSendCount, 0, procs * sizeof(int));
 
-		int* distSharedRanks = new int[distTotalSharedRanks];
-		k = 0;
-		unsigned int l = 0;
-		for (int i = 0; i < procs; i++) {
-			for (int j = 0; j < recvCount[i]; j++) {
-				assert(k < totalRecv);
-				assert(l + sharedRanks[distribVertexIds[k]].size() <= distTotalSharedRanks);
-				memcpy(&distSharedRanks[l], &sharedRanks[distribVertexIds[k]][0], sharedRanks[distribVertexIds[k]].size() * sizeof(int));
-				l += sharedRanks[distribVertexIds[k]].size();
+    int* distSharedRanks = new int[distTotalSharedRanks];
+    k = 0;
+    unsigned int l = 0;
+    for (int i = 0; i < procs; i++) {
+      for (int j = 0; j < recvCount[i]; j++) {
+        assert(k < totalRecv);
+        assert(l + sharedRanks[distribVertexIds[k]].size() <= distTotalSharedRanks);
+        memcpy(&distSharedRanks[l],
+               sharedRanks[distribVertexIds[k]].data(),
+               sharedRanks[distribVertexIds[k]].size() * sizeof(int));
+        l += sharedRanks[distribVertexIds[k]].size();
 
-				sharedSendCount[i] += sharedRanks[distribVertexIds[k]].size();
+        sharedSendCount[i] += sharedRanks[distribVertexIds[k]].size();
 
-				k++;
-			}
-		}
+        k++;
+      }
+    }
 
-		delete [] distribVertexIds;
-		delete [] sharedRanks;
-		delete [] recvCount;
+    delete[] distribVertexIds;
+    delete[] sharedRanks;
+    delete[] recvCount;
 
-		int* sharedRecvCount = new int[procs];
-		memset(sharedRecvCount, 0, procs * sizeof(int));
+    int* sharedRecvCount = new int[procs];
+    memset(sharedRecvCount, 0, procs * sizeof(int));
 
-		unsigned int recvTotalSharedRanks = 0;
-		k = 0;
-		for (int i = 0; i < procs; i++) {
-			for (int j = 0; j < sendCount[i]; j++) {
-				assert(k < totalVertices);
-				recvTotalSharedRanks += recvNsharedRanks[k];
-				sharedRecvCount[i] += recvNsharedRanks[k];
+    unsigned int recvTotalSharedRanks = 0;
+    k = 0;
+    for (int i = 0; i < procs; i++) {
+      for (int j = 0; j < sendCount[i]; j++) {
+        assert(k < totalVertices);
+        recvTotalSharedRanks += recvNsharedRanks[k];
+        sharedRecvCount[i] += recvNsharedRanks[k];
 
-				k++;
-			}
-		}
+        k++;
+      }
+    }
 
-		delete [] sendCount;
+    delete[] sendCount;
 
-		int* recvSharedRanks = new int[recvTotalSharedRanks];
+    int* recvSharedRanks = new int[recvTotalSharedRanks];
 
-		sDispls[0] = 0;
-		rDispls[0] = 0;
-		for (int i = 1; i < procs; i++) {
-			sDispls[i] = sDispls[i-1] + sharedSendCount[i-1];
-			rDispls[i] = rDispls[i-1] + sharedRecvCount[i-1];
-		}
+    sDispls[0] = 0;
+    rDispls[0] = 0;
+    for (int i = 1; i < procs; i++) {
+      sDispls[i] = sDispls[i - 1] + sharedSendCount[i - 1];
+      rDispls[i] = rDispls[i - 1] + sharedRecvCount[i - 1];
+    }
 
 #ifdef USE_MPI
-		MPI_Alltoallv(distSharedRanks, sharedSendCount, sDispls, MPI_INT,
-			recvSharedRanks, sharedRecvCount, rDispls, MPI_INT,
-			m_comm);
+    MPI_Alltoallv(distSharedRanks,
+                  sharedSendCount,
+                  sDispls,
+                  MPI_INT,
+                  recvSharedRanks,
+                  sharedRecvCount,
+                  rDispls,
+                  MPI_INT,
+                  m_comm);
 #endif // USE_MPI
 
-		delete [] distSharedRanks;
-		delete [] sharedSendCount;
-		delete [] sharedRecvCount;
-		delete [] sDispls;
-		delete [] rDispls;
-
-		// Generate the vertex array
-		m_vertices.resize(totalVertices);
-
-		k = 0;
-		for (unsigned int i = 0; i < totalVertices; i++) {
-			m_vertices[i].m_gid = requiredVertices[i];
-			memcpy(m_vertices[i].m_coordinate, recvVertices[i], sizeof(overtex_t));
-			m_vertices[i].m_sharedRanks.resize(recvNsharedRanks[i]-1);
-			unsigned int l = 0;
-			for (unsigned int j = 0; j < recvNsharedRanks[i]; j++) {
-				if (recvSharedRanks[k] != rank)
-					m_vertices[i].m_sharedRanks[l++] = recvSharedRanks[k];
-				k++;
-			}
-			std::sort(m_vertices[i].m_sharedRanks.begin(), m_vertices[i].m_sharedRanks.end());
-		}
-
-		delete [] requiredVertices;
-		delete [] recvVertices;
-		delete [] recvSharedRanks;
-		delete [] recvNsharedRanks;
-
-		// Construct to g2l map for the vertices
-		constructG2L(m_vertices, m_verticesg2l);
-
-		// Create the cell, face and edge list
-		m_cells.resize(m_originalSize[0]);
-		m_v2f.clear();
-		m_faces.clear();
-		m_v2e.clear();
-
-		unsigned long cellOffset = m_originalSize[0];
+    delete[] distSharedRanks;
+    delete[] sharedSendCount;
+    delete[] sharedRecvCount;
+    delete[] sDispls;
+    delete[] rDispls;
+
+    // Generate the vertex array
+    m_vertices.resize(totalVertices);
+
+    k = 0;
+    for (unsigned int i = 0; i < totalVertices; i++) {
+      m_vertices[i].m_gid = requiredVertices[i];
+      memcpy(m_vertices[i].m_coordinate, recvVertices[i], sizeof(overtex_t));
+      m_vertices[i].m_sharedRanks.resize(recvNsharedRanks[i] - 1);
+      unsigned int l = 0;
+      for (unsigned int j = 0; j < recvNsharedRanks[i]; j++) {
+        if (recvSharedRanks[k] != rank) {
+          m_vertices[i].m_sharedRanks[l++] = recvSharedRanks[k];
+        }
+        k++;
+      }
+      std::sort(m_vertices[i].m_sharedRanks.begin(), m_vertices[i].m_sharedRanks.end());
+    }
+
+    delete[] requiredVertices;
+    delete[] recvVertices;
+    delete[] recvSharedRanks;
+    delete[] recvNsharedRanks;
+
+    // Construct to g2l map for the vertices
+    constructG2L(m_vertices, m_verticesg2l);
+
+    // Create the cell, face and edge list
+    m_cells.resize(m_originalSize[0]);
+    m_v2f.clear();
+    m_faces.clear();
+    m_v2e.clear();
+
+    unsigned long cellOffset = m_originalSize[0];
 #ifdef USE_MPI
-		MPI_Scan(MPI_IN_PLACE, &cellOffset, 1, MPI_UNSIGNED_LONG, MPI_SUM, m_comm);
+    MPI_Scan(MPI_IN_PLACE, &cellOffset, 1, MPI_UNSIGNED_LONG, MPI_SUM, m_comm);
 #endif // USE_MPI
-		cellOffset -= m_originalSize[0];
-
-		std::vector<std::set<unsigned int> > edgeUpward;
-		std::set<unsigned int>* vertexUpward = new std::set<unsigned int>[m_vertices.size()];
-
-		for (unsigned int i = 0; i < m_originalSize[0]; i++) {
-			m_cells[i].m_gid = i + cellOffset;
-
-			for (unsigned int j = 0; j < internal::Topology<Topo>::cellvertices(); j++)
-				m_cells[i].m_vertices[j] = m_verticesg2l[m_originalCells[i][j]];
-
-			// TODO adapt for hex
-
-			// Faces
-			unsigned int v[internal::Topology<Topo>::facevertices()];
-			unsigned int faces[internal::Topology<Topo>::cellfaces()];
-			v[0] = m_cells[i].m_vertices[1];
-			v[1] = m_cells[i].m_vertices[0];
-			v[2] = m_cells[i].m_vertices[2];
-			faces[0] = addFace(m_v2f.add(v), i);
-			v[0] = m_cells[i].m_vertices[0];
-			v[1] = m_cells[i].m_vertices[1];
-			v[2] = m_cells[i].m_vertices[3];
-			faces[1] = addFace(m_v2f.add(v), i);
-			v[0] = m_cells[i].m_vertices[1];
-			v[1] = m_cells[i].m_vertices[2];
-			v[2] = m_cells[i].m_vertices[3];
-			faces[2] = addFace(m_v2f.add(v), i);
-			v[0] = m_cells[i].m_vertices[2];
-			v[1] = m_cells[i].m_vertices[0];
-			v[2] = m_cells[i].m_vertices[3];
-			faces[3] = addFace(m_v2f.add(v), i);
-
-			// Edges
-			unsigned int edges[internal::Topology<Topo>::celledges()];
-			v[0] = m_cells[i].m_vertices[0];
-			v[1] = m_cells[i].m_vertices[1];
-			edges[0] = addEdge(edgeUpward, m_v2e.add(v), faces[0], faces[1]);
-			v[0] = m_cells[i].m_vertices[1];
-			v[1] = m_cells[i].m_vertices[2];
-			edges[1] = addEdge(edgeUpward, m_v2e.add(v), faces[0], faces[2]);
-			v[0] = m_cells[i].m_vertices[2];
-			v[1] = m_cells[i].m_vertices[0];
-			edges[2] = addEdge(edgeUpward, m_v2e.add(v), faces[0], faces[3]);
-			v[0] = m_cells[i].m_vertices[0];
-			v[1] = m_cells[i].m_vertices[3];
-			edges[3] = addEdge(edgeUpward, m_v2e.add(v), faces[1], faces[3]);
-			v[0] = m_cells[i].m_vertices[1];
-			v[1] = m_cells[i].m_vertices[3];
-			edges[4] = addEdge(edgeUpward, m_v2e.add(v), faces[1], faces[2]);
-			v[0] = m_cells[i].m_vertices[2];
-			v[1] = m_cells[i].m_vertices[3];
-			edges[5] = addEdge(edgeUpward, m_v2e.add(v), faces[2], faces[3]);
-
-			// Vertices (upward information)
-			vertexUpward[m_cells[i].m_vertices[0]].insert(edges[0]);
-			vertexUpward[m_cells[i].m_vertices[0]].insert(edges[2]);
-			vertexUpward[m_cells[i].m_vertices[0]].insert(edges[3]);
-			vertexUpward[m_cells[i].m_vertices[1]].insert(edges[0]);
-			vertexUpward[m_cells[i].m_vertices[1]].insert(edges[1]);
-			vertexUpward[m_cells[i].m_vertices[1]].insert(edges[4]);
-			vertexUpward[m_cells[i].m_vertices[2]].insert(edges[1]);
-			vertexUpward[m_cells[i].m_vertices[2]].insert(edges[2]);
-			vertexUpward[m_cells[i].m_vertices[2]].insert(edges[5]);
-			vertexUpward[m_cells[i].m_vertices[3]].insert(edges[3]);
-			vertexUpward[m_cells[i].m_vertices[3]].insert(edges[4]);
-			vertexUpward[m_cells[i].m_vertices[3]].insert(edges[5]);
-		}
-
-		// Create edges
-		m_edges.clear();
-		m_edges.resize(edgeUpward.size());
-		for (unsigned int i = 0; i < m_edges.size(); i++) {
-			assert(m_edges[i].m_upward.empty());
-			m_edges[i].m_upward.resize(edgeUpward[i].size());
-			unsigned int j = 0;
-			for (std::set<unsigned int>::const_iterator it = edgeUpward[i].begin();
-					it != edgeUpward[i].end(); ++it, j++) {
-				m_edges[i].m_upward[j] = *it;
-			}
-		}
-		edgeUpward.clear(); // Free memory
-
-		// Set vertex upward information
-		for (unsigned int i = 0; i < m_vertices.size(); i++) {
-			m_vertices[i].m_upward.resize(vertexUpward[i].size());
-			unsigned int j = 0;
-			for (std::set<unsigned int>::const_iterator it = vertexUpward[i].begin();
-					it != vertexUpward[i].end(); ++it, j++) {
-				m_vertices[i].m_upward[j] = *it;
-			}
-		}
-		delete [] vertexUpward;
-
-		// Generate shared information and global ids for edges
-		generatedSharedAndGID<edge_t, vertex_t, 2>(m_edges, m_vertices);
-
-		// Generate shared information and global ids for faces
-		generatedSharedAndGID<face_t, edge_t, internal::Topology<Topo>::faceedges()>(m_faces, m_edges);
-	}
-
-	/**
-	 * @return The total number of all cells within the mesh.
-	 */
-	unsigned int numTotalCells() const
-	{
-		return m_originalTotalSize[0];
-	}
-
-	/**
-	 * @return The number of original cells on this rank
-	 *
-	 * @note This value can change when {@link partition()} is called
-	 */
-	unsigned int numOriginalCells() const
-	{
-		return m_originalSize[0];
-	}
-
-	/**
-	 * @return The number of original vertices on this rank
-	 */
-	unsigned int numOriginalVertices() const
-	{
-		return m_originalSize[1];
-	}
-
-	/**
-	 * @return The original cells on this rank
-	 *
-	 * @note The pointer gets invalid when {@link partition()} is called
-	 */
-	const ocell_t* originalCells() const
-	{
-		return m_originalCells;
-	}
-
-	/**
-	 * @return The original vertices on this rank
-	 */
-	const overtex_t* originalVertices() const
-	{
-		return m_originalVertices;
-	}
-
-	/**
-	 * @return Original user cell data
-	 */
-	const void* originalCellData(unsigned int index) const
-	{
-		return cellData(index); // This is the same
-	}
-
-	/**
-	 * @return Original user vertex data
-	 */
-	const void* originalVertexData(unsigned int index) const
-	{
-		return m_originalVertexData[index];
-	}
-
-	/**
-	 * @return The cells of the mesh
-	 */
-	const std::vector<cell_t>& cells() const
-	{
-		return m_cells;
-	}
-
-	/**
-	 * @return The faces of the mesh
-	 */
-	const std::vector<face_t>& faces() const
-	{
-		return m_faces;
-	}
-
-	/**
-	 * @return The edges of the mesh
-	 */
-	const std::vector<edge_t>& edges() const
-	{
-		return m_edges;
-	}
-
-	/**
-	 * @return The vertices of the mesh
-	 */
-	const std::vector<vertex_t>& vertices() const
-	{
-		return m_vertices;
-	}
-
-	/**
-	 * @return User cell data
-	 */
-	const void* cellData(unsigned int index) const
-	{
-		return m_cellData[index];
-	}
-
-	/**
-	 * @return User vertex data
-	 */
-	const void* vertexData(unsigned int index) const
-	{
-		return m_vertexData[index];
-	}
-
-	/**
-	 * @param vertexIds A list of local vertex ids
-	 * @return The local face id for the given set of vertices or <code>-1</code> if
-	 *  the face does not exist
-	 */
-	int faceByVertices(unsigned int vertexIds[internal::Topology<Topo>::facevertices()]) const
-	{
-		return m_v2f.find(vertexIds);
-	}
+    cellOffset -= m_originalSize[0];
+
+    std::vector<std::set<unsigned int>> edgeUpward;
+    auto* vertexUpward = new std::set<unsigned int>[m_vertices.size()];
+
+    for (unsigned int i = 0; i < m_originalSize[0]; i++) {
+      m_cells[i].m_gid = i + cellOffset;
+
+      for (unsigned int j = 0; j < internal::Topology<Topo>::cellvertices(); j++) {
+        m_cells[i].m_vertices[j] = m_verticesg2l[m_originalCells[i][j]];
+      }
+
+      // Faces
+      unsigned int v[internal::Topology<Topo>::facevertices()];
+      unsigned int faces[internal::Topology<Topo>::cellfaces()];
+      for (unsigned int j = 0; j < internal::Topology<Topo>::cellfaces(); ++j) {
+        const auto& face = internal::Numbering<Topo>::facevertices()[j];
+        v[0] = m_cells[i].m_vertices[face[0]];
+        v[1] = m_cells[i].m_vertices[face[1]];
+        v[2] = m_cells[i].m_vertices[face[2]];
+        faces[j] = addFace(m_v2f.add(v), i);
+      }
+
+      // Edges + Vertex upward information
+      for (unsigned int j = 0; j < internal::Topology<Topo>::celledges(); ++j) {
+        const auto& edge = internal::Numbering<Topo>::edgevertices()[j];
+        const auto& edgeadj = internal::Numbering<Topo>::edgefaces()[j];
+        v[0] = m_cells[i].m_vertices[edge[0]];
+        v[1] = m_cells[i].m_vertices[edge[1]];
+        unsigned int edgeIdx =
+            addEdge(edgeUpward, m_v2e.add(v), faces[edgeadj[0]], faces[edgeadj[1]]);
+        vertexUpward[m_cells[i].m_vertices[edge[0]]].insert(edgeIdx);
+        vertexUpward[m_cells[i].m_vertices[edge[1]]].insert(edgeIdx);
+      }
+    }
+
+    // Create edges
+    m_edges.clear();
+    m_edges.resize(edgeUpward.size());
+    for (unsigned int i = 0; i < m_edges.size(); i++) {
+      assert(m_edges[i].m_upward.empty());
+      m_edges[i].m_upward.resize(edgeUpward[i].size());
+      unsigned int j = 0;
+      for (auto it = edgeUpward[i].begin(); it != edgeUpward[i].end(); ++it, j++) {
+        m_edges[i].m_upward[j] = *it;
+      }
+    }
+    edgeUpward.clear(); // Free memory
+
+    // Set vertex upward information
+    for (unsigned int i = 0; i < m_vertices.size(); i++) {
+      m_vertices[i].m_upward.resize(vertexUpward[i].size());
+      unsigned int j = 0;
+      for (auto it = vertexUpward[i].begin(); it != vertexUpward[i].end(); ++it, j++) {
+        m_vertices[i].m_upward[j] = *it;
+      }
+    }
+    delete[] vertexUpward;
+
+    // Generate shared information and global ids for edges
+    generatedSharedAndGID<edge_t, vertex_t, 2>(m_edges, m_vertices);
+
+    // Generate shared information and global ids for faces
+    generatedSharedAndGID<face_t, edge_t, internal::Topology<Topo>::faceedges()>(m_faces, m_edges);
+  }
+
+  /**
+   * @return The total number of all cells within the mesh.
+   */
+  auto numTotalCells() const -> unsigned int { return m_originalTotalSize[0]; }
+
+  /**
+   * @return The number of original cells on this rank
+   *
+   * @note This value can change when {@link partition()} is called
+   */
+  auto numOriginalCells() const -> unsigned int { return m_originalSize[0]; }
+
+  /**
+   * @return The number of original vertices on this rank
+   */
+  auto numOriginalVertices() const -> unsigned int { return m_originalSize[1]; }
+
+  /**
+   * @return The original cells on this rank
+   *
+   * @note The pointer gets invalid when {@link partition()} is called
+   */
+  auto originalCells() const -> const ocell_t* { return m_originalCells; }
+
+  /**
+   * @return The original vertices on this rank
+   */
+  auto originalVertices() const -> const overtex_t* { return m_originalVertices; }
+
+  /**
+   * @return Original user cell data
+   */
+  auto originalCellData(unsigned int index) const -> const void* {
+    return cellData(index); // This is the same
+  }
+
+  /**
+   * @return Original user vertex data
+   */
+  auto originalVertexData(unsigned int index) const -> const void* {
+    return m_originalVertexData[index];
+  }
+
+  /**
+   * @return The cells of the mesh
+   */
+  auto cells() const -> const std::vector<cell_t>& { return m_cells; }
+
+  /**
+   * @return The faces of the mesh
+   */
+  auto faces() const -> const std::vector<face_t>& { return m_faces; }
+
+  /**
+   * @return The edges of the mesh
+   */
+  auto edges() const -> const std::vector<edge_t>& { return m_edges; }
+
+  /**
+   * @return The vertices of the mesh
+   */
+  auto vertices() const -> const std::vector<vertex_t>& { return m_vertices; }
+
+  /**
+   * @return User cell data
+   */
+  auto cellData(unsigned int index) const -> const void* { return m_cellData[index]; }
+
+  /**
+   * @return User vertex data
+   */
+  auto vertexData(unsigned int index) const -> const void* { return m_vertexData[index]; }
+
+  /**
+   * @param vertexIds A list of local vertex ids
+   * @return The local face id for the given set of vertices or <code>-1</code> if
+   *  the face does not exist
+   */
+  auto faceByVertices(unsigned int vertexIds[internal::Topology<Topo>::facevertices()]) const
+      -> int {
+    return m_v2f.find(vertexIds);
+  }
 
 #ifdef USE_MPI
-	/**
-	 * @return The MPI communicator used by this class. (this field only exists, if MPI is enabled)
-	 */
-	const MPI_Comm& comm() const
-	{
-		return m_comm;
-	}
+  /**
+   * @return The MPI communicator used by this class. (this field only exists, if MPI is enabled)
+   */
+  auto comm() const -> const MPI_Comm& { return m_comm; }
 #endif
 
-private:
-	/**
-	 * Add a face but only if it does not exist yet
-	 *
-	 * @param lid The local id of the face
-	 * @param plid The local id of the parent
-	 * @return The local id of the face
-	 */
-	unsigned int addFace(unsigned int lid, unsigned int plid)
-	{
-		if (lid < m_faces.size()) {
-			// Update an old face
-			assert(m_faces[lid].m_upward[1] == -1);
-			m_faces[lid].m_upward[1] = plid;
-			if (m_faces[lid].m_upward[1] < m_faces[lid].m_upward[0])
-				std::swap(m_faces[lid].m_upward[0], m_faces[lid].m_upward[1]);
-		} else {
-			// New face
-			assert(lid == m_faces.size());
-
-			face_t face;
-			face.m_upward[0] = plid;
-			face.m_upward[1] = -1;
-			m_faces.push_back(face);
-		}
-
-		return lid;
-	}
-
-	/**
-	 * Generates the shared information and global ids from the downward elements
-	 *
-	 * @param elements The elements for which the data should be generated
-	 * @param down The downward elements
-	 * @tparam N The number of downward elements
-	 */
-	template<typename E, typename D, unsigned int N>
-	void generatedSharedAndGID(std::vector<E> &elements, const std::vector<D> &down)
-	{
+  private:
+  /**
+   * Add a face but only if it does not exist yet
+   *
+   * @param lid The local id of the face
+   * @param plid The local id of the parent
+   * @return The local id of the face
+   */
+  auto addFace(unsigned int lid, unsigned int plid) -> unsigned int {
+    if (lid < m_faces.size()) {
+      // Update an old face
+      assert(m_faces[lid].m_upward[1] == -1);
+      m_faces[lid].m_upward[1] = plid;
+      if (m_faces[lid].m_upward[1] < m_faces[lid].m_upward[0]) {
+        std::swap(m_faces[lid].m_upward[0], m_faces[lid].m_upward[1]);
+      }
+    } else {
+      // New face
+      assert(lid == m_faces.size());
+
+      face_t face;
+      face.m_upward[0] = plid;
+      face.m_upward[1] = -1;
+      m_faces.push_back(face);
+    }
+
+    return lid;
+  }
+
+  /**
+   * Generates the shared information and global ids from the downward elements
+   *
+   * @param elements The elements for which the data should be generated
+   * @param down The downward elements
+   * @tparam N The number of downward elements
+   */
+  template <typename E, typename D, unsigned int N>
+  void generatedSharedAndGID(std::vector<E>& elements, const std::vector<D>& down) {
 #ifdef USE_MPI
-		// Collect all shared ranks for each element and downward gids
-		const std::vector<int> ** allShared = new const std::vector<int>*[elements.size() * N];
-		memset(allShared, 0, elements.size() * N * sizeof(std::vector<int>*));
-		unsigned long* downward = new unsigned long[elements.size() * N];
-		unsigned int* downPos = new unsigned int[elements.size()];
-		memset(downPos, 0, elements.size() * sizeof(unsigned int));
-
-		for (typename std::vector<D>::const_iterator it = down.begin();
-				it != down.end(); ++it) {
-			for (std::vector<int>::const_iterator it2 = it->m_upward.begin();
-					it2 != it->m_upward.end(); ++it2) {
-				assert(downPos[*it2] < N);
-				allShared[*it2 * N + downPos[*it2]] = &it->m_sharedRanks;
-				downward[*it2 * N + downPos[*it2]] = it->m_gid;
-				downPos[*it2]++;
-			}
-		}
-
-		delete [] downPos;
-
-		// Create the intersection of the shared ranks and update the elements
-		assert(N >= 2);
-		for (unsigned int i = 0; i < elements.size(); i++) {
-			assert(allShared[i*N]);
-			assert(allShared[i*N+1]);
-
-			std::set_intersection(allShared[i*N]->begin(), allShared[i*N]->end(),
-				allShared[i*N + 1]->begin(), allShared[i*N + 1]->end(),
-				std::back_inserter(elements[i].m_sharedRanks));
-
-			std::vector<int> buffer;
-			for (unsigned int j = 2; j < N; j++) {
-				buffer.clear();
-
-				assert(allShared[i*N+j]);
-				std::set_intersection(elements[i].m_sharedRanks.begin(), elements[i].m_sharedRanks.end(),
-					allShared[i*N + j]->begin(), allShared[i*N + j]->end(),
-					std::back_inserter(buffer));
-
-				std::swap(elements[i].m_sharedRanks, buffer);
-			}
-		}
-
-		delete [] allShared;
-
-		// Eliminate false positves
-		int rank, procs;
-		MPI_Comm_rank(m_comm, &rank);
-		MPI_Comm_size(m_comm, &procs);
-
-		int* nShared = new int[procs];
-		memset(nShared, 0, procs * sizeof(int));
-		for (typename std::vector<E>::const_iterator it = elements.begin();
-				it != elements.end(); ++it) {
-			for (std::vector<int>::const_iterator it2 = it->m_sharedRanks.begin();
-					it2 != it->m_sharedRanks.end(); ++it2) {
-				nShared[*it2]++;
-			}
-		}
-
-		int *nRecvShared = new int[procs];
-		MPI_Alltoall(nShared, 1, MPI_INT, nRecvShared, 1, MPI_INT, m_comm);
-
-		int *sDispls = new int[procs];
-		int *rDispls = new int[procs];
-		sDispls[0] = 0;
-		rDispls[0] = 0;
-		for (int i = 1; i < procs; i++) {
-			sDispls[i] = sDispls[i-1] + nShared[i-1];
-			rDispls[i] = rDispls[i-1] + nRecvShared[i-1];
-		}
-
-		unsigned int totalShared = sDispls[procs-1] + nShared[procs-1];
-
-		unsigned int* sharedPos = new unsigned int[procs];
-		memset(sharedPos, 0, procs * sizeof(unsigned int));
-
-		unsigned long* sendShared = new unsigned long[totalShared * N];
-
-		for (unsigned int i = 0; i < elements.size(); i++) {
-			for (std::vector<int>::const_iterator it = elements[i].m_sharedRanks.begin();
-					it != elements[i].m_sharedRanks.end(); ++it) {
-				assert(sharedPos[*it] < static_cast<unsigned>(nShared[*it]));
-				memcpy(&sendShared[(sDispls[*it]+sharedPos[*it])*N], &downward[i * N],
-					N * sizeof(unsigned long));
-				sharedPos[*it]++;
-			}
-		}
-
-		delete [] sharedPos;
-
-		unsigned int totalRecvShared = rDispls[procs-1] + nRecvShared[procs-1];
-
-		unsigned long* recvShared = new unsigned long[totalRecvShared * N];
-
-		MPI_Datatype type;
-		MPI_Type_contiguous(N, MPI_UNSIGNED_LONG, &type);
-		MPI_Type_commit(&type);
-
-		MPI_Alltoallv(sendShared, nShared, sDispls, type,
-			recvShared, nRecvShared, rDispls, type,
-			m_comm);
-
-		delete [] nShared;
-		delete [] sendShared;
-
-		std::unordered_set<internal::DownElement<N>, internal::DownElementHash<N> >* hashedElements
-			= new std::unordered_set<internal::DownElement<N>, internal::DownElementHash<N> >[procs];
-
-		unsigned int k = 0;
-		for (int i = 0; i < procs; i++) {
-			assert(i != rank || nRecvShared[i] == 0);
-			for (int j = 0; j < nRecvShared[i]; j++) {
-				assert(k < totalRecvShared);
-				assert(hashedElements[i].find(&recvShared[k*N]) == hashedElements[i].end());
-				hashedElements[i].emplace(&recvShared[k*N]);
-				k++;
-			}
-		}
-
-		delete [] nRecvShared;
-		delete [] recvShared;
-
-		unsigned int e = 0;
-		for (unsigned int i = 0; i < elements.size(); i++) {
-			internal::DownElement<N> delem(&downward[i * N]);
-
-			std::vector<int>::iterator it = elements[i].m_sharedRanks.begin();
-			while (it != elements[i].m_sharedRanks.end()) {
-				if (hashedElements[*it].find(delem) == hashedElements[*it].end()) {
-					if ((rank == 0 && *it == 3) || (rank == 3 && *it == 0))
-						e++;
-					it = elements[i].m_sharedRanks.erase(it);
-				} else {
-					++it;
-				}
-			}
-		}
-
-		delete [] hashedElements;
-
-		// Count owned elements
-		unsigned int owned = 0;
-		for (typename std::vector<E>::const_iterator it = elements.begin();
-				it != elements.end(); ++it) {
-			if (it->m_sharedRanks.empty() || it->m_sharedRanks[0] > rank)
-				owned++;
-		}
-
-		// Get global id offset
-		unsigned long gidOffset = owned;
-		MPI_Scan(MPI_IN_PLACE, &gidOffset, 1, MPI_UNSIGNED_LONG, MPI_SUM, m_comm);
-		gidOffset -= owned;
-
-		// Set global ids for owned elements and count the number of elements we need to forward
-		int* nSendGid = new int[procs];
-		memset(nSendGid, 0, procs * sizeof(int));
-		int* nRecvGid = new int[procs];
-		memset(nRecvGid, 0, procs * sizeof(int));
-		for (typename std::vector<E>::iterator it = elements.begin();
-				it != elements.end(); ++it) {
-			if (it->m_sharedRanks.empty() || it->m_sharedRanks[0] > rank) {
-				it->m_gid = gidOffset++;
-
-				for (std::vector<int>::const_iterator it2 = it->m_sharedRanks.begin();
-						it2 != it->m_sharedRanks.end(); ++it2) {
-					nSendGid[*it2]++;
-				}
-			} else {
-				it->m_gid = std::numeric_limits<unsigned long>::max();
-
-				if (!it->m_sharedRanks.empty())
-					nRecvGid[it->m_sharedRanks[0]]++;
-			}
-		}
-
-		// Compute exchange offsets
-		sDispls[0] = 0;
-		rDispls[0] = 0;
-		for (int i = 1; i < procs; i++) {
-			sDispls[i] = sDispls[i-1] + nSendGid[i-1];
-			rDispls[i] = rDispls[i-1] + nRecvGid[i-1];
-		}
-
-		unsigned int totalSendGid = sDispls[procs-1] + nSendGid[procs-1];
-		unsigned int totalRecvGid = rDispls[procs-1] + nRecvGid[procs-1];
-
-		// Collect send data
-		unsigned int* sendPos = new unsigned int[procs];
-		memset(sendPos, 0, procs * sizeof(unsigned int));
-
-		unsigned long* sendGid = new unsigned long[totalSendGid];
-		unsigned long* sendDGid = new unsigned long[totalSendGid * N];
-		for (unsigned int i = 0; i < elements.size(); i++) {
-			if (elements[i].m_sharedRanks.empty() || elements[i].m_sharedRanks[0] > rank) {
-				for (std::vector<int>::const_iterator it = elements[i].m_sharedRanks.begin();
-						it != elements[i].m_sharedRanks.end(); ++it) {
-					assert(sendPos[*it] < static_cast<unsigned>(nSendGid[*it]));
-
-					sendGid[sDispls[*it] + sendPos[*it]] = elements[i].m_gid;
-					memcpy(&sendDGid[(sDispls[*it] + sendPos[*it])*N], &downward[i*N], N*sizeof(unsigned long));
-					sendPos[*it]++;
-				}
-			}
-		}
-
-		delete [] sendPos;
-
-		// Exchange cell data
-		unsigned long* recvGid = new unsigned long[totalRecvGid];
-		unsigned long* recvDGid = new unsigned long[totalRecvGid*N];
-
-		MPI_Alltoallv(sendGid, nSendGid, sDispls, MPI_UNSIGNED_LONG,
-			recvGid, nRecvGid, rDispls, MPI_UNSIGNED_LONG,
-			m_comm);
-
-		MPI_Alltoallv(sendDGid, nSendGid, sDispls, type,
-			recvDGid, nRecvGid, rDispls, type,
-			m_comm);
-
-		MPI_Type_free(&type);
-
-		delete [] sendGid;
-		delete [] sendDGid;
-		delete [] nSendGid;
-		delete [] nRecvGid;
-		delete [] sDispls;
-		delete [] rDispls;
-
-		// Create a hash map from the received elements
-		std::unordered_map<internal::DownElement<N>, unsigned long, internal::DownElementHash<N> > dg2g;
-		for (unsigned int i = 0; i < totalRecvGid; i++) {
-			dg2g.emplace(&recvDGid[i*N], recvGid[i]);
-		}
-
-		delete [] recvGid;
-		delete [] recvDGid;
-
-		// Assign gids
-		for (unsigned int i = 0; i < elements.size(); i++) {
-			if (!elements[i].m_sharedRanks.empty() && elements[i].m_sharedRanks[0] < rank) {
-				assert(elements[i].m_gid == std::numeric_limits<unsigned long>::max());
-
-				internal::DownElement<N> delem(&downward[i * N]);
-				typename std::unordered_map<internal::DownElement<N>, unsigned long, internal::DownElementHash<N> >::const_iterator it
-					= dg2g.find(delem);
-				assert(it != dg2g.end());
-
-				elements[i].m_gid = it->second;
-			}
-		}
-
-		delete [] downward;
+    // Collect all shared ranks for each element and downward gids
+    const auto** allShared = new const std::vector<int>*[elements.size() * N];
+    memset(allShared, 0, elements.size() * N * sizeof(std::vector<int>*));
+    auto* downward = new unsigned long[elements.size() * N];
+    auto* downPos = new unsigned int[elements.size()];
+    memset(downPos, 0, elements.size() * sizeof(unsigned int));
+
+    for (typename std::vector<D>::const_iterator it = down.begin(); it != down.end(); ++it) {
+      for (auto it2 = it->m_upward.begin(); it2 != it->m_upward.end(); ++it2) {
+        assert(downPos[*it2] < N);
+        allShared[(*it2 * N) + downPos[*it2]] = &it->m_sharedRanks;
+        downward[(*it2 * N) + downPos[*it2]] = it->m_gid;
+        downPos[*it2]++;
+      }
+    }
+
+    delete[] downPos;
+
+    // Create the intersection of the shared ranks and update the elements
+    assert(N >= 2);
+    for (unsigned int i = 0; i < elements.size(); i++) {
+      assert(allShared[i * N]);
+      assert(allShared[i * N + 1]);
+
+      std::set_intersection(allShared[i * N]->begin(),
+                            allShared[i * N]->end(),
+                            allShared[(i * N) + 1]->begin(),
+                            allShared[(i * N) + 1]->end(),
+                            std::back_inserter(elements[i].m_sharedRanks));
+
+      std::vector<int> buffer;
+      for (unsigned int j = 2; j < N; j++) {
+        buffer.clear();
+
+        assert(allShared[i * N + j]);
+        std::set_intersection(elements[i].m_sharedRanks.begin(),
+                              elements[i].m_sharedRanks.end(),
+                              allShared[(i * N) + j]->begin(),
+                              allShared[(i * N) + j]->end(),
+                              std::back_inserter(buffer));
+
+        std::swap(elements[i].m_sharedRanks, buffer);
+      }
+    }
+
+    delete[] allShared;
+
+    // Eliminate false positves
+    int rank;
+    int procs;
+    MPI_Comm_rank(m_comm, &rank);
+    MPI_Comm_size(m_comm, &procs);
+
+    int* nShared = new int[procs];
+    memset(nShared, 0, procs * sizeof(int));
+    for (typename std::vector<E>::const_iterator it = elements.begin(); it != elements.end();
+         ++it) {
+      for (auto it2 = it->m_sharedRanks.begin(); it2 != it->m_sharedRanks.end(); ++it2) {
+        nShared[*it2]++;
+      }
+    }
+
+    int* nRecvShared = new int[procs];
+    MPI_Alltoall(nShared, 1, MPI_INT, nRecvShared, 1, MPI_INT, m_comm);
+
+    int* sDispls = new int[procs];
+    int* rDispls = new int[procs];
+    sDispls[0] = 0;
+    rDispls[0] = 0;
+    for (int i = 1; i < procs; i++) {
+      sDispls[i] = sDispls[i - 1] + nShared[i - 1];
+      rDispls[i] = rDispls[i - 1] + nRecvShared[i - 1];
+    }
+
+    const unsigned int totalShared = sDispls[procs - 1] + nShared[procs - 1];
+
+    auto* sharedPos = new unsigned int[procs];
+    memset(sharedPos, 0, procs * sizeof(unsigned int));
+
+    auto* sendShared = new unsigned long[totalShared * N];
+
+    for (unsigned int i = 0; i < elements.size(); i++) {
+      for (auto it = elements[i].m_sharedRanks.begin(); it != elements[i].m_sharedRanks.end();
+           ++it) {
+        assert(sharedPos[*it] < static_cast<unsigned>(nShared[*it]));
+        memcpy(&sendShared[(sDispls[*it] + sharedPos[*it]) * N],
+               &downward[i * N],
+               N * sizeof(unsigned long));
+        sharedPos[*it]++;
+      }
+    }
+
+    delete[] sharedPos;
+
+    const unsigned int totalRecvShared = rDispls[procs - 1] + nRecvShared[procs - 1];
+
+    auto* recvShared = new unsigned long[totalRecvShared * N];
+
+    MPI_Datatype type = MPI_DATATYPE_NULL;
+    MPI_Type_contiguous(N, MPI_UNSIGNED_LONG, &type);
+    MPI_Type_commit(&type);
+
+    MPI_Alltoallv(
+        sendShared, nShared, sDispls, type, recvShared, nRecvShared, rDispls, type, m_comm);
+
+    delete[] nShared;
+    delete[] sendShared;
+
+    auto* hashedElements =
+        new std::unordered_set<internal::DownElement<N>, internal::DownElementHash<N>>[procs];
+
+    unsigned int k = 0;
+    for (int i = 0; i < procs; i++) {
+      assert(i != rank || nRecvShared[i] == 0);
+      for (int j = 0; j < nRecvShared[i]; j++) {
+        assert(k < totalRecvShared);
+        assert(hashedElements[i].find(&recvShared[k * N]) == hashedElements[i].end());
+        hashedElements[i].emplace(&recvShared[k * N]);
+        k++;
+      }
+    }
+
+    delete[] nRecvShared;
+    delete[] recvShared;
+
+    unsigned int e = 0;
+    for (unsigned int i = 0; i < elements.size(); i++) {
+      internal::DownElement<N> delem(&downward[i * N]);
+
+      auto it = elements[i].m_sharedRanks.begin();
+      while (it != elements[i].m_sharedRanks.end()) {
+        if (hashedElements[*it].find(delem) == hashedElements[*it].end()) {
+          if ((rank == 0 && *it == 3) || (rank == 3 && *it == 0)) {
+            e++;
+          }
+          it = elements[i].m_sharedRanks.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+
+    delete[] hashedElements;
+
+    // Count owned elements
+    unsigned int owned = 0;
+    for (typename std::vector<E>::const_iterator it = elements.begin(); it != elements.end();
+         ++it) {
+      if (it->m_sharedRanks.empty() || it->m_sharedRanks[0] > rank) {
+        owned++;
+      }
+    }
+
+    // Get global id offset
+    unsigned long gidOffset = owned;
+    MPI_Scan(MPI_IN_PLACE, &gidOffset, 1, MPI_UNSIGNED_LONG, MPI_SUM, m_comm);
+    gidOffset -= owned;
+
+    // Set global ids for owned elements and count the number of elements we need to forward
+    int* nSendGid = new int[procs];
+    memset(nSendGid, 0, procs * sizeof(int));
+    int* nRecvGid = new int[procs];
+    memset(nRecvGid, 0, procs * sizeof(int));
+    for (typename std::vector<E>::iterator it = elements.begin(); it != elements.end(); ++it) {
+      if (it->m_sharedRanks.empty() || it->m_sharedRanks[0] > rank) {
+        it->m_gid = gidOffset++;
+
+        for (auto it2 = it->m_sharedRanks.begin(); it2 != it->m_sharedRanks.end(); ++it2) {
+          nSendGid[*it2]++;
+        }
+      } else {
+        it->m_gid = std::numeric_limits<unsigned long>::max();
+
+        if (!it->m_sharedRanks.empty()) {
+          nRecvGid[it->m_sharedRanks[0]]++;
+        }
+      }
+    }
+
+    // Compute exchange offsets
+    sDispls[0] = 0;
+    rDispls[0] = 0;
+    for (int i = 1; i < procs; i++) {
+      sDispls[i] = sDispls[i - 1] + nSendGid[i - 1];
+      rDispls[i] = rDispls[i - 1] + nRecvGid[i - 1];
+    }
+
+    const unsigned int totalSendGid = sDispls[procs - 1] + nSendGid[procs - 1];
+    const unsigned int totalRecvGid = rDispls[procs - 1] + nRecvGid[procs - 1];
+
+    // Collect send data
+    auto* sendPos = new unsigned int[procs];
+    memset(sendPos, 0, procs * sizeof(unsigned int));
+
+    auto* sendGid = new unsigned long[totalSendGid];
+    auto* sendDGid = new unsigned long[totalSendGid * N];
+    for (unsigned int i = 0; i < elements.size(); i++) {
+      if (elements[i].m_sharedRanks.empty() || elements[i].m_sharedRanks[0] > rank) {
+        for (auto it = elements[i].m_sharedRanks.begin(); it != elements[i].m_sharedRanks.end();
+             ++it) {
+          assert(sendPos[*it] < static_cast<unsigned>(nSendGid[*it]));
+
+          sendGid[sDispls[*it] + sendPos[*it]] = elements[i].m_gid;
+          memcpy(&sendDGid[(sDispls[*it] + sendPos[*it]) * N],
+                 &downward[i * N],
+                 N * sizeof(unsigned long));
+          sendPos[*it]++;
+        }
+      }
+    }
+
+    delete[] sendPos;
+
+    // Exchange cell data
+    auto* recvGid = new unsigned long[totalRecvGid];
+    auto* recvDGid = new unsigned long[totalRecvGid * N];
+
+    MPI_Alltoallv(sendGid,
+                  nSendGid,
+                  sDispls,
+                  MPI_UNSIGNED_LONG,
+                  recvGid,
+                  nRecvGid,
+                  rDispls,
+                  MPI_UNSIGNED_LONG,
+                  m_comm);
+
+    MPI_Alltoallv(sendDGid, nSendGid, sDispls, type, recvDGid, nRecvGid, rDispls, type, m_comm);
+
+    MPI_Type_free(&type);
+
+    delete[] sendGid;
+    delete[] sendDGid;
+    delete[] nSendGid;
+    delete[] nRecvGid;
+    delete[] sDispls;
+    delete[] rDispls;
+
+    // Create a hash map from the received elements
+    std::unordered_map<internal::DownElement<N>, unsigned long, internal::DownElementHash<N>> dg2g;
+    for (unsigned int i = 0; i < totalRecvGid; i++) {
+      dg2g.emplace(&recvDGid[i * N], recvGid[i]);
+    }
+
+    delete[] recvGid;
+    delete[] recvDGid;
+
+    // Assign gids
+    for (unsigned int i = 0; i < elements.size(); i++) {
+      if (!elements[i].m_sharedRanks.empty() && elements[i].m_sharedRanks[0] < rank) {
+        assert(elements[i].m_gid == std::numeric_limits<unsigned long>::max());
+
+        internal::DownElement<N> delem(&downward[i * N]);
+        typename std::unordered_map<internal::DownElement<N>,
+                                    unsigned long,
+                                    internal::DownElementHash<N>>::const_iterator it =
+            dg2g.find(delem);
+        assert(it != dg2g.end());
+
+        elements[i].m_gid = it->second;
+      }
+    }
+
+    delete[] downward;
 #endif // USE_MPI
-	}
-
-private:
-	/**
-	 * Add an edge if it does not exist yet
-	 *
-	 * @param edgeUpward The storage for upward information
-	 * @param lid The local id of the edge
-	 * @param plid1 The first parent
-	 * @param plid2 The second parent
-	 * @return The local id of the edge
-	 */
-	static unsigned int addEdge(std::vector<std::set<unsigned int> > &edgeUpward,
-		unsigned int lid, unsigned int plid1, unsigned int plid2)
-	{
-		if (lid >= edgeUpward.size()) {
-			assert(lid == edgeUpward.size());
-			edgeUpward.push_back(std::set<unsigned int>());
-		}
-		edgeUpward[lid].insert(plid1);
-		edgeUpward[lid].insert(plid2);
-
-		return lid;
-	}
-
-	/**
-	 * Constructs the global -> local map for an element array
-	 */
-	template<typename TT>
-	static void constructG2L(const std::vector<TT> &elements, g2l_t &g2lMap)
-	{
-		g2lMap.clear();
-
-		unsigned int i = 0;
-		for (typename std::vector<TT>::const_iterator it = elements.begin();
-				it != elements.end(); ++it, i++) {
-			assert(g2lMap.find(it->m_gid) == g2lMap.end());
-			g2lMap[it->m_gid] = i;
-		}
-	}
-
-	template<typename TT>
-	static void _checkH5Err(TT status, const char* file, int line, int rank)
-	{
-		if (status < 0) {
-			logError() << utils::nospace << "An HDF5 error occurred in PUML ("
-				<< file << ": " << line << ") on rank " << rank;
-		}
-	}
+  }
+
+  /**
+   * Add an edge if it does not exist yet
+   *
+   * @param edgeUpward The storage for upward information
+   * @param lid The local id of the edge
+   * @param plid1 The first parent
+   * @param plid2 The second parent
+   * @return The local id of the edge
+   */
+  static auto addEdge(std::vector<std::set<unsigned int>>& edgeUpward,
+                      unsigned int lid,
+                      unsigned int plid1,
+                      unsigned int plid2) -> unsigned int {
+    if (lid >= edgeUpward.size()) {
+      assert(lid == edgeUpward.size());
+      edgeUpward.emplace_back();
+    }
+    edgeUpward[lid].insert(plid1);
+    edgeUpward[lid].insert(plid2);
+
+    return lid;
+  }
+
+  /**
+   * Constructs the global -> local map for an element array
+   */
+  template <typename TT>
+  static void constructG2L(const std::vector<TT>& elements, g2l_t& g2lMap) {
+    g2lMap.clear();
+
+    unsigned int i = 0;
+    for (typename std::vector<TT>::const_iterator it = elements.begin(); it != elements.end();
+         ++it, i++) {
+      assert(g2lMap.find(it->m_gid) == g2lMap.end());
+      g2lMap[it->m_gid] = i;
+    }
+  }
+
+  template <typename TT>
+  static void checkH5ErrImpl(TT status, const char* file, int line, int rank) {
+    if (status < 0) {
+      logError() << utils::nospace << "An HDF5 error occurred in PUML (" << file << ": " << line
+                 << ") on rank " << rank;
+    }
+  }
 };
 
 #undef checkH5Err
 
 /** Convenient typedef for tetrahrdral meshes */
-typedef PUML<TETRAHEDRON> TETPUML;
+using TETPUML = PUML<TETRAHEDRON>;
 
-}
+} // namespace PUML
 
 #endif // PUML_PUML_H
diff --git a/Partition.h b/Partition.h
index f6de44a..aea5481 100644
--- a/Partition.h
+++ b/Partition.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,21 +9,20 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_PARTITION_H
 #define PUML_PARTITION_H
 
+#include "Topology.h"
+#include "utils/logger.h"
 #include <memory>
 
 #ifdef USE_MPI
-#include <mpi.h>
 #endif // USE_MPI
 
 #include "PartitionBase.h"
-#include "PartitionGraph.h"
 #include "PartitionDummy.h"
 
 #ifdef USE_PARMETIS
@@ -35,95 +37,83 @@
 #include "PartitionParhip.h"
 #endif
 
-namespace PUML
-{
-	
+namespace PUML {
+
 enum class PartitionerType {
-	None,
-	Parmetis,
-	ParmetisGeometric,
-	PtScotch,
-	PtScotchQuality,
-	PtScotchBalance,
-	PtScotchBalanceQuality,
-	PtScotchSpeed,
-	PtScotchBalanceSpeed,
-	ParHIPUltrafastMesh,
-	ParHIPFastMesh,
-	ParHIPEcoMesh,
-	ParHIPUltrafastSocial,
-	ParHIPFastSocial,
-	ParHIPEcoSocial
+  None,
+  Parmetis,
+  ParmetisGeometric,
+  PtScotch,
+  PtScotchQuality,
+  PtScotchBalance,
+  PtScotchBalanceQuality,
+  PtScotchSpeed,
+  PtScotchBalanceSpeed,
+  ParHIPUltrafastMesh,
+  ParHIPFastMesh,
+  ParHIPEcoMesh,
+  ParHIPUltrafastSocial,
+  ParHIPFastSocial,
+  ParHIPEcoSocial
 };
 
-template<TopoType Topo>
-class Partition{
-public:
-static std::unique_ptr<PartitionBase<Topo>> getPartitioner(PartitionerType partitioner) {
-	PartitionBase<Topo>* partition = nullptr;
-	if (partitioner == PartitionerType::None) {
-		partition = new PartitionDummy<Topo>();
-	}
+template <TopoType Topo>
+class Partition {
+  public:
+  static auto getPartitioner(PartitionerType partitioner) -> std::unique_ptr<PartitionBase<Topo>> {
+    PartitionBase<Topo>* partition = nullptr;
+    if (partitioner == PartitionerType::None) {
+      partition = new PartitionDummy<Topo>();
+    }
 #ifdef USE_PARMETIS
-	else if (partitioner == PartitionerType::Parmetis) {
-		partition = new PartitionParmetis<Topo>(ParmetisPartitionMode::Default);
-	}
-	else if (partitioner == PartitionerType::ParmetisGeometric) {
-		partition = new PartitionParmetis<Topo>(ParmetisPartitionMode::Geometric);
-	}
+    else if (partitioner == PartitionerType::Parmetis) {
+      partition = new PartitionParmetis<Topo>(ParmetisPartitionMode::Default);
+    } else if (partitioner == PartitionerType::ParmetisGeometric) {
+      partition = new PartitionParmetis<Topo>(ParmetisPartitionMode::Geometric);
+    }
 #endif
 #ifdef USE_PTSCOTCH
-	else if (partitioner == PartitionerType::PtScotch) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATDEFAULT);
-	}
-	else if (partitioner == PartitionerType::PtScotchQuality) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATQUALITY);
-	}
-	else if (partitioner == PartitionerType::PtScotchBalance) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE);
-	}
-	else if (partitioner == PartitionerType::PtScotchBalanceQuality) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE | SCOTCH_STRATQUALITY);
-	}
-	else if (partitioner == PartitionerType::PtScotchSpeed) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATSPEED);
-	}
-	else if (partitioner == PartitionerType::PtScotchBalanceSpeed) {
-		partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE | SCOTCH_STRATSPEED);
-	}
+    else if (partitioner == PartitionerType::PtScotch) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATDEFAULT);
+    } else if (partitioner == PartitionerType::PtScotchQuality) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATQUALITY);
+    } else if (partitioner == PartitionerType::PtScotchBalance) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE);
+    } else if (partitioner == PartitionerType::PtScotchBalanceQuality) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE | SCOTCH_STRATQUALITY);
+    } else if (partitioner == PartitionerType::PtScotchSpeed) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATSPEED);
+    } else if (partitioner == PartitionerType::PtScotchBalanceSpeed) {
+      partition = new PartitionPtscotch<Topo>(SCOTCH_STRATBALANCE | SCOTCH_STRATSPEED);
+    }
 #endif
 #ifdef USE_PARHIP
-	else if (partitioner == PartitionerType::ParHIPUltrafastMesh) {
-		partition = new PartitionParhip<Topo>(ULTRAFASTMESH);
-	}
-	else if (partitioner == PartitionerType::ParHIPFastMesh) {
-		partition = new PartitionParhip<Topo>(FASTMESH);
-	}
-	else if (partitioner == PartitionerType::ParHIPEcoMesh) {
-		partition = new PartitionParhip<Topo>(ECOMESH);
-	}
-	else if (partitioner == PartitionerType::ParHIPUltrafastSocial) {
-		partition = new PartitionParhip<Topo>(ULTRAFASTSOCIAL);
-	}
-	else if (partitioner == PartitionerType::ParHIPFastSocial) {
-		partition = new PartitionParhip<Topo>(FASTSOCIAL);
-	}
-	else if (partitioner == PartitionerType::ParHIPEcoSocial) {
-		partition = new PartitionParhip<Topo>(ECOSOCIAL);
-	}
+    else if (partitioner == PartitionerType::ParHIPUltrafastMesh) {
+      partition = new PartitionParhip<Topo>(ULTRAFASTMESH);
+    } else if (partitioner == PartitionerType::ParHIPFastMesh) {
+      partition = new PartitionParhip<Topo>(FASTMESH);
+    } else if (partitioner == PartitionerType::ParHIPEcoMesh) {
+      partition = new PartitionParhip<Topo>(ECOMESH);
+    } else if (partitioner == PartitionerType::ParHIPUltrafastSocial) {
+      partition = new PartitionParhip<Topo>(ULTRAFASTSOCIAL);
+    } else if (partitioner == PartitionerType::ParHIPFastSocial) {
+      partition = new PartitionParhip<Topo>(FASTSOCIAL);
+    } else if (partitioner == PartitionerType::ParHIPEcoSocial) {
+      partition = new PartitionParhip<Topo>(ECOSOCIAL);
+    }
 #endif
-	else {
-		logError() << "Unknown (or disabled) partitioner.";
-	}
+    else {
+      logError() << "Unknown (or disabled) partitioner.";
+    }
 
-	return std::unique_ptr<PartitionBase<Topo>>(partition);
-}
+    return std::unique_ptr<PartitionBase<Topo>>(partition);
+  }
 
-	Partition() = delete;
+  Partition() = delete;
 };
 
 using TETPartition = Partition<TETRAHEDRON>;
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITION_H
diff --git a/PartitionBase.h b/PartitionBase.h
index a1adc6b..8e4e560 100644
--- a/PartitionBase.h
+++ b/PartitionBase.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  * @author David Schneller <david.schneller@tum.de>
  */
@@ -15,53 +17,50 @@
 #define PUML_PARTITION_BASE_H
 
 #ifdef USE_MPI
-#include <mpi.h>
 #endif // USE_MPI
 #include "utils/logger.h"
 #include "Topology.h"
-#include "PUML.h"
 #include "PartitionGraph.h"
 #include "PartitionTarget.h"
 #include <vector>
-#include <stdexcept>
 
-namespace PUML
-{
+namespace PUML {
 
-enum class PartitioningResult {
-	SUCCESS = 0,
-	ERROR
-};
+enum class PartitioningResult { SUCCESS = 0, ERROR };
 
-template<TopoType Topo>
-class PartitionBase
-{
-public:
-	PartitionBase() { }
-        virtual ~PartitionBase() = default;
+template <TopoType Topo>
+class PartitionBase {
+  public:
+  PartitionBase() = default;
+  virtual ~PartitionBase() = default;
 
 #ifdef USE_MPI
-	std::vector<int> partition(const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		std::vector<int> part(graph.localVertexCount());
-		auto result = partition(part, graph, target, seed);
-		if (result != PartitioningResult::SUCCESS) {
-			logError() << "Partitioning failed.";
-		}
-		return part;
-	}
+  auto partition(const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
+      -> std::vector<int> {
+    std::vector<int> part(graph.localVertexCount());
+    auto result = partition(part, graph, target, seed);
+    if (result != PartitioningResult::SUCCESS) {
+      logError() << "Partitioning failed.";
+    }
+    return part;
+  }
 
-	PartitioningResult partition(std::vector<int>& part, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		return partition(part.data(), graph, target, seed);
-	}
+  auto partition(std::vector<int>& part,
+                 const PartitionGraph<Topo>& graph,
+                 const PartitionTarget& target,
+                 int seed = 1) -> PartitioningResult {
+    return partition(part.data(), graph, target, seed);
+  }
 
-	virtual PartitioningResult partition(int* part, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1) = 0;
+  virtual auto partition(int* part,
+                         const PartitionGraph<Topo>& graph,
+                         const PartitionTarget& target,
+                         int seed = 1) -> PartitioningResult = 0;
 #endif // USE_MPI
 };
 
 using TETPartitionBase = PartitionBase<TETRAHEDRON>;
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITION_BASE_H
diff --git a/PartitionDummy.h b/PartitionDummy.h
index 9f177db..7a15268 100644
--- a/PartitionDummy.h
+++ b/PartitionDummy.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -13,6 +16,7 @@
 #ifndef PUML_PARTITIONDUMMY_H
 #define PUML_PARTITIONDUMMY_H
 
+#include "PartitionTarget.h"
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
@@ -21,31 +25,31 @@
 #include "PartitionGraph.h"
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML {
 
-template<TopoType Topo>
-class PartitionDummy : public PartitionBase<Topo>
-{
+template <TopoType Topo>
+class PartitionDummy : public PartitionBase<Topo> {
 
-public:
-	using PartitionBase<Topo>::PartitionBase;
+  public:
+  using PartitionBase<Topo>::PartitionBase;
 
 #ifdef USE_MPI
-	virtual PartitioningResult partition(int* partition, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		// all data stays where it is (i.e. where it was read)
-
-		int rank;
-		MPI_Comm_rank(graph.comm(), &rank);
-		for (unsigned long i = 0; i < graph.localVertexCount(); ++i) {
-			partition[i] = rank;
-		}
-		return PartitioningResult::SUCCESS;
-	}
+  virtual auto partition(int* partition,
+                         const PartitionGraph<Topo>& graph,
+                         const PartitionTarget& target,
+                         int seed = 1) -> PartitioningResult {
+    // all data stays where it is (i.e. where it was read)
+
+    int rank = 0;
+    MPI_Comm_rank(graph.comm(), &rank);
+    for (unsigned long i = 0; i < graph.localVertexCount(); ++i) {
+      partition[i] = rank;
+    }
+    return PartitioningResult::SUCCESS;
+  }
 #endif // USE_MPI
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITIONPARMETIS_H
diff --git a/PartitionGraph.h b/PartitionGraph.h
index 4cf9e8b..ffd838f 100644
--- a/PartitionGraph.h
+++ b/PartitionGraph.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,19 +9,19 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_PARTITION_GRAPH_H
 #define PUML_PARTITION_GRAPH_H
 
+#include <cstddef>
+#include "TypeInference.h"
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
 
 #include <algorithm>
-#include <numeric>
 #include <vector>
 #include <cassert>
 #include <functional>
@@ -29,257 +32,271 @@
 #include "FaceIterator.h"
 #include "Downward.h"
 
-#include "utils/logger.h"
-
-namespace PUML
-{
-
-template<TopoType Topo>
-class PartitionGraph
-{
-public:
-    PartitionGraph(const PUML<Topo>& puml) : m_puml(puml) {
-        int commSize = 1;
-#ifdef USE_MPI
-        m_comm = m_puml.comm();
-        MPI_Comm_size(m_comm, &commSize);
-#endif
-        m_processCount = commSize;
-
-        const unsigned long cellfaces = internal::Topology<Topo>::cellfaces();
-        const auto& faces = m_puml.faces();
-        const auto& cells = m_puml.cells();
-        unsigned long vertexCount = cells.size();
-
-        std::vector<unsigned long> adjRawCount(vertexCount);
-        std::vector<unsigned long> adjRaw(vertexCount * cellfaces);
-
-        FaceIterator<Topo> iterator(m_puml);
-        iterator.template forEach<unsigned long>(
-            [&cells] (int fid, int cid) {
-                return cells[cid].gid();
-            },
-            [&adjRawCount, &adjRaw] (int id, int lid, const unsigned long& gid) {
-                int idx = cellfaces * lid + adjRawCount[lid]++;
-                adjRaw[idx] = gid;
-            }
-        );
-
-        m_adjDisp.resize(vertexCount+1);
-        m_adjDisp[0] = 0;
-        //Note: std::inclusive_scan can be used here but some compilers
-        // haven't provided support for it e.g., libc++@15.0.0
-        m_adjDisp[1] = adjRawCount[0];
-        for (std::size_t i = 1; i < adjRawCount.size(); ++i) {
-	        m_adjDisp[i + 1] = m_adjDisp[i] + adjRawCount[i];
-        }
-
-        m_adj.resize(m_adjDisp[vertexCount]);
-        for (unsigned long i = 0, j = 0; i < vertexCount; ++i) {
-            for (unsigned long k = 0; k < adjRawCount[i]; ++k, ++j) {
-                m_adj[j] = adjRaw[i * cellfaces + k];
-            }
-        }
-
-        m_vertexDistribution.resize(commSize+1);
-        m_edgeDistribution.resize(commSize+1);
-        m_vertexDistribution[0] = 0;
-        m_edgeDistribution[0] = 0;
+namespace PUML {
 
+template <TopoType Topo>
+class PartitionGraph {
+  public:
+  PartitionGraph(const PUML<Topo>& puml) : m_puml(puml) {
+    int commSize = 1;
 #ifdef USE_MPI
-        MPI_Allgather(&vertexCount, 1, MPI_UNSIGNED_LONG, static_cast<unsigned long*>(m_vertexDistribution.data()) + 1, 1, MPI_UNSIGNED_LONG, m_comm);
-        MPI_Allgather(&m_adjDisp[vertexCount], 1, MPI_UNSIGNED_LONG, static_cast<unsigned long*>(m_edgeDistribution.data()) + 1, 1, MPI_UNSIGNED_LONG, m_comm);
-
-        for (unsigned long i = 2; i <= m_processCount; ++i)
-        {
-            m_vertexDistribution[i] += m_vertexDistribution[i - 1];
-            m_edgeDistribution[i] += m_edgeDistribution[i - 1];
-        }
-#else
-        m_vertexDistribution[1] = vertexCount;
-        m_edgeDistribution[1] = m_adjDisp[vertexCount];
+    m_comm = m_puml.comm();
+    MPI_Comm_size(m_comm, &commSize);
 #endif
+    m_processCount = commSize;
+
+    const unsigned long cellfaces = internal::Topology<Topo>::cellfaces();
+    const auto& faces = m_puml.faces();
+    const auto& cells = m_puml.cells();
+    unsigned long vertexCount = cells.size();
+
+    std::vector<unsigned long> adjRawCount(vertexCount);
+    std::vector<unsigned long> adjRaw(vertexCount * cellfaces);
+
+    FaceIterator<Topo> iterator(m_puml);
+    iterator.template forEach<unsigned long>(
+        [&cells](int fid, int cid) { return cells[cid].gid(); },
+        [&adjRawCount, &adjRaw](int id, int lid, const unsigned long& gid) {
+          int idx = (cellfaces * lid) + adjRawCount[lid]++;
+          adjRaw[idx] = gid;
+        });
+
+    m_adjDisp.resize(vertexCount + 1);
+    m_adjDisp[0] = 0;
+    // Note: std::inclusive_scan can be used here but some compilers
+    //  haven't provided support for it e.g., libc++@15.0.0
+    m_adjDisp[1] = adjRawCount[0];
+    for (std::size_t i = 1; i < adjRawCount.size(); ++i) {
+      m_adjDisp[i + 1] = m_adjDisp[i] + adjRawCount[i];
     }
 
-    // FaceHandlerFunc: void(int,int,const T&,const T&,int)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>, bool> = true>
-    void forEachLocalEdges(const T* cellData,
-                        FaceHandlerFunc&& faceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto handler = [&cellData] (int fid, int id){return cellData[id];};
-        forEachLocalEdges<T>(std::move(handler), std::forward<FaceHandlerFunc>(faceHandler), mpit);
+    m_adj.resize(m_adjDisp[vertexCount]);
+    for (unsigned long i = 0, j = 0; i < vertexCount; ++i) {
+      for (unsigned long k = 0; k < adjRawCount[i]; ++k, ++j) {
+        m_adj[j] = adjRaw[(i * cellfaces) + k];
+      }
     }
 
-    // FaceHandlerFunc: void(int,int,const T&,const T&,int)
-    template<typename T, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>, bool> = true>
-    void forEachLocalEdges(const std::vector<T>& cellData,
-                        FaceHandlerFunc&& faceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto handler = [&cellData] (int fid, int id){return cellData[id];};
-        forEachLocalEdges<T>(std::move(handler), std::forward<FaceHandlerFunc>(faceHandler), mpit);
-    }
+    m_vertexDistribution.resize(commSize + 1);
+    m_edgeDistribution.resize(commSize + 1);
+    m_vertexDistribution[0] = 0;
+    m_edgeDistribution[0] = 0;
 
-    // CellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,const T&,int)
-    template<typename T, typename CellHandlerFunc, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>, bool> = true>
-    void forEachLocalEdges(CellHandlerFunc&& cellHandler,
-                        FaceHandlerFunc&& faceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler), cellHandler = std::forward<CellHandlerFunc>(cellHandler)]
-            (int fid,int lid,const T& a, int eid) {
-                auto b = std::invoke(cellHandler, fid, lid);
-                std::invoke(faceHandler, fid, lid, a, b, eid);
-            };
-        forEachLocalEdges<T>(std::forward<CellHandlerFunc>(cellHandler), std::move(realFaceHandler), mpit);
+#ifdef USE_MPI
+    MPI_Allgather(&vertexCount,
+                  1,
+                  MPI_UNSIGNED_LONG,
+                  static_cast<unsigned long*>(m_vertexDistribution.data()) + 1,
+                  1,
+                  MPI_UNSIGNED_LONG,
+                  m_comm);
+    MPI_Allgather(&m_adjDisp[vertexCount],
+                  1,
+                  MPI_UNSIGNED_LONG,
+                  static_cast<unsigned long*>(m_edgeDistribution.data()) + 1,
+                  1,
+                  MPI_UNSIGNED_LONG,
+                  m_comm);
+
+    for (unsigned long i = 2; i <= m_processCount; ++i) {
+      m_vertexDistribution[i] += m_vertexDistribution[i - 1];
+      m_edgeDistribution[i] += m_edgeDistribution[i - 1];
     }
-
-    // ExternalCellHandlerFunc: T(int,int)
-    // FaceHandlerFunc: void(int,int,const T&,int)
-    template<typename T, typename ExternalCellHandlerFunc, typename FaceHandlerFunc,
-        std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
-        std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, int>, bool> = true>
-    void forEachLocalEdges(ExternalCellHandlerFunc&& externalCellHandler,
-                        FaceHandlerFunc&& faceHandler,
-                        MPI_Datatype mpit = MPITypeInfer<T>::type()) {
-        
-        std::vector<unsigned long> adjRawCount(localVertexCount());
-        const auto& adjDisp = m_adjDisp;
-        auto realFaceHandler = [&adjDisp, &adjRawCount, faceHandler = std::forward<FaceHandlerFunc>(faceHandler)](int fid,int lid,const T& a) {
-            std::invoke(faceHandler, fid, lid, a, adjDisp[lid] + adjRawCount[lid]++);
+#else
+    m_vertexDistribution[1] = vertexCount;
+    m_edgeDistribution[1] = m_adjDisp[vertexCount];
+#endif
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&,int)
+  template <
+      typename T,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>,
+                       bool> = true>
+  void forEachLocalEdges(const T* cellData,
+                         FaceHandlerFunc&& faceHandler,
+                         MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto handler = [&cellData](int fid, int id) { return cellData[id]; };
+    forEachLocalEdges<T>(std::move(handler), std::forward<FaceHandlerFunc>(faceHandler), mpit);
+  }
+
+  // FaceHandlerFunc: void(int,int,const T&,const T&,int)
+  template <
+      typename T,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>,
+                       bool> = true>
+  void forEachLocalEdges(const std::vector<T>& cellData,
+                         FaceHandlerFunc&& faceHandler,
+                         MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto handler = [&cellData](int fid, int id) { return cellData[id]; };
+    forEachLocalEdges<T>(std::move(handler), std::forward<FaceHandlerFunc>(faceHandler), mpit);
+  }
+
+  // CellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,const T&,int)
+  template <
+      typename T,
+      typename CellHandlerFunc,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, CellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, const T&, int>,
+                       bool> = true>
+  void forEachLocalEdges(CellHandlerFunc&& cellHandler,
+                         FaceHandlerFunc&& faceHandler,
+                         MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+    auto realFaceHandler = [faceHandler = std::forward<FaceHandlerFunc>(faceHandler),
+                            cellHandler = std::forward<CellHandlerFunc>(cellHandler)](
+                               int fid, int lid, const T& a, int eid) {
+      auto b = std::invoke(cellHandler, fid, lid);
+      std::invoke(faceHandler, fid, lid, a, b, eid);
+    };
+    forEachLocalEdges<T>(
+        std::forward<CellHandlerFunc>(cellHandler), std::move(realFaceHandler), mpit);
+  }
+
+  // ExternalCellHandlerFunc: T(int,int)
+  // FaceHandlerFunc: void(int,int,const T&,int)
+  template <
+      typename T,
+      typename ExternalCellHandlerFunc,
+      typename FaceHandlerFunc,
+      std::enable_if_t<std::is_invocable_r_v<T, ExternalCellHandlerFunc, int, int>, bool> = true,
+      std::enable_if_t<std::is_invocable_v<FaceHandlerFunc, int, int, const T&, int>, bool> = true>
+  void forEachLocalEdges(ExternalCellHandlerFunc&& externalCellHandler,
+                         FaceHandlerFunc&& faceHandler,
+                         MPI_Datatype mpit = MPITypeInfer<T>::type()) {
+
+    std::vector<unsigned long> adjRawCount(localVertexCount());
+    const auto& adjDisp = m_adjDisp;
+    auto realFaceHandler =
+        [&adjDisp, &adjRawCount, faceHandler = std::forward<FaceHandlerFunc>(faceHandler)](
+            int fid, int lid, const T& a) {
+          std::invoke(faceHandler, fid, lid, a, adjDisp[lid] + adjRawCount[lid]++);
         };
-        FaceIterator<Topo> iterator(m_puml);
-        iterator.template forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler), std::move(realFaceHandler), std::move([](int a, int b){}), mpit);
-    }
-
-    unsigned long localVertexCount() const {
-        return m_adjDisp.size() - 1;
-    }
-
-    unsigned long localEdgeCount() const {
-        return m_adj.size();
-    }
-
-    unsigned long globalVertexCount() const {
-        return m_vertexDistribution[m_processCount];
+    FaceIterator<Topo> iterator(m_puml);
+    iterator.template forEach<T>(std::forward<ExternalCellHandlerFunc>(externalCellHandler),
+                                 std::move(realFaceHandler),
+                                 std::move([](int a, int b) {}),
+                                 mpit);
+  }
+
+  [[nodiscard]] auto localVertexCount() const -> unsigned long { return m_adjDisp.size() - 1; }
+
+  [[nodiscard]] auto localEdgeCount() const -> unsigned long { return m_adj.size(); }
+
+  [[nodiscard]] auto globalVertexCount() const -> unsigned long {
+    return m_vertexDistribution[m_processCount];
+  }
+
+  [[nodiscard]] auto globalEdgeCount() const -> unsigned long {
+    return m_edgeDistribution[m_processCount];
+  }
+
+  template <typename OutputType>
+  void geometricCoordinates(std::vector<OutputType>& coord) const {
+    // basic idea: compute the barycenter of the cell (i.e. tetrahedron/hexahedron)
+    coord.resize(3 * localVertexCount());
+    for (unsigned long i = 0; i < m_puml.cells().size(); ++i) {
+      const auto& cell = m_puml.cells()[i];
+      unsigned int lid[internal::Topology<Topo>::cellvertices()];
+      Downward::vertices(m_puml, cell, lid);
+      OutputType x = 0.0;
+      OutputType y = 0.0;
+      OutputType z = 0.0;
+      for (unsigned long j = 0; j < internal::Topology<Topo>::cellvertices(); ++j) {
+        auto vertex = m_puml.vertices()[lid[j]];
+        x += vertex.coordinate()[0];
+        y += vertex.coordinate()[1];
+        z += vertex.coordinate()[2];
+      }
+      x /= internal::Topology<Topo>::cellvertices();
+      y /= internal::Topology<Topo>::cellvertices();
+      z /= internal::Topology<Topo>::cellvertices();
+      coord[(i * 3) + 0] = x;
+      coord[(i * 3) + 1] = y;
+      coord[(i * 3) + 2] = z;
     }
+  }
 
-    unsigned long globalEdgeCount() const {
-        return m_edgeDistribution[m_processCount];
-    }
+  template <typename T>
+  void setVertexWeights(const std::vector<T>& vertexWeights, int vertexWeightCount) {
+    setVertexWeights(vertexWeights.data(), vertexWeightCount);
+  }
 
-    template<typename OutputType>
-    void geometricCoordinates(std::vector<OutputType>& coord) const {
-        // basic idea: compute the barycenter of the cell (i.e. tetrahedron/hexahedron)
-        coord.resize(3 * localVertexCount());
-        for (unsigned long i = 0; i < m_puml.cells().size(); ++i) {
-            const auto& cell = m_puml.cells()[i];
-            unsigned int lid[internal::Topology<Topo>::cellvertices()];
-            Downward::vertices(m_puml, cell, lid);
-            OutputType x = 0.0, y = 0.0, z = 0.0;
-            for (unsigned long j = 0; j < internal::Topology<Topo>::cellvertices(); ++j) {
-                auto vertex = m_puml.vertices()[lid[j]];
-                x += vertex.coordinate()[0];
-                y += vertex.coordinate()[1];
-                z += vertex.coordinate()[2];
-            }
-            x /= internal::Topology<Topo>::cellvertices();
-            y /= internal::Topology<Topo>::cellvertices();
-            z /= internal::Topology<Topo>::cellvertices();
-            coord[i * 3 + 0] = x;
-            coord[i * 3 + 1] = y;
-            coord[i * 3 + 2] = z;
-        }
+  template <typename T>
+  void setVertexWeights(const T* vertexWeights, int vertexWeightCount) {
+    if (vertexWeights == nullptr) {
+      return;
     }
-
-    template<typename T>
-    void setVertexWeights(const std::vector<T>& vertexWeights, int vertexWeightCount) {
-        setVertexWeights(vertexWeights.data(), vertexWeightCount);
+    m_vertexWeightCount = vertexWeightCount;
+    m_vertexWeights.resize(localVertexCount() * vertexWeightCount);
+    for (size_t i = 0; i < m_vertexWeights.size(); ++i) {
+      m_vertexWeights[i] = vertexWeights[i];
     }
+  }
 
-    template<typename T>
-    void setVertexWeights(const T* vertexWeights, int vertexWeightCount) {
-        if (vertexWeights == nullptr) return;
-        m_vertexWeightCount = vertexWeightCount;
-        m_vertexWeights.resize(localVertexCount() * vertexWeightCount);
-        for (size_t i = 0; i < m_vertexWeights.size(); ++i) {
-            m_vertexWeights[i] = vertexWeights[i];
-        }
-    }
+  template <typename T>
+  void setEdgeWeights(const std::vector<T>& edgeWeights) {
+    setEdgeWeights(edgeWeights.data());
+  }
 
-    template<typename T>
-    void setEdgeWeights(const std::vector<T>& edgeWeights) {
-        setEdgeWeights(edgeWeights.data());
+  template <typename T>
+  void setEdgeWeights(const T* edgeWeights) {
+    if (edgeWeights == nullptr) {
+      return;
     }
-
-    template<typename T>
-    void setEdgeWeights(const T* edgeWeights) {
-        if (edgeWeights == nullptr) return;
-        m_edgeWeights.resize(m_adj.size());
-        for (size_t i = 0; i < m_adj.size(); ++i) {
-            m_edgeWeights[i] = edgeWeights[i];
-        }
+    m_edgeWeights.resize(m_adj.size());
+    for (size_t i = 0; i < m_adj.size(); ++i) {
+      m_edgeWeights[i] = edgeWeights[i];
     }
+  }
 
-    const std::vector<unsigned long>& adj() const {
-        return m_adj;
-    }
+  [[nodiscard]] auto adj() const -> const std::vector<unsigned long>& { return m_adj; }
 
-    const std::vector<unsigned long>& adjDisp() const {
-        return m_adjDisp;
-    }
+  [[nodiscard]] auto adjDisp() const -> const std::vector<unsigned long>& { return m_adjDisp; }
 
-    const std::vector<unsigned long>& vertexDistribution() const {
-        return m_vertexDistribution;
-    }
+  [[nodiscard]] auto vertexDistribution() const -> const std::vector<unsigned long>& {
+    return m_vertexDistribution;
+  }
 
-    const std::vector<unsigned long>& edgeDistribution() const {
-        return m_edgeDistribution;
-    }
+  [[nodiscard]] auto edgeDistribution() const -> const std::vector<unsigned long>& {
+    return m_edgeDistribution;
+  }
 
-    const std::vector<unsigned long>& vertexWeights() const {
-        return m_vertexWeights;
-    }
+  [[nodiscard]] auto vertexWeights() const -> const std::vector<unsigned long>& {
+    return m_vertexWeights;
+  }
 
-    const std::vector<unsigned long>& edgeWeights() const {
-        return m_edgeWeights;
-    }
+  [[nodiscard]] auto edgeWeights() const -> const std::vector<unsigned long>& {
+    return m_edgeWeights;
+  }
 
-    const MPI_Comm& comm() const {
-        return m_comm;
-    }
+  [[nodiscard]] auto comm() const -> const MPI_Comm& { return m_comm; }
 
-    const PUML<Topo>& puml() const {
-        return m_puml;
-    }
+  auto puml() const -> const PUML<Topo>& { return m_puml; }
 
-    unsigned long vertexWeightCount() const {
-        return m_vertexWeightCount;
-    }
+  [[nodiscard]] auto vertexWeightCount() const -> unsigned long { return m_vertexWeightCount; }
 
-    unsigned long processCount() const {
-        return m_processCount;
-    }
+  [[nodiscard]] auto processCount() const -> unsigned long { return m_processCount; }
 
-private:
-    std::vector<unsigned long> m_adj;
-    std::vector<unsigned long> m_adjDisp;
-    std::vector<unsigned long> m_vertexWeights;
-    std::vector<unsigned long> m_edgeWeights;
-    std::vector<unsigned long> m_vertexDistribution;
-    std::vector<unsigned long> m_edgeDistribution;
-    unsigned long m_vertexWeightCount = 0;
-    unsigned long m_processCount = 0;
+  private:
+  std::vector<unsigned long> m_adj;
+  std::vector<unsigned long> m_adjDisp;
+  std::vector<unsigned long> m_vertexWeights;
+  std::vector<unsigned long> m_edgeWeights;
+  std::vector<unsigned long> m_vertexDistribution;
+  std::vector<unsigned long> m_edgeDistribution;
+  unsigned long m_vertexWeightCount = 0;
+  unsigned long m_processCount = 0;
 #ifdef USE_MPI
-    MPI_Comm m_comm;
+  MPI_Comm m_comm;
 #endif
-    const PUML<Topo>& m_puml;
+  const PUML<Topo>& m_puml;
 };
 
 using TETPartitionGraph = PartitionGraph<TETRAHEDRON>;
 
-}
+} // namespace PUML
 #endif
diff --git a/PartitionParhip.h b/PartitionParhip.h
index 83541a2..4904039 100644
--- a/PartitionParhip.h
+++ b/PartitionParhip.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,13 +9,13 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_PARTITIONPARHIP_H
 #define PUML_PARTITIONPARHIP_H
 
+#include "PartitionTarget.h"
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
@@ -30,53 +33,66 @@
 
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML {
 
-template<TopoType Topo>
-class PartitionParhip : public PartitionBase<Topo>
-{
+template <TopoType Topo>
+class PartitionParhip : public PartitionBase<Topo> {
 
-public:
-	PartitionParhip(int mode) : mode(mode) {}
+  public:
+  PartitionParhip(int mode) : mode(mode) {}
 #ifdef USE_MPI
-	virtual PartitioningResult partition(int* partition, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		int rank;
-		MPI_Comm_rank(graph.comm(), &rank);
-
-		std::vector<idxtype> vtxdist(graph.vertexDistribution().begin(), graph.vertexDistribution().end());
-		std::vector<idxtype> xadj(graph.adjDisp().begin(), graph.adjDisp().end());
-		std::vector<idxtype> adjncy(graph.adj().begin(), graph.adj().end());
-		std::vector<idxtype> vwgt(graph.vertexWeights().begin(), graph.vertexWeights().end());
-		std::vector<idxtype> adjwgt(graph.edgeWeights().begin(), graph.edgeWeights().end());
-		auto cellCount = graph.localVertexCount();
-
-		if (!target.vertexWeightsUniform()) {
-			logWarning(rank) << "Node weights (target vertex weights) are currently ignored by ParHIP.";
-		}
-		if (graph.vertexWeights().size() > graph.localVertexCount()) {
-			logWarning(rank) << "Multiple vertex weights are currently ignored by ParHIP.";
-		}
-
-		int edgecut;
-		int nparts = target.vertexCount();
-		std::vector<idxtype> part(cellCount);
-		double imbalance = target.imbalance();
-		MPI_Comm comm = graph.comm();
-		ParHIPPartitionKWay(vtxdist.data(), xadj.data(), adjncy.data(), vwgt.empty() ? nullptr : vwgt.data(), adjwgt.empty() ? nullptr : adjwgt.data(), &nparts, &imbalance, true, seed, mode, &edgecut, part.data(), &comm);
-
-		for (int i = 0; i < cellCount; i++) {
-			partition[i] = part[i];
-		}
-
-		return PartitioningResult::SUCCESS;
-	}
+  virtual auto partition(int* partition,
+                         const PartitionGraph<Topo>& graph,
+                         const PartitionTarget& target,
+                         int seed = 1) -> PartitioningResult {
+    int rank = 0;
+    MPI_Comm_rank(graph.comm(), &rank);
+
+    std::vector<idxtype> vtxdist(graph.vertexDistribution().begin(),
+                                 graph.vertexDistribution().end());
+    std::vector<idxtype> xadj(graph.adjDisp().begin(), graph.adjDisp().end());
+    std::vector<idxtype> adjncy(graph.adj().begin(), graph.adj().end());
+    std::vector<idxtype> vwgt(graph.vertexWeights().begin(), graph.vertexWeights().end());
+    std::vector<idxtype> adjwgt(graph.edgeWeights().begin(), graph.edgeWeights().end());
+    auto cellCount = graph.localVertexCount();
+
+    if (!target.vertexWeightsUniform()) {
+      logWarning(rank) << "Node weights (target vertex weights) are currently ignored by ParHIP.";
+    }
+    if (graph.vertexWeights().size() > graph.localVertexCount()) {
+      logWarning(rank) << "Multiple vertex weights are currently ignored by ParHIP.";
+    }
+
+    int edgecut = 0;
+    int nparts = target.vertexCount();
+    std::vector<idxtype> part(cellCount);
+    double imbalance = target.imbalance();
+    MPI_Comm comm = graph.comm();
+    ParHIPPartitionKWay(vtxdist.data(),
+                        xadj.data(),
+                        adjncy.data(),
+                        vwgt.empty() ? nullptr : vwgt.data(),
+                        adjwgt.empty() ? nullptr : adjwgt.data(),
+                        &nparts,
+                        &imbalance,
+                        true,
+                        seed,
+                        mode,
+                        &edgecut,
+                        part.data(),
+                        &comm);
+
+    for (int i = 0; i < cellCount; i++) {
+      partition[i] = part[i];
+    }
+
+    return PartitioningResult::SUCCESS;
+  }
 #endif // USE_MPI
-private:
-	int mode;
+  private:
+  int mode;
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITIONPARHIP_H
diff --git a/PartitionParmetis.h b/PartitionParmetis.h
index c628578..ea3a17b 100644
--- a/PartitionParmetis.h
+++ b/PartitionParmetis.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  * @author David Schneller <david.schneller@tum.de>
  */
@@ -14,8 +16,10 @@
 #ifndef PUML_PARTITIONPARMETIS_H
 #define PUML_PARTITIONPARMETIS_H
 
+#include "PartitionTarget.h"
+#include <vector>
+#include "utils/logger.h"
 #ifdef USE_MPI
-#include <mpi.h>
 #endif // USE_MPI
 
 #ifndef USE_PARMETIS
@@ -31,80 +35,111 @@
 #include "PartitionGraph.h"
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML {
 
-enum class ParmetisPartitionMode {
-	Default,
-	Geometric
-};
+enum class ParmetisPartitionMode { Default, Geometric };
 
-template<TopoType Topo>
-class PartitionParmetis : public PartitionBase<Topo>
-{
+template <TopoType Topo>
+class PartitionParmetis : public PartitionBase<Topo> {
 
-public:
-	PartitionParmetis(ParmetisPartitionMode mode) :mode(mode) {
-	}
+  public:
+  PartitionParmetis(ParmetisPartitionMode mode) : mode(mode) {}
 #ifdef USE_MPI
-	virtual PartitioningResult partition(int* partition, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		auto comm = graph.comm();
-		std::vector<idx_t> vtxdist(graph.vertexDistribution().begin(), graph.vertexDistribution().end());
-		std::vector<idx_t> xadj(graph.adjDisp().begin(), graph.adjDisp().end());
-		std::vector<idx_t> adjncy(graph.adj().begin(), graph.adj().end());
-		std::vector<idx_t> vwgt(graph.vertexWeights().begin(), graph.vertexWeights().end());
-		std::vector<idx_t> adjwgt(graph.edgeWeights().begin(), graph.edgeWeights().end());
-		auto cell_count = graph.localVertexCount();
-
-		idx_t ncon = graph.vertexWeightCount();
-		if (ncon == 0) ncon = 1;
-		idx_t nparts = target.vertexCount();
-		std::vector<real_t> tpwgts(nparts * ncon, static_cast<real_t>(1.) / nparts);
-		if (!target.vertexWeightsUniform()) {
-			for (idx_t i = 0; i < target.vertexCount(); i++) {
-				for (idx_t j = 0; j < ncon; ++j) {
-					tpwgts[i*ncon + j] = target.vertexWeights()[i];
-				}
-			}
-		}
-
-		idx_t options[3] = {1, 0, seed};
-		idx_t numflag = 0;
-		idx_t wgtflag = 0;
-		if (!vwgt.empty()) wgtflag |= 2;
-		if (!adjwgt.empty()) wgtflag |= 1;
-		std::vector<real_t> ubvec(ncon, target.imbalance() + 1.0);
-
-		idx_t edgecut;
-		std::vector<idx_t> part(cell_count);
-
-		if (mode == ParmetisPartitionMode::Default) {
-			ParMETIS_V3_PartKway(vtxdist.data(), xadj.data(), adjncy.data(), vwgt.empty() ? nullptr : vwgt.data(), adjwgt.empty() ? nullptr : adjwgt.data(), &wgtflag, &numflag, &ncon, &nparts, tpwgts.data(), ubvec.data(), options, &edgecut, part.data(), &comm);
-		}
-		else if (mode == ParmetisPartitionMode::Geometric) {
-			idx_t ndims = 3;
-			std::vector<real_t> xyz;
-			graph.geometricCoordinates(xyz);
-			ParMETIS_V3_PartGeomKway(vtxdist.data(), xadj.data(), adjncy.data(), vwgt.empty() ? nullptr : vwgt.data(), adjwgt.empty() ? nullptr : adjwgt.data(), &wgtflag, &numflag, &ndims, xyz.data(), &ncon, &nparts, tpwgts.data(), ubvec.data(), options, &edgecut, part.data(), &comm);
-		}
-		else {
-			logError() << "Unknown partitioning mode for ParMETIS";
-			return PartitioningResult::ERROR;
-		}
-
-		for (int i = 0; i < cell_count; i++) {
-			partition[i] = part[i];
-		}
-
-		return PartitioningResult::SUCCESS;
-	}
+  virtual auto partition(int* partition,
+                         const PartitionGraph<Topo>& graph,
+                         const PartitionTarget& target,
+                         int seed = 1) -> PartitioningResult {
+    auto comm = graph.comm();
+    std::vector<idx_t> vtxdist(graph.vertexDistribution().begin(),
+                               graph.vertexDistribution().end());
+    std::vector<idx_t> xadj(graph.adjDisp().begin(), graph.adjDisp().end());
+    std::vector<idx_t> adjncy(graph.adj().begin(), graph.adj().end());
+    std::vector<idx_t> vwgt(graph.vertexWeights().begin(), graph.vertexWeights().end());
+    std::vector<idx_t> adjwgt(graph.edgeWeights().begin(), graph.edgeWeights().end());
+    auto cellCount = graph.localVertexCount();
+
+    idx_t ncon = graph.vertexWeightCount();
+    if (ncon == 0) {
+      ncon = 1;
+    }
+    idx_t nparts = target.vertexCount();
+    std::vector<real_t> tpwgts(nparts * ncon, static_cast<real_t>(1.) / nparts);
+    if (!target.vertexWeightsUniform()) {
+      for (idx_t i = 0; i < target.vertexCount(); i++) {
+        for (idx_t j = 0; j < ncon; ++j) {
+          tpwgts[(i * ncon) + j] = target.vertexWeights()[i];
+        }
+      }
+    }
+
+    idx_t options[3] = {1, 0, seed};
+    idx_t numflag = 0;
+    idx_t wgtflag = 0;
+    if (!vwgt.empty()) {
+      wgtflag |= 2;
+    }
+    if (!adjwgt.empty()) {
+      wgtflag |= 1;
+    }
+    std::vector<real_t> ubvec(ncon, target.imbalance() + 1.0);
+
+    idx_t edgecut = 0;
+    std::vector<idx_t> part(cellCount);
+
+    if (mode == ParmetisPartitionMode::Default) {
+      ParMETIS_V3_PartKway(vtxdist.data(),
+                           xadj.data(),
+                           adjncy.data(),
+                           vwgt.empty() ? nullptr : vwgt.data(),
+                           adjwgt.empty() ? nullptr : adjwgt.data(),
+                           &wgtflag,
+                           &numflag,
+                           &ncon,
+                           &nparts,
+                           tpwgts.data(),
+                           ubvec.data(),
+                           options,
+                           &edgecut,
+                           part.data(),
+                           &comm);
+    } else if (mode == ParmetisPartitionMode::Geometric) {
+      idx_t ndims = 3;
+      std::vector<real_t> xyz;
+      graph.geometricCoordinates(xyz);
+      ParMETIS_V3_PartGeomKway(vtxdist.data(),
+                               xadj.data(),
+                               adjncy.data(),
+                               vwgt.empty() ? nullptr : vwgt.data(),
+                               adjwgt.empty() ? nullptr : adjwgt.data(),
+                               &wgtflag,
+                               &numflag,
+                               &ndims,
+                               xyz.data(),
+                               &ncon,
+                               &nparts,
+                               tpwgts.data(),
+                               ubvec.data(),
+                               options,
+                               &edgecut,
+                               part.data(),
+                               &comm);
+    } else {
+      logError() << "Unknown partitioning mode for ParMETIS";
+      return PartitioningResult::ERROR;
+    }
+
+    for (int i = 0; i < cellCount; i++) {
+      partition[i] = part[i];
+    }
+
+    return PartitioningResult::SUCCESS;
+  }
 #endif // USE_MPI
 
-private:
-	ParmetisPartitionMode mode;
+  private:
+  ParmetisPartitionMode mode;
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITIONPARMETIS_H
diff --git a/PartitionPtscotch.h b/PartitionPtscotch.h
index aa3884f..bbfa75b 100644
--- a/PartitionPtscotch.h
+++ b/PartitionPtscotch.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,13 +9,14 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_PARTITIONPTSCOTCH_H
 #define PUML_PARTITIONPTSCOTCH_H
 
+#include "PartitionTarget.h"
+#include "utils/logger.h"
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
@@ -23,7 +27,6 @@
 
 #include <stdint.h>
 #include <stddef.h>
-#include <stdio.h>
 #include <ptscotch.h>
 #include <cmath>
 
@@ -31,95 +34,106 @@
 #include "PartitionGraph.h"
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML {
 
-template<TopoType Topo>
-class PartitionPtscotch : public PartitionBase<Topo>
-{
+template <TopoType Topo>
+class PartitionPtscotch : public PartitionBase<Topo> {
 
-public:
-	PartitionPtscotch(int mode) : mode(mode) {
-
-	}
+  public:
+  PartitionPtscotch(int mode) : mode(mode) {}
 #ifdef USE_MPI
-	virtual PartitioningResult partition(int* partition, const PartitionGraph<Topo>& graph, const PartitionTarget& target, int seed = 1)
-	{
-		int rank;
-		MPI_Comm_rank(graph.comm(), &rank);
-
-		if (graph.vertexWeights().size() > graph.localVertexCount()) {
-			logWarning(rank) << "Multiple vertex weights are currently ignored by PTSCOTCH.";
-		}
-		if (!graph.edgeWeights().empty()) {
-			logWarning(rank) << "The existence of edge weights may make PTSCOTCH very slow.";
-		}
-
-		auto comm = graph.comm();
-
-		std::vector<SCOTCH_Num> adjDisp(graph.adjDisp().begin(), graph.adjDisp().end());
-		std::vector<SCOTCH_Num> adj(graph.adj().begin(), graph.adj().end());
-		std::vector<SCOTCH_Num> vertexWeights(graph.vertexWeights().begin(), graph.vertexWeights().end());
-		std::vector<SCOTCH_Num> edgeWeights(graph.edgeWeights().begin(), graph.edgeWeights().end());
-		auto cellCount = graph.localVertexCount();
-
-		auto nparts = target.vertexCount();
-
-		std::vector<SCOTCH_Num> weights(nparts, 1);
-		if (!target.vertexWeightsUniform()) {
-			// we need to convert from double node weights to integer node weights
-			// (that is due to the interface still being oriented at ParMETIS right now)
-
-			double scale = (double)(1ULL<<24); // if this is not enough (or too much), adjust it
-			for (int i = 0; i < nparts; ++i) {
-				// important: the weights should be non-negative
-				weights[i] = std::max(static_cast<SCOTCH_Num>(1), static_cast<SCOTCH_Num>(std::round(target.vertexWeights()[i] * scale)));
-			}
-		}
-
-		int edgecut;
-		std::vector<SCOTCH_Num> part(cellCount);
-
-		SCOTCH_Dgraph dgraph;
-		SCOTCH_Strat strategy;
-		SCOTCH_Arch arch;
-
-		SCOTCH_Num processCount = graph.processCount();
-		SCOTCH_Num partCount = nparts;
-		SCOTCH_Num stratflag = mode;
-
-		SCOTCH_randomProc(rank);
-		SCOTCH_randomSeed(seed);
-		SCOTCH_randomReset();
-		
-		SCOTCH_dgraphInit(&dgraph, comm);
-		SCOTCH_stratInit(&strategy);
-		SCOTCH_archInit(&arch);
-
-		SCOTCH_dgraphBuild(&dgraph, 0, graph.localVertexCount(), graph.localVertexCount(), adjDisp.data(), nullptr,
-				vertexWeights.empty() ? nullptr : vertexWeights.data(), nullptr, graph.localEdgeCount(), graph.localEdgeCount(), 
-				adj.data(), nullptr, edgeWeights.empty() ? nullptr : edgeWeights.data());
-		SCOTCH_stratDgraphMapBuild(&strategy, stratflag, processCount, partCount, target.imbalance());
-		SCOTCH_archCmpltw(&arch, partCount, weights.data());
-
-		SCOTCH_dgraphMap(&dgraph, &arch, &strategy, part.data());
-		
-		SCOTCH_archExit(&arch);
-		SCOTCH_stratExit(&strategy);
-		SCOTCH_dgraphExit(&dgraph);
-
-		for (int i = 0; i < cellCount; i++) {
-			partition[i] = part[i];
-		}
-
-		return PartitioningResult::SUCCESS;
-	}
+  virtual auto partition(int* partition,
+                         const PartitionGraph<Topo>& graph,
+                         const PartitionTarget& target,
+                         int seed = 1) -> PartitioningResult {
+    int rank = 0;
+    MPI_Comm_rank(graph.comm(), &rank);
+
+    if (graph.vertexWeights().size() > graph.localVertexCount()) {
+      logWarning(rank) << "Multiple vertex weights are currently ignored by PTSCOTCH.";
+    }
+    if (!graph.edgeWeights().empty()) {
+      logWarning(rank) << "The existence of edge weights may make PTSCOTCH very slow.";
+    }
+
+    auto comm = graph.comm();
+
+    std::vector<SCOTCH_Num> adjDisp(graph.adjDisp().begin(), graph.adjDisp().end());
+    std::vector<SCOTCH_Num> adj(graph.adj().begin(), graph.adj().end());
+    std::vector<SCOTCH_Num> vertexWeights(graph.vertexWeights().begin(),
+                                          graph.vertexWeights().end());
+    std::vector<SCOTCH_Num> edgeWeights(graph.edgeWeights().begin(), graph.edgeWeights().end());
+    auto cellCount = graph.localVertexCount();
+
+    auto nparts = target.vertexCount();
+
+    std::vector<SCOTCH_Num> weights(nparts, 1);
+    if (!target.vertexWeightsUniform()) {
+      // we need to convert from double node weights to integer node weights
+      // (that is due to the interface still being oriented at ParMETIS right now)
+
+      auto scale = (double)(1ULL << 24); // if this is not enough (or too much), adjust it
+      for (int i = 0; i < nparts; ++i) {
+        // important: the weights should be non-negative
+        weights[i] =
+            std::max(static_cast<SCOTCH_Num>(1),
+                     static_cast<SCOTCH_Num>(std::round(target.vertexWeights()[i] * scale)));
+      }
+    }
+
+    int edgecut = 0;
+    std::vector<SCOTCH_Num> part(cellCount);
+
+    SCOTCH_Dgraph dgraph;
+    SCOTCH_Strat strategy;
+    SCOTCH_Arch arch;
+
+    SCOTCH_Num processCount = graph.processCount();
+    SCOTCH_Num partCount = nparts;
+    SCOTCH_Num stratflag = mode;
+
+    SCOTCH_randomProc(rank);
+    SCOTCH_randomSeed(seed);
+    SCOTCH_randomReset();
+
+    SCOTCH_dgraphInit(&dgraph, comm);
+    SCOTCH_stratInit(&strategy);
+    SCOTCH_archInit(&arch);
+
+    SCOTCH_dgraphBuild(&dgraph,
+                       0,
+                       graph.localVertexCount(),
+                       graph.localVertexCount(),
+                       adjDisp.data(),
+                       nullptr,
+                       vertexWeights.empty() ? nullptr : vertexWeights.data(),
+                       nullptr,
+                       graph.localEdgeCount(),
+                       graph.localEdgeCount(),
+                       adj.data(),
+                       nullptr,
+                       edgeWeights.empty() ? nullptr : edgeWeights.data());
+    SCOTCH_stratDgraphMapBuild(&strategy, stratflag, processCount, partCount, target.imbalance());
+    SCOTCH_archCmpltw(&arch, partCount, weights.data());
+
+    SCOTCH_dgraphMap(&dgraph, &arch, &strategy, part.data());
+
+    SCOTCH_archExit(&arch);
+    SCOTCH_stratExit(&strategy);
+    SCOTCH_dgraphExit(&dgraph);
+
+    for (int i = 0; i < cellCount; i++) {
+      partition[i] = part[i];
+    }
+
+    return PartitioningResult::SUCCESS;
+  }
 #endif // USE_MPI
 
-private:
-	int mode;
+  private:
+  int mode;
 };
 
-}
+} // namespace PUML
 
 #endif // PUML_PARTITIONPTSCOTCH_H
diff --git a/PartitionTarget.h b/PartitionTarget.h
index e1c103d..f136377 100644
--- a/PartitionTarget.h
+++ b/PartitionTarget.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,76 +9,55 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_PARTITION_TARGET_H
 #define PUML_PARTITION_TARGET_H
 
+#include <cstddef>
 #ifdef USE_MPI
-#include <mpi.h>
 #endif // USE_MPI
 
 #include <vector>
 #include <cassert>
-#include "Topology.h"
-#include "PUML.h"
 
-#include "utils/logger.h"
-
-namespace PUML
-{
+namespace PUML {
 
 class PartitionTarget {
-public:
-    PartitionTarget() {}
-
-    void setVertexWeightsUniform(std::size_t vertexCount) {
-        m_vertexCount = vertexCount;
-        m_vertexWeights.clear();
-    }
+  public:
+  PartitionTarget() = default;
 
-    void setVertexWeights(const std::vector<double>& vertexWeights) {
-        m_vertexWeights = vertexWeights;
-        m_vertexCount = vertexWeights.size();
-    }
+  void setVertexWeightsUniform(std::size_t vertexCount) {
+    m_vertexCount = vertexCount;
+    m_vertexWeights.clear();
+  }
 
-    void setVertexWeights(std::size_t vertexCount, double* vertexWeights) {
-        m_vertexCount = vertexCount;
-        m_vertexWeights = std::vector<double>(vertexWeights, vertexWeights + vertexCount);
-    }
+  void setVertexWeights(const std::vector<double>& vertexWeights) {
+    m_vertexWeights = vertexWeights;
+    m_vertexCount = vertexWeights.size();
+  }
 
-    void setImbalance(double imbalance) {
-        m_imbalance = imbalance;
-    }
+  void setVertexWeights(std::size_t vertexCount, double* vertexWeights) {
+    m_vertexCount = vertexCount;
+    m_vertexWeights = std::vector<double>(vertexWeights, vertexWeights + vertexCount);
+  }
 
-    const std::vector<double>& vertexWeights() const {
-        return m_vertexWeights;
-    }
+  void setImbalance(double imbalance) { m_imbalance = imbalance; }
 
-    bool vertexWeightsUniform() const {
-        return m_vertexWeights.empty();
-    }
+  [[nodiscard]] auto vertexWeights() const -> const std::vector<double>& { return m_vertexWeights; }
 
-    bool edgeWeightsUniform() const {
-        // TODO: implement non-uniform
-        return true;
-    }
+  [[nodiscard]] auto vertexWeightsUniform() const -> bool { return m_vertexWeights.empty(); }
 
-    double imbalance() const {
-        return m_imbalance;
-    }
+  [[nodiscard]] auto imbalance() const -> double { return m_imbalance; }
 
-    std::size_t vertexCount() const {
-        return m_vertexCount;
-    }
+  [[nodiscard]] auto vertexCount() const -> std::size_t { return m_vertexCount; }
 
-private:
-    std::vector<double> m_vertexWeights;
-    std::size_t m_vertexCount;
-    double m_imbalance;
+  private:
+  std::vector<double> m_vertexWeights;
+  std::size_t m_vertexCount{};
+  double m_imbalance{};
 };
 
-}
+} // namespace PUML
 #endif
diff --git a/README.md b/README.md
index 589c4b3..bbeccda 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,16 @@
+<!--
+    SPDX-FileCopyrightText: 2017-2024 Technical University of Munich
+
+    SPDX-License-Identifier: BSD-3-Clause
+-->
+
 # PUML2
-a Parallel Unstructured Mesh Library for reading large unstructured meshes
+A **P**arallel **U**nstructured **M**esh **L**ibrary optimized for reading large unstructured meshes, version **2**.
 
 PUML2 is a parallel reader for large unstructured meshes.
-Currently, it supports meshes stored in the [XDMF format](http://xdmf.org) with [HDF5](https://support.hdfgroup.org/HDF5/).
-PUML2 reads meshes in O(n + m/n) and requires O(n + m/n) memory per MPI process where n is the total number of processes and m the number of elements.
+Currently, it supports meshes stored in [HDF5](https://support.hdfgroup.org/HDF5/), described by the [XDMF format](http://xdmf.org).
+PUML2 reads meshes in $\mathcal{O}(n + m/n)$ and requires $\mathcal{O}(n + m/n)$ memory per MPI process where $n$ is the total number of processes and $m$ the number of elements.
 The current implementation is limited to tetrahedral meshes but the code is prepared to support hexahedral meshes as well.
 
-PUML2 is a C++ header-only library based on C++11.
+PUML2 is a C++ header-only library written in C++17.
 You simply copy the header files into your project and start using it.
diff --git a/Topology.h b/Topology.h
index c0610e1..12c67f7 100644
--- a/Topology.h
+++ b/Topology.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,95 +9,97 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
 #ifndef PUML_TOPOLOGY_H
 #define PUML_TOPOLOGY_H
 
-namespace PUML
-{
+namespace PUML {
 
 /**
  * The topology types
  */
-enum TopoType {
-	TETRAHEDRON,
-	HEXAHEDRON
-};
+enum TopoType { TETRAHEDRON, HEXAHEDRON };
 
-namespace internal
-{
+namespace internal {
 
 /**
  * Class describing the different topologies
  */
-template<TopoType>
-class Topology
-{
-public:
-	/**
-	 * @return The number of vertices of a cell
-	 */
-	static constexpr unsigned int cellvertices();
-
-	/**
-	 * @return The number of faces of a cell
-	 */
-	static constexpr unsigned int cellfaces();
-
-	/**
-	 * @return The number of edges for a cell
-	 */
-	static constexpr unsigned int celledges();
-
-	/**
-	 * @return The number of edges for a face
-	 */
-	static constexpr unsigned int faceedges()
-	{ return facevertices(); /* This is always the same */ }
-
-	/**
-	 * @return The number vertices for a face
-	 */
-	static constexpr unsigned int facevertices();
+template <TopoType>
+class Topology {
+  public:
+  /**
+   * @return The number of vertices of a cell
+   */
+  static constexpr auto cellvertices() -> unsigned int;
+
+  /**
+   * @return The number of faces of a cell
+   */
+  static constexpr auto cellfaces() -> unsigned int;
+
+  /**
+   * @return The number of edges for a cell
+   */
+  static constexpr auto celledges() -> unsigned int;
+
+  /**
+   * @return The number of edges for a face
+   */
+  static constexpr auto faceedges() -> unsigned int {
+    return facevertices(); /* This is always the same */
+  }
+
+  /**
+   * @return The number vertices for a face
+   */
+  static constexpr auto facevertices() -> unsigned int;
 };
 
-template<> inline
-constexpr unsigned int Topology<TETRAHEDRON>::cellvertices()
-{ return 4; }
-
-template<> inline
-constexpr unsigned int Topology<HEXAHEDRON>::cellvertices()
-{ return 8; }
-
-template<> inline
-constexpr unsigned int Topology<TETRAHEDRON>::cellfaces()
-{ return 4; }
+template <>
+constexpr auto Topology<TETRAHEDRON>::cellvertices() -> unsigned int {
+  return 4;
+}
 
-template<> inline
-constexpr unsigned int Topology<HEXAHEDRON>::cellfaces()
-{ return 6; }
+template <>
+constexpr auto Topology<HEXAHEDRON>::cellvertices() -> unsigned int {
+  return 8;
+}
 
-template<> inline
-constexpr unsigned int Topology<TETRAHEDRON>::celledges()
-{ return 6; }
+template <>
+constexpr auto Topology<TETRAHEDRON>::cellfaces() -> unsigned int {
+  return 4;
+}
 
-template<> inline
-constexpr unsigned int Topology<HEXAHEDRON>::celledges()
-{ return 12; }
+template <>
+constexpr auto Topology<HEXAHEDRON>::cellfaces() -> unsigned int {
+  return 6;
+}
 
-template<> inline
-constexpr unsigned int Topology<TETRAHEDRON>::facevertices()
-{ return 3; }
+template <>
+constexpr auto Topology<TETRAHEDRON>::celledges() -> unsigned int {
+  return 6;
+}
 
-template<> inline
-constexpr unsigned int Topology<HEXAHEDRON>::facevertices()
-{ return 4; }
+template <>
+constexpr auto Topology<HEXAHEDRON>::celledges() -> unsigned int {
+  return 12;
+}
 
+template <>
+constexpr auto Topology<TETRAHEDRON>::facevertices() -> unsigned int {
+  return 3;
 }
 
+template <>
+constexpr auto Topology<HEXAHEDRON>::facevertices() -> unsigned int {
+  return 4;
 }
 
-#endif // PUML_TOPOLOGY_H
\ No newline at end of file
+} // namespace internal
+
+} // namespace PUML
+
+#endif // PUML_TOPOLOGY_H
diff --git a/TypeInference.h b/TypeInference.h
index daa2a73..b90c544 100644
--- a/TypeInference.h
+++ b/TypeInference.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,56 +9,177 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author David Schneller <david.schneller@tum.de>
  */
 
 #ifndef PUML_TYPE_INFERENCE_H
 #define PUML_TYPE_INFERENCE_H
 
+#include <hdf5.h>
 #ifdef USE_MPI
 #include <mpi.h>
 #endif // USE_MPI
 
-#include <hdf5.h>
-
-namespace PUML
-{
+namespace PUML {
 
 #ifdef USE_MPI
-template<typename T> class MPITypeInfer { public: static MPI_Datatype type() { return MPI_BYTE; } };
-template<> class MPITypeInfer<char> { public: static MPI_Datatype type() { return MPI_CHAR; } };
-template<> class MPITypeInfer<signed char> { public: static MPI_Datatype type() { return MPI_SIGNED_CHAR; } };
-template<> class MPITypeInfer<unsigned char> { public: static MPI_Datatype type() { return MPI_UNSIGNED_CHAR; } };
-template<> class MPITypeInfer<short> { public: static MPI_Datatype type() { return MPI_SHORT; } };
-template<> class MPITypeInfer<unsigned short> { public: static MPI_Datatype type() { return MPI_UNSIGNED_SHORT; } };
-template<> class MPITypeInfer<int> { public: static MPI_Datatype type() { return MPI_INT; } };
-template<> class MPITypeInfer<unsigned> { public: static MPI_Datatype type() { return MPI_UNSIGNED; } };
-template<> class MPITypeInfer<long> { public: static MPI_Datatype type() { return MPI_LONG; } };
-template<> class MPITypeInfer<unsigned long> { public: static MPI_Datatype type() { return MPI_UNSIGNED_LONG; } };
-template<> class MPITypeInfer<long long> { public: static MPI_Datatype type() { return MPI_LONG_LONG; } };
-template<> class MPITypeInfer<unsigned long long> { public: static MPI_Datatype type() { return MPI_UNSIGNED_LONG_LONG; } };
-template<> class MPITypeInfer<float> { public: static MPI_Datatype type() { return MPI_FLOAT; } };
-template<> class MPITypeInfer<double> { public: static MPI_Datatype type() { return MPI_DOUBLE; } };
-template<> class MPITypeInfer<long double> { public: static MPI_Datatype type() { return MPI_LONG_DOUBLE; } };
-template<> class MPITypeInfer<wchar_t> { public: static MPI_Datatype type() { return MPI_WCHAR; } };
+template <typename T>
+class MPITypeInfer {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_BYTE; }
+};
+template <>
+class MPITypeInfer<char> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_CHAR; }
+};
+template <>
+class MPITypeInfer<signed char> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_SIGNED_CHAR; }
+};
+template <>
+class MPITypeInfer<unsigned char> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_UNSIGNED_CHAR; }
+};
+template <>
+class MPITypeInfer<short> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_SHORT; }
+};
+template <>
+class MPITypeInfer<unsigned short> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_UNSIGNED_SHORT; }
+};
+template <>
+class MPITypeInfer<int> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_INT; }
+};
+template <>
+class MPITypeInfer<unsigned> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_UNSIGNED; }
+};
+template <>
+class MPITypeInfer<long> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_LONG; }
+};
+template <>
+class MPITypeInfer<unsigned long> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_UNSIGNED_LONG; }
+};
+template <>
+class MPITypeInfer<long long> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_LONG_LONG; }
+};
+template <>
+class MPITypeInfer<unsigned long long> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_UNSIGNED_LONG_LONG; }
+};
+template <>
+class MPITypeInfer<float> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_FLOAT; }
+};
+template <>
+class MPITypeInfer<double> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_DOUBLE; }
+};
+template <>
+class MPITypeInfer<long double> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_LONG_DOUBLE; }
+};
+template <>
+class MPITypeInfer<wchar_t> {
+  public:
+  static auto type() -> MPI_Datatype { return MPI_WCHAR; }
+};
 #endif
 
-template<typename T> class HDF5TypeInfer { public: static hid_t type() { return -1; } };
-template<> class HDF5TypeInfer<char> { public: static hid_t type() { return H5T_NATIVE_CHAR; } };
-template<> class HDF5TypeInfer<signed char> { public: static hid_t type() { return H5T_NATIVE_SCHAR; } };
-template<> class HDF5TypeInfer<unsigned char> { public: static hid_t type() { return H5T_NATIVE_UCHAR; } };
-template<> class HDF5TypeInfer<short> { public: static hid_t type() { return H5T_NATIVE_SHORT; } };
-template<> class HDF5TypeInfer<unsigned short> { public: static hid_t type() { return H5T_NATIVE_USHORT; } };
-template<> class HDF5TypeInfer<int> { public: static hid_t type() { return H5T_NATIVE_INT; } };
-template<> class HDF5TypeInfer<unsigned> { public: static hid_t type() { return H5T_NATIVE_UINT; } };
-template<> class HDF5TypeInfer<long> { public: static hid_t type() { return H5T_NATIVE_LONG; } };
-template<> class HDF5TypeInfer<unsigned long> { public: static hid_t type() { return H5T_NATIVE_ULONG; } };
-template<> class HDF5TypeInfer<long long> { public: static hid_t type() { return H5T_NATIVE_LLONG; } };
-template<> class HDF5TypeInfer<unsigned long long> { public: static hid_t type() { return H5T_NATIVE_ULLONG; } };
-template<> class HDF5TypeInfer<float> { public: static hid_t type() { return H5T_NATIVE_FLOAT; } };
-template<> class HDF5TypeInfer<double> { public: static hid_t type() { return H5T_NATIVE_DOUBLE; } };
-template<> class HDF5TypeInfer<long double> { public: static hid_t type() { return H5T_NATIVE_LDOUBLE; } };
+template <typename T>
+class HDF5TypeInfer {
+  public:
+  static auto type() -> hid_t { return -1; }
+};
+template <>
+class HDF5TypeInfer<char> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_CHAR; }
+};
+template <>
+class HDF5TypeInfer<signed char> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_SCHAR; }
+};
+template <>
+class HDF5TypeInfer<unsigned char> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_UCHAR; }
+};
+template <>
+class HDF5TypeInfer<short> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_SHORT; }
+};
+template <>
+class HDF5TypeInfer<unsigned short> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_USHORT; }
+};
+template <>
+class HDF5TypeInfer<int> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_INT; }
+};
+template <>
+class HDF5TypeInfer<unsigned> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_UINT; }
+};
+template <>
+class HDF5TypeInfer<long> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_LONG; }
+};
+template <>
+class HDF5TypeInfer<unsigned long> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_ULONG; }
+};
+template <>
+class HDF5TypeInfer<long long> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_LLONG; }
+};
+template <>
+class HDF5TypeInfer<unsigned long long> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_ULLONG; }
+};
+template <>
+class HDF5TypeInfer<float> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_FLOAT; }
+};
+template <>
+class HDF5TypeInfer<double> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_DOUBLE; }
+};
+template <>
+class HDF5TypeInfer<long double> {
+  public:
+  static auto type() -> hid_t { return H5T_NATIVE_LDOUBLE; }
+};
 
 } // namespace PUML
 
diff --git a/Upward.h b/Upward.h
index a4b9037..8bf8954 100644
--- a/Upward.h
+++ b/Upward.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -16,106 +18,103 @@
 #include <algorithm>
 #include <cassert>
 #include <cstring>
+#include <vector>
+#include <iterator>
 
 #include "PUML.h"
 #include "Topology.h"
 
-namespace PUML
-{
-
-class Upward
-{
-public:
-	/**
-	 * Returns all local cell ids for a face
-	 *
-	 * @param puml The PUML mesh
-	 * @param cell The face for which the cells should be returned
-	 * @param lid The local ids of the cells
-	 */
-	template<TopoType Topo>
-	static void cells(const PUML<Topo> &puml, const typename PUML<Topo>::face_t &face, int* lid)
-	{
-		memcpy(lid, face.m_upward, 2 * sizeof(int));
-	}
-
-	template<TopoType Topo, bool M = false>
-	static void faces(const PUML<Topo> &puml, const typename PUML<Topo>::edge_t &edge, std::vector<int> &lid)
-	{
-		merge<M>(lid, edge.m_upward);
-	}
-
-	template<TopoType Topo, bool M = false>
-	static void cells(const PUML<Topo> &puml, const typename PUML<Topo>::edge_t &edge, std::vector<int> &lid)
-	{
-		std::vector<int> faceIds;
-		faces(puml, edge, faceIds);
-
-		std::vector<int> cellIds;
-		for (std::vector<int>::const_iterator it = faceIds.begin();
-				it != faceIds.end(); ++it) {
-			int tmp[2];
-			cells(puml, puml.faces()[*it], tmp);
-			unsigned int c = (tmp[1] < 0 ? 1 : 2);
-
-			std::vector<int> merged;
-			std::set_union(cellIds.begin(), cellIds.end(),
-				tmp, tmp+c,
-				std::back_inserter(merged));
-			std::swap(merged, cellIds);
-		}
-
-		if (M)
-			merge<true>(lid, cellIds);
-		else
-			std::swap(lid, cellIds);
-	}
-
-	template<TopoType Topo, bool M = false>
-	static void edges(const PUML<Topo> &puml, const typename PUML<Topo>::vertex_t &vertex, std::vector<int> &lid)
-	{
-		merge<M>(lid, vertex.m_upward);
-	}
-
-	template<TopoType Topo, bool M = false>
-	static void cells(const PUML<Topo> &puml, const typename PUML<Topo>::vertex_t &vertex, std::vector<int> &lid)
-	{
-		std::vector<int> edgeIds;
-		edges(puml, vertex, edgeIds);
-
-		std::vector<int> cellIds;
-		for (std::vector<int>::const_iterator it = edgeIds.begin();
-				it != edgeIds.end(); ++it) {
-			merge<true>(cellIds, puml.edges()[*it].m_upward);
-		}
-
-		if (M)
-			merge<true>(lid, cellIds);
-		else
-			std::swap(lid, cellIds);
-	}
-
-private:
-	template<bool M>
-	static void merge(std::vector<int> &res, const std::vector<int> &v);
+namespace PUML {
+
+class Upward {
+  public:
+  /**
+   * Returns all local cell ids for a face
+   *
+   * @param puml The PUML mesh
+   * @param cell The face for which the cells should be returned
+   * @param lid The local ids of the cells
+   */
+  template <TopoType Topo>
+  static void cells(const PUML<Topo>& puml, const typename PUML<Topo>::face_t& face, int* lid) {
+    memcpy(lid, face.m_upward, 2 * sizeof(int));
+  }
+
+  template <TopoType Topo, bool M = false>
+  static void faces(const PUML<Topo>& puml,
+                    const typename PUML<Topo>::edge_t& edge,
+                    std::vector<int>& lid) {
+    merge<M>(lid, edge.m_upward);
+  }
+
+  template <TopoType Topo, bool M = false>
+  static void cells(const PUML<Topo>& puml,
+                    const typename PUML<Topo>::edge_t& edge,
+                    std::vector<int>& lid) {
+    std::vector<int> faceIds;
+    faces(puml, edge, faceIds);
+
+    std::vector<int> cellIds;
+    for (int faceId : faceIds) {
+      int tmp[2];
+      cells(puml, puml.faces()[faceId], tmp);
+      const unsigned int c = (tmp[1] < 0 ? 1 : 2);
+
+      std::vector<int> merged;
+      std::set_union(cellIds.begin(), cellIds.end(), tmp, tmp + c, std::back_inserter(merged));
+      std::swap(merged, cellIds);
+    }
+
+    if (M) {
+      merge<true>(lid, cellIds);
+    } else {
+      std::swap(lid, cellIds);
+    }
+  }
+
+  template <TopoType Topo, bool M = false>
+  static void edges(const PUML<Topo>& puml,
+                    const typename PUML<Topo>::vertex_t& vertex,
+                    std::vector<int>& lid) {
+    merge<M>(lid, vertex.m_upward);
+  }
+
+  template <TopoType Topo, bool M = false>
+  static void cells(const PUML<Topo>& puml,
+                    const typename PUML<Topo>::vertex_t& vertex,
+                    std::vector<int>& lid) {
+    std::vector<int> edgeIds;
+    edges(puml, vertex, edgeIds);
+
+    std::vector<int> cellIds;
+    for (int edgeId : edgeIds) {
+      merge<true>(cellIds, puml.edges()[edgeId].m_upward);
+    }
+
+    if (M) {
+      merge<true>(lid, cellIds);
+    } else {
+      std::swap(lid, cellIds);
+    }
+  }
+
+  private:
+  template <bool M>
+  static void merge(std::vector<int>& res, const std::vector<int>& v);
 };
 
-template<> inline
-void Upward::merge<false>(std::vector<int> &res, const std::vector<int> &v)
-{
-	res = v;
+template <>
+inline void Upward::merge<false>(std::vector<int>& res, const std::vector<int>& v) {
+  res = v;
 }
 
-template<> inline
-void Upward::merge<true>(std::vector<int> &res, const std::vector<int> &v)
-{
-	std::vector<int> tmp;
-	std::set_union(v.begin(), v.end(),
-		res.begin(), res.end(),
-		std::back_inserter(tmp));
-	std::swap(tmp, res);
+template <>
+inline void Upward::merge<true>(std::vector<int>& res, const std::vector<int>& v) {
+  std::vector<int> tmp;
+  std::set_union(v.begin(), v.end(), res.begin(), res.end(), std::back_inserter(tmp));
+  std::swap(tmp, res);
 }
 
-}
+} // namespace PUML
 
 #endif // PUML_UPWARD_H
diff --git a/Utils.h b/Utils.h
index eeb324e..eea5659 100644
--- a/Utils.h
+++ b/Utils.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -18,49 +20,42 @@
 #include "PUML.h"
 #include "Topology.h"
 
-namespace PUML
-{
+namespace PUML::internal {
 
-namespace internal
-{
-
-template<TopoType Topo, typename E>
-struct ElementVector
-{
-	static const std::vector<E>& elements(const PUML<Topo> &puml);
+template <TopoType Topo, typename E>
+struct ElementVector {
+  static auto elements(const PUML<Topo>& puml) -> const std::vector<E>&;
 };
 
-template<TopoType Topo>
-struct ElementVector<Topo, typename PUML<Topo>::cell_t>
-{
-	static const std::vector<typename PUML<Topo>::cell_t>& elements(const PUML<Topo> &puml)
-	{ return puml.cells(); }
+template <TopoType Topo>
+struct ElementVector<Topo, typename PUML<Topo>::cell_t> {
+  static auto elements(const PUML<Topo>& puml) -> const std::vector<typename PUML<Topo>::cell_t>& {
+    return puml.cells();
+  }
 };
 
-template<TopoType Topo>
-struct ElementVector<Topo, typename PUML<Topo>::vertex_t>
-{
-	static const std::vector<typename PUML<Topo>::vertex_t>& elements(const PUML<Topo> &puml)
-	{ return puml.vertices(); }
+template <TopoType Topo>
+struct ElementVector<Topo, typename PUML<Topo>::vertex_t> {
+  static auto elements(const PUML<Topo>& puml)
+      -> const std::vector<typename PUML<Topo>::vertex_t>& {
+    return puml.vertices();
+  }
 };
 
-class Utils
-{
-public:
-	/**
-	 * Get the global ids for a set of local ids
-	 */
-	template<TopoType Topo, typename E, unsigned int N>
-	static void l2g(const PUML<Topo> &puml, const unsigned int lid[N], unsigned long gid[N])
-	{
-		const std::vector<E>& elements = ElementVector<Topo, E>::elements(puml);
-		for (unsigned int i = 0; i < N; i++)
-			gid[i] = elements[lid[i]].m_gid;
-	}
+class Utils {
+  public:
+  /**
+   * Get the global ids for a set of local ids
+   */
+  template <TopoType Topo, typename E, unsigned int N>
+  static void l2g(const PUML<Topo>& puml, const unsigned int lid[N], unsigned long gid[N]) {
+    const std::vector<E>& elements = ElementVector<Topo, E>::elements(puml);
+    for (unsigned int i = 0; i < N; i++) {
+      gid[i] = elements[lid[i]].m_gid;
+    }
+  }
 };
 
-}
-
-}
+} // namespace PUML::internal
 
-#endif // PUML_UTILS_H
\ No newline at end of file
+#endif // PUML_UTILS_H
diff --git a/VertexElementMap.h b/VertexElementMap.h
index 1475143..28042ac 100644
--- a/VertexElementMap.h
+++ b/VertexElementMap.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +9,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -15,106 +17,89 @@
 
 #include <algorithm>
 #include <cstring>
+#include <functional>
 #include <unordered_map>
 
-namespace PUML
-{
-
-namespace internal
-{
+namespace PUML::internal {
 
 /**
  * Mapps from a list of local vertex ids to the local id of the element
  */
-template<unsigned int N>
-class VertexElementMap
-{
-private:
-	/**
-	 * Description of a element by the local vertex ids
-	 */
-	struct Element
-	{
-		/** The vertices that define this element */
-		unsigned int vertices[N];
-
-		Element(const unsigned int vertices[N])
-		{
-			memcpy(this->vertices, vertices, N*sizeof(unsigned int));
-			std::sort(this->vertices, this->vertices+N);
-		}
-
-		bool operator==(const Element &other) const
-		{
-			return memcmp(vertices, other.vertices, N*sizeof(unsigned int)) == 0;
-		}
-	};
-
-	struct ElementHash
-	{
-		std::size_t operator()(const Element &element) const
-		{
-			std::size_t h = std::hash<unsigned int>{}(element.vertices[0]);
-			for (unsigned int i = 1; i < N; i++)
-				hash_combine(h, element.vertices[i]);
-
-			return h;
-		}
-	};
-
-	std::unordered_map<Element, unsigned int, ElementHash> m_elements;
-
-public:
-	VertexElementMap()
-	{ }
-
-	unsigned int add(const unsigned int vertices[N])
-	{
-		const Element e(vertices);
-
-		typename std::unordered_map<Element, unsigned int, ElementHash>::const_iterator it = m_elements.find(e);
-		if (it == m_elements.end()) {
-			unsigned int id = m_elements.size();
-			it = m_elements.emplace(e, id).first;
-		}
-
-		return it->second;
-	}
-
-	size_t size() const
-	{
-		return m_elements.size();
-	}
-
-	int find(unsigned int vertices[N]) const
-	{
-		typename std::unordered_map<Element, unsigned int, ElementHash>::const_iterator it = m_elements.find(vertices);
-
-		if (it == m_elements.end())
-			return -1;
-
-		return it->second;
-	}
-
-	void clear()
-	{
-		m_elements.clear();
-	}
-
-private:
-	/**
-	 * Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
-	 */
-	template<typename T>
-	static void hash_combine(std::size_t& seed, const T& v)
-	{
-		std::hash<T> hasher;
-		seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-	}
+template <unsigned int N>
+class VertexElementMap {
+  private:
+  /**
+   * Description of a element by the local vertex ids
+   */
+  struct Element {
+    /** The vertices that define this element */
+    unsigned int vertices[N]{};
+
+    Element(const unsigned int vertices[N]) {
+      memcpy(this->vertices, vertices, N * sizeof(unsigned int));
+      std::sort(this->vertices, this->vertices + N);
+    }
+
+    auto operator==(const Element& other) const -> bool {
+      return memcmp(vertices, other.vertices, N * sizeof(unsigned int)) == 0;
+    }
+  };
+
+  struct ElementHash {
+    auto operator()(const Element& element) const -> std::size_t {
+      std::size_t h = std::hash<unsigned int>{}(element.vertices[0]);
+      for (unsigned int i = 1; i < N; i++) {
+        hashCombine(h, element.vertices[i]);
+      }
+
+      return h;
+    }
+  };
+
+  std::unordered_map<Element, unsigned int, ElementHash> m_elements;
+
+  public:
+  VertexElementMap() = default;
+
+  auto add(const unsigned int vertices[N]) -> unsigned int {
+    const Element e(vertices);
+
+    typename std::unordered_map<Element, unsigned int, ElementHash>::const_iterator it =
+        m_elements.find(e);
+    if (it == m_elements.end()) {
+      unsigned int id = m_elements.size();
+      it = m_elements.emplace(e, id).first;
+    }
+
+    return it->second;
+  }
+
+  [[nodiscard]] auto size() const -> size_t { return m_elements.size(); }
+
+  auto find(unsigned int vertices[N]) const -> int {
+    typename std::unordered_map<Element, unsigned int, ElementHash>::const_iterator it =
+        m_elements.find(vertices);
+
+    if (it == m_elements.end()) {
+      return -1;
+    }
+
+    return it->second;
+  }
+
+  void clear() { m_elements.clear(); }
+
+  private:
+  /**
+   * Taken from: https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
+   */
+  template <typename T>
+  static void hashCombine(std::size_t& seed, const T& v) {
+    std::hash<T> hasher;
+    seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+  }
 };
 
-}
-
-}
+} // namespace PUML::internal
 
 #endif // PUML_VERTEXELEMENTMAP_H
diff --git a/cmake/FindPTSCOTCH.cmake b/cmake/FindPTSCOTCH.cmake
index 2d619b4..06d55e1 100644
--- a/cmake/FindPTSCOTCH.cmake
+++ b/cmake/FindPTSCOTCH.cmake
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: CECILL-C
+
 ###
 #
 # @copyright (c) 2009-2014 The University of Tennessee and The University
diff --git a/cmake/FindParHIP.cmake b/cmake/FindParHIP.cmake
index e35bd28..c054b3f 100644
--- a/cmake/FindParHIP.cmake
+++ b/cmake/FindParHIP.cmake
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: 2023-2024 Technical University of Munich
+#
+# SPDX-License-Identifier: BSD-3-Clause
 
 find_path(PARHIP_INCLUDE_DIR parhip_interface.h
         HINTS ${PARHIP_INCLUDE_DIR} ENV PARHIP_INCLUDE_DIR ${PARHIP_DIR} ENV PARHIP_DIR
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
index 0098427..c987d51 100644
--- a/cmake/FindParMETIS.cmake
+++ b/cmake/FindParMETIS.cmake
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: BSD-2-Clause
+
 # - Try to find ParMETIS
 # Once done this will define
 #
diff --git a/test/base-test/puml.cpp b/test/base-test/puml.cpp
index c877926..611d344 100644
--- a/test/base-test/puml.cpp
+++ b/test/base-test/puml.cpp
@@ -1,3 +1,7 @@
+// SPDX-FileCopyrightText: 2017 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +10,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2017 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  */
 
@@ -23,99 +26,99 @@
 
 #include "XdmfWriter/XdmfWriter.h"
 
-int main(int argc, char* argv[])
-{
-	MPI_Init(&argc, &argv);
+int main(int argc, char* argv[]) {
+  MPI_Init(&argc, &argv);
 
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  int rank;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
-	utils::Args args;
-	args.addAdditionalOption("in", "the PUML mesh file");
-	args.addAdditionalOption("out", "the output file");
+  utils::Args args;
+  args.addAdditionalOption("in", "the PUML mesh file");
+  args.addAdditionalOption("out", "the output file");
 
-	switch (args.parse(argc, argv)) {
-	case utils::Args::Error:
-		MPI_Abort(MPI_COMM_WORLD, -1);
-		break;
-	case utils::Args::Help:
-		MPI_Finalize();
-		return 1;
-	}
+  switch (args.parse(argc, argv)) {
+  case utils::Args::Error:
+    MPI_Abort(MPI_COMM_WORLD, -1);
+    break;
+  case utils::Args::Help:
+    MPI_Finalize();
+    return 1;
+  }
 
-	PUML::TETPUML puml;
+  PUML::TETPUML puml;
 
-	std::string infile = args.getAdditionalArgument<const char*>("in");
-	std::string outfile = args.getAdditionalArgument<const char*>("out");
+  std::string infile = args.getAdditionalArgument<const char*>("in");
+  std::string outfile = args.getAdditionalArgument<const char*>("out");
 
-	// Read the mesh
-	logInfo(rank) << "Reading mesh";
-	puml.open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
+  // Read the mesh
+  logInfo(rank) << "Reading mesh";
+  puml.open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
 
-	logInfo(rank) << "Reading other data";
-	puml.addData((infile  + ":/group").c_str(), PUML::CELL);
+  logInfo(rank) << "Reading other data";
+  puml.addData((infile + ":/group").c_str(), PUML::CELL);
 
-	// Run the partitioning
-	logInfo(rank) << "Creating partitions";
-	int* partition = new int[puml.numOriginalCells()];
+  // Run the partitioning
+  logInfo(rank) << "Creating partitions";
+  int* partition = new int[puml.numOriginalCells()];
 
-	PUML::TETPartitionGraph graph(puml);
-	auto partitioner = PUML::Partition::getPartitioner(PUML::PartitionerType::Parmetis);
-	PUML::PartitionTarget target;
-	target.setVertexWeightsUniform();
-	target.setImbalance(1.01);
-	partitioner->partition(partition, graph, target);
+  PUML::TETPartitionGraph graph(puml);
+  auto partitioner = PUML::Partition::getPartitioner(PUML::PartitionerType::Parmetis);
+  PUML::PartitionTarget target;
+  target.setVertexWeightsUniform();
+  target.setImbalance(1.01);
+  partitioner->partition(partition, graph, target);
 
-	// Redistribute the cells
-	logInfo(rank) << "Redistributing cells";
-	puml.partition(partition);
-	delete [] partition;
+  // Redistribute the cells
+  logInfo(rank) << "Redistributing cells";
+  puml.partition(partition);
+  delete[] partition;
 
-	// Generate the mesh information
-	logInfo(rank) << "Generating mesh information";
-	puml.generateMesh();
+  // Generate the mesh information
+  logInfo(rank) << "Generating mesh information";
+  puml.generateMesh();
 
-	// Write test
-	logInfo(rank) << "Writing test file";
-	const std::vector<PUML::TETPUML::cell_t> &cells = puml.cells();
+  // Write test
+  logInfo(rank) << "Writing test file";
+  const std::vector<PUML::TETPUML::cell_t>& cells = puml.cells();
 
-	const std::vector<PUML::TETPUML::vertex_t> &vertices = puml.vertices();
+  const std::vector<PUML::TETPUML::vertex_t>& vertices = puml.vertices();
 
-	xdmfwriter::XdmfWriter<xdmfwriter::TETRAHEDRON, double> writer(xdmfwriter::POSIX, outfile.c_str());
+  xdmfwriter::XdmfWriter<xdmfwriter::TETRAHEDRON, double> writer(xdmfwriter::POSIX,
+                                                                 outfile.c_str());
 
-	std::vector<const char*> cellVars;
-	cellVars.push_back("group");
-	std::vector<const char*> vertexVars;
-	writer.init(cellVars, vertexVars);
+  std::vector<const char*> cellVars;
+  cellVars.push_back("group");
+  std::vector<const char*> vertexVars;
+  writer.init(cellVars, vertexVars);
 
-	double* coords = new double[vertices.size()*3];
-	for (unsigned int i = 0; i < vertices.size(); i++) {
-		memcpy(&coords[i*3], vertices[i].coordinate(), 3*sizeof(double));
-	}
+  double* coords = new double[vertices.size() * 3];
+  for (unsigned int i = 0; i < vertices.size(); i++) {
+    memcpy(&coords[i * 3], vertices[i].coordinate(), 3 * sizeof(double));
+  }
 
-	unsigned int* cellPoints = new unsigned int[cells.size()*4];
-	for (unsigned int i = 0; i < cells.size(); i++) {
-		PUML::Downward::vertices(puml, cells[i], &cellPoints[i*4]);
-	}
+  unsigned int* cellPoints = new unsigned int[cells.size() * 4];
+  for (unsigned int i = 0; i < cells.size(); i++) {
+    PUML::Downward::vertices(puml, cells[i], &cellPoints[i * 4]);
+  }
 
-	writer.setMesh(cells.size(), cellPoints, vertices.size(), coords);
+  writer.setMesh(cells.size(), cellPoints, vertices.size(), coords);
 
-	writer.addTimeStep(0);
+  writer.addTimeStep(0);
 
-	// Write cell/vertex data
-	double* cellData = new double[cells.size()];
-	for (unsigned int i = 0; i < cells.size(); i++)
-		cellData[i] = puml.cellData(0)[i];
+  // Write cell/vertex data
+  double* cellData = new double[cells.size()];
+  for (unsigned int i = 0; i < cells.size(); i++)
+    cellData[i] = puml.cellData(0)[i];
 
-	writer.writeCellData(0, cellData);
+  writer.writeCellData(0, cellData);
 
-	delete [] cellData;
+  delete[] cellData;
 
-	writer.close();
+  writer.close();
 
-	logInfo(rank) << "Done";
+  logInfo(rank) << "Done";
 
-	MPI_Finalize();
+  MPI_Finalize();
 
-	return 0;
+  return 0;
 }
\ No newline at end of file
diff --git a/test/partition-test/puml.cpp b/test/partition-test/puml.cpp
index 8ec5c75..268459b 100644
--- a/test/partition-test/puml.cpp
+++ b/test/partition-test/puml.cpp
@@ -1,3 +1,7 @@
+// SPDX-FileCopyrightText: 2019-2023 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
 /**
  * @file
  *  This file is part of PUML
@@ -6,7 +10,6 @@
  *  notice in the file 'COPYING' at the root directory of this package
  *  and the copyright notice at https://github.com/TUM-I5/PUMGen
  *
- * @copyright 2019-2023 Technische Universitaet Muenchen
  * @author Sebastian Rettenberger <sebastian.rettenberger@tum.de>
  * @author David Schneller <david.schneller@tum.de>
  */
@@ -35,440 +38,448 @@
 #include "Partition.h"
 
 std::vector<nlohmann::json> argparse(int argc, char* argv[]) {
-	std::vector<nlohmann::json> settings {
-		nlohmann::json()
-	};
-	for (int i = 1; i < argc; ++i) {
-		nlohmann::json js;
-		std::string carg(argv[i]);
-		if (carg.find('=') != std::string::npos) {
-			size_t idx = carg.find('=');
-			std::string name = carg.substr(0, idx);
-			std::string val = carg.substr(idx + 1);
-
-			// a quick&dirty argument type evaluator (it should actually use some trimming as well, but that's TODO, in case it should ever be needed)
-			if ((*val.begin()) == '\"' && (*val.rbegin()) == '\"') {
-				// string
-				js[name] = val.substr(1, val.size() - 2);
-			}
-			else if (val == "true") {
-				// true bool
-				js[name] = true;
-			}
-			else if (val == "false") {
-				// false bool
-				js[name] = false;
-			}
-			else {
-				// number (represented by a double)
-				js[name] = std::stod(val);
-			}
-		}
-		else {
-			std::ifstream settings_file(carg);
-			settings_file >> js;
-		}
-		
-		std::vector<nlohmann::json> jss;
-		if (js.is_array()) {
-			jss = std::vector<nlohmann::json>(js.begin(), js.end());
-		}
-		else {
-			jss = std::vector<nlohmann::json> { js };
-		}
-
-		std::vector<nlohmann::json> nsettings(jss.size() * settings.size());
-		for (int j = 0, l = 0; j < jss.size(); ++j) {
-			for (int k = 0; k < settings.size(); ++k, ++l) {
-				nlohmann::json copy = settings[k];
-				copy.merge_patch(jss[j]);
-				nsettings[l] = copy;
-			}
-		}
-		settings = nsettings;
-	}
-	return settings;
+  std::vector<nlohmann::json> settings{nlohmann::json()};
+  for (int i = 1; i < argc; ++i) {
+    nlohmann::json js;
+    std::string carg(argv[i]);
+    if (carg.find('=') != std::string::npos) {
+      size_t idx = carg.find('=');
+      std::string name = carg.substr(0, idx);
+      std::string val = carg.substr(idx + 1);
+
+      // a quick&dirty argument type evaluator (it should actually use some trimming as well, but
+      // that's TODO, in case it should ever be needed)
+      if ((*val.begin()) == '\"' && (*val.rbegin()) == '\"') {
+        // string
+        js[name] = val.substr(1, val.size() - 2);
+      } else if (val == "true") {
+        // true bool
+        js[name] = true;
+      } else if (val == "false") {
+        // false bool
+        js[name] = false;
+      } else {
+        // number (represented by a double)
+        js[name] = std::stod(val);
+      }
+    } else {
+      std::ifstream settings_file(carg);
+      settings_file >> js;
+    }
+
+    std::vector<nlohmann::json> jss;
+    if (js.is_array()) {
+      jss = std::vector<nlohmann::json>(js.begin(), js.end());
+    } else {
+      jss = std::vector<nlohmann::json>{js};
+    }
+
+    std::vector<nlohmann::json> nsettings(jss.size() * settings.size());
+    for (int j = 0, l = 0; j < jss.size(); ++j) {
+      for (int k = 0; k < settings.size(); ++k, ++l) {
+        nlohmann::json copy = settings[k];
+        copy.merge_patch(jss[j]);
+        nsettings[l] = copy;
+      }
+    }
+    settings = nsettings;
+  }
+  return settings;
 }
 
-template<typename T>
-T readSetting(const nlohmann::json& settings, const std::string& arg)
-{
-	if (settings.find(arg) != settings.end()) {
-		try {
-			return settings[arg];
-		}
-		catch (const std::exception& e) {
-			logError() << "Error while reading argument" << arg << " ... The error was" << e.what();
-			return T(); // TODO: unreachable hint
-		}
-	}
-	else {
-		logError() << "Argument" << arg << "not found in the settings list.";
-		return T(); // TODO: unreachable hint
-	}
+template <typename T>
+T readSetting(const nlohmann::json& settings, const std::string& arg) {
+  if (settings.find(arg) != settings.end()) {
+    try {
+      return settings[arg];
+    } catch (const std::exception& e) {
+      logError() << "Error while reading argument" << arg << " ... The error was" << e.what();
+      return T(); // TODO: unreachable hint
+    }
+  } else {
+    logError() << "Argument" << arg << "not found in the settings list.";
+    return T(); // TODO: unreachable hint
+  }
 }
 
-int main(int argc, char* argv[])
-{
-	int p;
-	MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &p);
-
-	MPI_Comm comm = MPI_COMM_WORLD;
-	int rank, size;
-	MPI_Comm_rank(comm, &rank);
-	MPI_Comm_size(comm, &size);
-
-	if (argc == 1) {
-		logWarning(rank) << "No settings given. They can be set as command line arguments. "
-		    "An argument can have two forms, either a key-value pair in the form \"key=value\" (note the '=', and no spaces around it), "
-			"or it can be a file name which contains a JSON file (note in particular: you may mix files and key-value pairs, and you may supply multiple files). "
-			"Later arguments override earlier arguments. If a file contains an array of configurations, each configuration is tested."
-			"Any following argument is then applied to all configurations. If again an array of configurations is given, the cross product between the two arrays is used.";
-	}
-
-	auto settingsList = argparse(argc, argv);
-
-	
-	logInfo(rank) << "PUML Partition Tester --- Starting";
-	logInfo(rank) << settingsList.size() << "tests";
-
-	PUML::TETPUML* puml = nullptr;
-
-	std::string lastmesh = "";
-	std::string lastmatmesh = "";
-	std::string lastmat = "";
-
-	for (int i = 0; i < settingsList.size(); ++i) {
-		auto settings = settingsList[i];
-		logInfo(rank) << "Test" << (i+1) << "of" << settingsList.size();
-		logInfo(rank) << "Using configuration: " << settings;
-		if (settings.is_null()) {
-			logInfo(rank) << "Aborting...";
-			break;
-		}
-
-		if (puml == nullptr || readSetting<std::string>(settings, "meshfile") != lastmesh) {
-			delete puml;
-			puml = new PUML::TETPUML();
-			std::string infile = readSetting<std::string>(settings, "meshfile");
-			logInfo(rank) << "Reading mesh" << infile;
-			puml->open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
-
-			logInfo(rank) << "Reading other data";
-			puml->addData((infile + ":/group").c_str(), PUML::CELL);
-			puml->addData((infile + ":/boundary").c_str(), PUML::CELL);
-
-			logInfo(rank) << "Generating mesh information";
-			puml->generateMesh();
-
-			lastmesh = readSetting<std::string>(settings, "meshfile");
-		}
-
-		std::string partitionerName = readSetting<std::string>(settings, "name");
-		PUML::PartitionerType partitionerType = PUML::PartitionerType::None;
-
-		if (partitionerName == "none") {partitionerType = PUML::PartitionerType::None;}
-		else if (partitionerName == "parmetis") {partitionerType = PUML::PartitionerType::Parmetis;}
-		else if (partitionerName == "parmetis-geo") {partitionerType = PUML::PartitionerType::ParmetisGeometric;}
-		else if (partitionerName == "ptscotch") {partitionerType = PUML::PartitionerType::PtScotch;}
-		else if (partitionerName == "ptscotch-b") {partitionerType = PUML::PartitionerType::PtScotchBalance;}
-		else if (partitionerName == "ptscotch-q") {partitionerType = PUML::PartitionerType::PtScotchQuality;}
-		else if (partitionerName == "ptscotch-bq") {partitionerType = PUML::PartitionerType::PtScotchBalanceQuality;}
-		else if (partitionerName == "ptscotch-s") {partitionerType = PUML::PartitionerType::PtScotchSpeed;}
-		else if (partitionerName == "ptscotch-sb") {partitionerType = PUML::PartitionerType::PtScotchBalanceSpeed;}
-		else if (partitionerName == "parhip-ultrafast") {partitionerType = PUML::PartitionerType::ParHIPUltrafastMesh;}
-		else if (partitionerName == "parhip-fast") {partitionerType = PUML::PartitionerType::ParHIPFastMesh;}
-		else if (partitionerName == "parhip-eco") {partitionerType = PUML::PartitionerType::ParHIPEcoMesh;}
-		else if (partitionerName == "parhip-ultrafastsocial") {partitionerType = PUML::PartitionerType::ParHIPUltrafastSocial;}
-		else if (partitionerName == "parhip-fastsocial") {partitionerType = PUML::PartitionerType::ParHIPFastSocial;}
-		else if (partitionerName == "parhip-ecosocial") {partitionerType = PUML::PartitionerType::ParHIPEcoSocial;}
-		else {
-			logInfo(rank) << "Partitioner name not recognized. Aborting...";
-			MPI_Finalize();
-			return 1;
-		}
-
-		auto partitionInvoker = PUML::TETPartition::getPartitioner(partitionerType);
-		if (partitionInvoker == nullptr) {
-			logInfo(rank) << "Partitioner name not recognized. Aborting...";
-			MPI_Finalize();
-			return 1;
-		}
-
-		logInfo(rank) << "Creating partitions using partitioner" << partitionerName;
-		std::vector<int> partition(puml->numOriginalCells());
-		PUML::TETPartitionGraph graph(*puml);
-
-		std::string vertexweightsetting = readSetting<std::string>(settings, "vw");
-		std::string edgeweightsetting = readSetting<std::string>(settings, "ew");
-		std::string nodeweightsetting = readSetting<std::string>(settings, "nw");
-
-		int * vertexweights = nullptr;
-		int * edgeweights = nullptr;
-		double * nodeweights = nullptr;
-		if (vertexweightsetting == "none") {
-			// blank
-		}
-		else if (vertexweightsetting == "uniform") {
-			vertexweights = new int[puml->cells().size()];
-			for (int i = 0; i < puml->numOriginalCells(); ++i) {
-				vertexweights[i] = 1;
-			}
-		}
-		else if (vertexweightsetting == "random") {
-			vertexweights = new int[puml->cells().size()];
-			std::default_random_engine gen(123);
-			std::uniform_int_distribution<int> distribution(1,1000);
-			for (int i = 0; i < puml->numOriginalCells(); ++i) {
-				vertexweights[i] = distribution(gen);
-			}
-		}
-		else if (vertexweightsetting == "increasing") {
-			vertexweights = new int[puml->cells().size()];
-			for (int i = 0; i < puml->numOriginalCells(); ++i) {
-				vertexweights[i] = i + 1;
-			}
-		}
-		else {
-			logInfo(rank) << "Vertex weight setting not recognized. Aborting...";
-			MPI_Finalize();
-			return 1;
-		}
-
-		if (edgeweightsetting == "none") {
-			// blank
-		}
-		else if (edgeweightsetting == "uniform") {
-			edgeweights = new int[graph.localEdgeCount()];
-			for (int i = 0; i < graph.localEdgeCount(); ++i) {
-				edgeweights[i] = 1;
-			}
-		}
-		else if (edgeweightsetting == "random") {
-			edgeweights = new int[graph.localEdgeCount()];
-			std::default_random_engine gen(123);
-			std::uniform_int_distribution<int> distribution(1,1000);
-			for (int i = 0; i < graph.localEdgeCount(); ++i) {
-				edgeweights[i] = distribution(gen);
-			}
-		}
-		else if (edgeweightsetting == "increasing") {
-			edgeweights = new int[graph.localEdgeCount()];
-			for (int i = 0; i < graph.localEdgeCount(); ++i) {
-				edgeweights[i] = i + 1;
-			}
-		}
-		else {
-			logInfo(rank) << "Edge weight setting not recognized. Aborting...";
-			MPI_Finalize();
-			return 1;
-		}
-
-		int nparts = readSetting<int>(settings, "nparts");
-
-		if (nodeweightsetting == "none") {
-			// empty
-		}
-		else if (nodeweightsetting == "uniform") {
-			nodeweights = new double[nparts];
-			for (int i = 0; i < nparts; ++i) {
-				nodeweights[i] = 1. / nparts;
-			}
-		}
-		else if (nodeweightsetting == "random") {
-			nodeweights = new double[nparts];
-			std::default_random_engine gen(123);
-			std::uniform_int_distribution<int> distribution(1,1000);
-			double sum = 0;
-			for (int i = 0; i < nparts; ++i) {
-				nodeweights[i] = distribution(gen);
-				sum += nodeweights[i];
-			}
-			for (int i = 0; i < nparts; ++i) {
-				nodeweights[i] /= sum;
-			}
-		}
-		else if (nodeweightsetting == "harmonic") {
-			nodeweights = new double[nparts];
-			double sum = 0;
-			for (int i = 0; i < nparts; ++i) {
-				sum += 1. / (i + 1);
-			}
-			for (int i = 0; i < nparts; ++i) {
-				nodeweights[i] = 1. / (sum * (i + 1));
-			}
-		}
-
-		graph.setVertexWeights(vertexweights, 1);
-		graph.setEdgeWeights(edgeweights);
-		double imbalance = readSetting<double>(settings, "imbalance");
-		int seed = readSetting<int>(settings, "seed");
-
-		PUML::PartitionTarget target;
-		target.setImbalance(imbalance);
-		if (nodeweights == nullptr) {
-			target.setVertexWeightsUniform(nparts);
-		}
-		else {
-			target.setVertexWeights(nparts, nodeweights);
-		}
-
-		logDebug() << "Initial stats" << rank << graph.localVertexCount() << graph.localEdgeCount();
-
-		logInfo(rank) << "Starting the actual partitioning.";
-		struct rusage memoryStatsBegin;
-		getrusage(RUSAGE_SELF, &memoryStatsBegin);
-		double startt = MPI_Wtime();
-		partitionInvoker->partition(partition.data(), graph, target, seed);
-		MPI_Barrier(comm);
-		double endt = MPI_Wtime();
-		struct rusage memoryStatsEnd;
-		getrusage(RUSAGE_SELF, &memoryStatsEnd);
-		logInfo(rank) << "Actual partitioning done. Now saving stats";
-
-		// counts weighted and unweighted sizes
-		std::vector<int> partitionSize(nparts*4);
-		std::vector<int> partitionSizeLocal(nparts*4);
-		for (int i = 0; i < puml->cells().size(); ++i) {
-			assert(partition[i] >= 0);
-			assert(partition[i] < nparts);
-			++partitionSizeLocal[partition[i]];
-		}
-		if (vertexweights == nullptr) {
-			for (int i = 0; i < nparts; ++i) {
-				partitionSizeLocal[i + nparts] = partitionSizeLocal[i];
-			}
-		}
-		else {
-			for (int i = 0; i < puml->cells().size(); ++i) {
-				partitionSizeLocal[partition[i] + nparts] += vertexweights[i];
-			}
-		}
-
-		std::vector<int> detailPartition(nparts*nparts);
-		std::vector<int> localDetailPartition(nparts*nparts);
-
-		graph.forEachLocalEdges<int>(
-			partition,
-			[&](int fid, int lid, const int& g, const int& l, int id) {
-				if (g != l) {
-					++partitionSizeLocal[g + nparts * 2];
-					if (edgeweights == nullptr) {
-						++partitionSizeLocal[g + nparts * 3];
-						++localDetailPartition[g * nparts + l];
-					}
-					else {
-						partitionSizeLocal[g + nparts * 3] += edgeweights[id];
-						localDetailPartition[g * nparts + l] += edgeweights[id];
-					}
-				}
-			}
-		);
-
-		MPI_Reduce(partitionSizeLocal.data(), partitionSize.data(), partitionSizeLocal.size(), MPI_INT, MPI_SUM, 0, comm);
-		MPI_Reduce(localDetailPartition.data(), detailPartition.data(), localDetailPartition.size(), MPI_INT, MPI_SUM, 0, comm);
-
-		long peakmemstart = memoryStatsBegin.ru_maxrss;
-		long peakmemend = memoryStatsEnd.ru_maxrss;
-
-		long peakmemlocal[] = {peakmemstart, peakmemend};
-		long peakmem[2];
-		long avgpeakmem[2];
-		MPI_Reduce(peakmemlocal, peakmem, 2, MPI_LONG, MPI_MAX, 0, comm);
-		MPI_Reduce(peakmemlocal, avgpeakmem, 2, MPI_LONG, MPI_SUM, 0, comm);
-
-		double realavgpeakmem[2];
-		realavgpeakmem[0] = (double)avgpeakmem[0] / size;
-		realavgpeakmem[1] = (double)avgpeakmem[1] / size;
-
-		std::vector<long> peakmemrankstart(size);
-		std::vector<long> peakmemrankend(size);
-
-		MPI_Gather(&memoryStatsBegin.ru_maxrss, 1, MPI_LONG, peakmemrankstart.data(), 1, MPI_LONG, 0, comm);
-		MPI_Gather(&memoryStatsEnd.ru_maxrss, 1, MPI_LONG, peakmemrankend.data(), 1, MPI_LONG, 0, comm);
-
-		if (rank == 0) {
-			std::vector<nlohmann::json> node_data(nparts);
-			size_t ec = 0;
-			size_t ecw = 0;
-			size_t ps = 0;
-			size_t psw = 0;
-			size_t pss = 0;
-			size_t psws = 0;
-			for (int i = 0; i < nparts; ++i) {
-				node_data[i]["size"] = partitionSize[i];
-				node_data[i]["size_weighted"] = partitionSize[i + nparts];
-				node_data[i]["cut"] = partitionSize[i + nparts * 2];
-				node_data[i]["cut_weighted"] = partitionSize[i + nparts * 3];
-
-				std::vector<int> detail(detailPartition.begin() + nparts * i, detailPartition.begin() + nparts * (i+1));
-				node_data[i]["cut_weighted_detail"] = detail;
-
-				ec += partitionSize[i + nparts*2];
-				ecw += partitionSize[i + nparts*3];
-				ps = std::max(ps, (size_t)partitionSize[i]);
-				psw = std::max(psw, (size_t)partitionSize[i + nparts]);
-				pss += partitionSize[i];
-				psws += partitionSize[i + nparts];
-			}
-			ec /= 2; ecw /= 2;
-			double mi = (double)ps / ((double)pss / (double)nparts);
-			double miw = (double)psw / ((double)psws / (double)nparts);
-			logDebug(rank) << "End stats:" << ec << ecw << mi << miw << peakmem[0] << peakmem[1] << (endt - startt);
-
-			nlohmann::json output;
-
-			output["cut"] = ec;
-			output["cut_weighted"] = ecw;
-			output["imbalance"] = mi;
-			output["imbalance_weighted"] = miw;
-			output["total_vertices"] = graph.globalVertexCount();
-			output["total_edges"] = graph.globalEdgeCount();
-			output["mpi_size"] = size;
-			output["partition_stats"] = node_data;
-			output["time"] = endt-startt;
-			output["configuration"] = settings;
-
-			output["peakmembeforemax"] = peakmem[0];
-			output["peakmemaftermax"] = peakmem[1];
-			output["peakmembeforeavg"] = realavgpeakmem[0];
-			output["peakmemafteravg"] = realavgpeakmem[1];
-
-			std::vector<nlohmann::json> rankdata(size);
-			for (int i = 0; i < size; ++i) {
-				rankdata[i]["peakmembefore"] = peakmemrankstart[i];
-				rankdata[i]["peakmemafter"] = peakmemrankend[i];
-			}
-
-			output["rank_stats"] = rankdata;
-
-			std::string odir = readSetting<std::string>(settings, "output");
-			if (odir == "-") {
-				// use stdout
-				logInfo(rank) << "Results: " << output.dump(4);
-			}
-			else {
-				size_t hash = std::hash<nlohmann::json>()(output);
-
-				// this is not ideal, but it avoids the dependency on std::filesystem
-				std::ofstream outputFile(odir + "/" + std::to_string(hash) + ".json");
-				outputFile << output;
-			}
-		}
-
-		if (vertexweights != nullptr) {
-			delete[] vertexweights;
-		}
-		if (edgeweights != nullptr) {
-			delete[] edgeweights;
-		}
-		if (nodeweights != nullptr) {
-			delete[] nodeweights;
-		}
-
-		logInfo(rank) << "Done";
-	}
-	MPI_Finalize();
-
-	return 0;
+int main(int argc, char* argv[]) {
+  int p;
+  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &p);
+
+  MPI_Comm comm = MPI_COMM_WORLD;
+  int rank, size;
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &size);
+
+  if (argc == 1) {
+    logWarning(rank)
+        << "No settings given. They can be set as command line arguments. "
+           "An argument can have two forms, either a key-value pair in the form \"key=value\" "
+           "(note the '=', and no spaces around it), "
+           "or it can be a file name which contains a JSON file (note in particular: you may mix "
+           "files and key-value pairs, and you may supply multiple files). "
+           "Later arguments override earlier arguments. If a file contains an array of "
+           "configurations, each configuration is tested."
+           "Any following argument is then applied to all configurations. If again an array of "
+           "configurations is given, the cross product between the two arrays is used.";
+  }
+
+  auto settingsList = argparse(argc, argv);
+
+  logInfo(rank) << "PUML Partition Tester --- Starting";
+  logInfo(rank) << settingsList.size() << "tests";
+
+  PUML::TETPUML* puml = nullptr;
+
+  std::string lastmesh = "";
+  std::string lastmatmesh = "";
+  std::string lastmat = "";
+
+  for (int i = 0; i < settingsList.size(); ++i) {
+    auto settings = settingsList[i];
+    logInfo(rank) << "Test" << (i + 1) << "of" << settingsList.size();
+    logInfo(rank) << "Using configuration: " << settings;
+    if (settings.is_null()) {
+      logInfo(rank) << "Aborting...";
+      break;
+    }
+
+    if (puml == nullptr || readSetting<std::string>(settings, "meshfile") != lastmesh) {
+      delete puml;
+      puml = new PUML::TETPUML();
+      std::string infile = readSetting<std::string>(settings, "meshfile");
+      logInfo(rank) << "Reading mesh" << infile;
+      puml->open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
+
+      logInfo(rank) << "Reading other data";
+      puml->addData((infile + ":/group").c_str(), PUML::CELL);
+      puml->addData((infile + ":/boundary").c_str(), PUML::CELL);
+
+      logInfo(rank) << "Generating mesh information";
+      puml->generateMesh();
+
+      lastmesh = readSetting<std::string>(settings, "meshfile");
+    }
+
+    std::string partitionerName = readSetting<std::string>(settings, "name");
+    PUML::PartitionerType partitionerType = PUML::PartitionerType::None;
+
+    if (partitionerName == "none") {
+      partitionerType = PUML::PartitionerType::None;
+    } else if (partitionerName == "parmetis") {
+      partitionerType = PUML::PartitionerType::Parmetis;
+    } else if (partitionerName == "parmetis-geo") {
+      partitionerType = PUML::PartitionerType::ParmetisGeometric;
+    } else if (partitionerName == "ptscotch") {
+      partitionerType = PUML::PartitionerType::PtScotch;
+    } else if (partitionerName == "ptscotch-b") {
+      partitionerType = PUML::PartitionerType::PtScotchBalance;
+    } else if (partitionerName == "ptscotch-q") {
+      partitionerType = PUML::PartitionerType::PtScotchQuality;
+    } else if (partitionerName == "ptscotch-bq") {
+      partitionerType = PUML::PartitionerType::PtScotchBalanceQuality;
+    } else if (partitionerName == "ptscotch-s") {
+      partitionerType = PUML::PartitionerType::PtScotchSpeed;
+    } else if (partitionerName == "ptscotch-sb") {
+      partitionerType = PUML::PartitionerType::PtScotchBalanceSpeed;
+    } else if (partitionerName == "parhip-ultrafast") {
+      partitionerType = PUML::PartitionerType::ParHIPUltrafastMesh;
+    } else if (partitionerName == "parhip-fast") {
+      partitionerType = PUML::PartitionerType::ParHIPFastMesh;
+    } else if (partitionerName == "parhip-eco") {
+      partitionerType = PUML::PartitionerType::ParHIPEcoMesh;
+    } else if (partitionerName == "parhip-ultrafastsocial") {
+      partitionerType = PUML::PartitionerType::ParHIPUltrafastSocial;
+    } else if (partitionerName == "parhip-fastsocial") {
+      partitionerType = PUML::PartitionerType::ParHIPFastSocial;
+    } else if (partitionerName == "parhip-ecosocial") {
+      partitionerType = PUML::PartitionerType::ParHIPEcoSocial;
+    } else {
+      logInfo(rank) << "Partitioner name not recognized. Aborting...";
+      MPI_Finalize();
+      return 1;
+    }
+
+    auto partitionInvoker = PUML::TETPartition::getPartitioner(partitionerType);
+    if (partitionInvoker == nullptr) {
+      logInfo(rank) << "Partitioner name not recognized. Aborting...";
+      MPI_Finalize();
+      return 1;
+    }
+
+    logInfo(rank) << "Creating partitions using partitioner" << partitionerName;
+    std::vector<int> partition(puml->numOriginalCells());
+    PUML::TETPartitionGraph graph(*puml);
+
+    std::string vertexweightsetting = readSetting<std::string>(settings, "vw");
+    std::string edgeweightsetting = readSetting<std::string>(settings, "ew");
+    std::string nodeweightsetting = readSetting<std::string>(settings, "nw");
+
+    int* vertexweights = nullptr;
+    int* edgeweights = nullptr;
+    double* nodeweights = nullptr;
+    if (vertexweightsetting == "none") {
+      // blank
+    } else if (vertexweightsetting == "uniform") {
+      vertexweights = new int[puml->cells().size()];
+      for (int i = 0; i < puml->numOriginalCells(); ++i) {
+        vertexweights[i] = 1;
+      }
+    } else if (vertexweightsetting == "random") {
+      vertexweights = new int[puml->cells().size()];
+      std::default_random_engine gen(123);
+      std::uniform_int_distribution<int> distribution(1, 1000);
+      for (int i = 0; i < puml->numOriginalCells(); ++i) {
+        vertexweights[i] = distribution(gen);
+      }
+    } else if (vertexweightsetting == "increasing") {
+      vertexweights = new int[puml->cells().size()];
+      for (int i = 0; i < puml->numOriginalCells(); ++i) {
+        vertexweights[i] = i + 1;
+      }
+    } else {
+      logInfo(rank) << "Vertex weight setting not recognized. Aborting...";
+      MPI_Finalize();
+      return 1;
+    }
+
+    if (edgeweightsetting == "none") {
+      // blank
+    } else if (edgeweightsetting == "uniform") {
+      edgeweights = new int[graph.localEdgeCount()];
+      for (int i = 0; i < graph.localEdgeCount(); ++i) {
+        edgeweights[i] = 1;
+      }
+    } else if (edgeweightsetting == "random") {
+      edgeweights = new int[graph.localEdgeCount()];
+      std::default_random_engine gen(123);
+      std::uniform_int_distribution<int> distribution(1, 1000);
+      for (int i = 0; i < graph.localEdgeCount(); ++i) {
+        edgeweights[i] = distribution(gen);
+      }
+    } else if (edgeweightsetting == "increasing") {
+      edgeweights = new int[graph.localEdgeCount()];
+      for (int i = 0; i < graph.localEdgeCount(); ++i) {
+        edgeweights[i] = i + 1;
+      }
+    } else {
+      logInfo(rank) << "Edge weight setting not recognized. Aborting...";
+      MPI_Finalize();
+      return 1;
+    }
+
+    int nparts = readSetting<int>(settings, "nparts");
+
+    if (nodeweightsetting == "none") {
+      // empty
+    } else if (nodeweightsetting == "uniform") {
+      nodeweights = new double[nparts];
+      for (int i = 0; i < nparts; ++i) {
+        nodeweights[i] = 1. / nparts;
+      }
+    } else if (nodeweightsetting == "random") {
+      nodeweights = new double[nparts];
+      std::default_random_engine gen(123);
+      std::uniform_int_distribution<int> distribution(1, 1000);
+      double sum = 0;
+      for (int i = 0; i < nparts; ++i) {
+        nodeweights[i] = distribution(gen);
+        sum += nodeweights[i];
+      }
+      for (int i = 0; i < nparts; ++i) {
+        nodeweights[i] /= sum;
+      }
+    } else if (nodeweightsetting == "harmonic") {
+      nodeweights = new double[nparts];
+      double sum = 0;
+      for (int i = 0; i < nparts; ++i) {
+        sum += 1. / (i + 1);
+      }
+      for (int i = 0; i < nparts; ++i) {
+        nodeweights[i] = 1. / (sum * (i + 1));
+      }
+    }
+
+    graph.setVertexWeights(vertexweights, 1);
+    graph.setEdgeWeights(edgeweights);
+    double imbalance = readSetting<double>(settings, "imbalance");
+    int seed = readSetting<int>(settings, "seed");
+
+    PUML::PartitionTarget target;
+    target.setImbalance(imbalance);
+    if (nodeweights == nullptr) {
+      target.setVertexWeightsUniform(nparts);
+    } else {
+      target.setVertexWeights(nparts, nodeweights);
+    }
+
+    logDebug() << "Initial stats" << rank << graph.localVertexCount() << graph.localEdgeCount();
+
+    logInfo(rank) << "Starting the actual partitioning.";
+    struct rusage memoryStatsBegin;
+    getrusage(RUSAGE_SELF, &memoryStatsBegin);
+    double startt = MPI_Wtime();
+    partitionInvoker->partition(partition.data(), graph, target, seed);
+    MPI_Barrier(comm);
+    double endt = MPI_Wtime();
+    struct rusage memoryStatsEnd;
+    getrusage(RUSAGE_SELF, &memoryStatsEnd);
+    logInfo(rank) << "Actual partitioning done. Now saving stats";
+
+    // counts weighted and unweighted sizes
+    std::vector<int> partitionSize(nparts * 4);
+    std::vector<int> partitionSizeLocal(nparts * 4);
+    for (int i = 0; i < puml->cells().size(); ++i) {
+      assert(partition[i] >= 0);
+      assert(partition[i] < nparts);
+      ++partitionSizeLocal[partition[i]];
+    }
+    if (vertexweights == nullptr) {
+      for (int i = 0; i < nparts; ++i) {
+        partitionSizeLocal[i + nparts] = partitionSizeLocal[i];
+      }
+    } else {
+      for (int i = 0; i < puml->cells().size(); ++i) {
+        partitionSizeLocal[partition[i] + nparts] += vertexweights[i];
+      }
+    }
+
+    std::vector<int> detailPartition(nparts * nparts);
+    std::vector<int> localDetailPartition(nparts * nparts);
+
+    graph.forEachLocalEdges<int>(partition,
+                                 [&](int fid, int lid, const int& g, const int& l, int id) {
+                                   if (g != l) {
+                                     ++partitionSizeLocal[g + nparts * 2];
+                                     if (edgeweights == nullptr) {
+                                       ++partitionSizeLocal[g + nparts * 3];
+                                       ++localDetailPartition[g * nparts + l];
+                                     } else {
+                                       partitionSizeLocal[g + nparts * 3] += edgeweights[id];
+                                       localDetailPartition[g * nparts + l] += edgeweights[id];
+                                     }
+                                   }
+                                 });
+
+    MPI_Reduce(partitionSizeLocal.data(),
+               partitionSize.data(),
+               partitionSizeLocal.size(),
+               MPI_INT,
+               MPI_SUM,
+               0,
+               comm);
+    MPI_Reduce(localDetailPartition.data(),
+               detailPartition.data(),
+               localDetailPartition.size(),
+               MPI_INT,
+               MPI_SUM,
+               0,
+               comm);
+
+    long peakmemstart = memoryStatsBegin.ru_maxrss;
+    long peakmemend = memoryStatsEnd.ru_maxrss;
+
+    long peakmemlocal[] = {peakmemstart, peakmemend};
+    long peakmem[2];
+    long avgpeakmem[2];
+    MPI_Reduce(peakmemlocal, peakmem, 2, MPI_LONG, MPI_MAX, 0, comm);
+    MPI_Reduce(peakmemlocal, avgpeakmem, 2, MPI_LONG, MPI_SUM, 0, comm);
+
+    double realavgpeakmem[2];
+    realavgpeakmem[0] = (double)avgpeakmem[0] / size;
+    realavgpeakmem[1] = (double)avgpeakmem[1] / size;
+
+    std::vector<long> peakmemrankstart(size);
+    std::vector<long> peakmemrankend(size);
+
+    MPI_Gather(
+        &memoryStatsBegin.ru_maxrss, 1, MPI_LONG, peakmemrankstart.data(), 1, MPI_LONG, 0, comm);
+    MPI_Gather(&memoryStatsEnd.ru_maxrss, 1, MPI_LONG, peakmemrankend.data(), 1, MPI_LONG, 0, comm);
+
+    if (rank == 0) {
+      std::vector<nlohmann::json> node_data(nparts);
+      size_t ec = 0;
+      size_t ecw = 0;
+      size_t ps = 0;
+      size_t psw = 0;
+      size_t pss = 0;
+      size_t psws = 0;
+      for (int i = 0; i < nparts; ++i) {
+        node_data[i]["size"] = partitionSize[i];
+        node_data[i]["size_weighted"] = partitionSize[i + nparts];
+        node_data[i]["cut"] = partitionSize[i + nparts * 2];
+        node_data[i]["cut_weighted"] = partitionSize[i + nparts * 3];
+
+        std::vector<int> detail(detailPartition.begin() + nparts * i,
+                                detailPartition.begin() + nparts * (i + 1));
+        node_data[i]["cut_weighted_detail"] = detail;
+
+        ec += partitionSize[i + nparts * 2];
+        ecw += partitionSize[i + nparts * 3];
+        ps = std::max(ps, (size_t)partitionSize[i]);
+        psw = std::max(psw, (size_t)partitionSize[i + nparts]);
+        pss += partitionSize[i];
+        psws += partitionSize[i + nparts];
+      }
+      ec /= 2;
+      ecw /= 2;
+      double mi = (double)ps / ((double)pss / (double)nparts);
+      double miw = (double)psw / ((double)psws / (double)nparts);
+      logDebug(rank) << "End stats:" << ec << ecw << mi << miw << peakmem[0] << peakmem[1]
+                     << (endt - startt);
+
+      nlohmann::json output;
+
+      output["cut"] = ec;
+      output["cut_weighted"] = ecw;
+      output["imbalance"] = mi;
+      output["imbalance_weighted"] = miw;
+      output["total_vertices"] = graph.globalVertexCount();
+      output["total_edges"] = graph.globalEdgeCount();
+      output["mpi_size"] = size;
+      output["partition_stats"] = node_data;
+      output["time"] = endt - startt;
+      output["configuration"] = settings;
+
+      output["peakmembeforemax"] = peakmem[0];
+      output["peakmemaftermax"] = peakmem[1];
+      output["peakmembeforeavg"] = realavgpeakmem[0];
+      output["peakmemafteravg"] = realavgpeakmem[1];
+
+      std::vector<nlohmann::json> rankdata(size);
+      for (int i = 0; i < size; ++i) {
+        rankdata[i]["peakmembefore"] = peakmemrankstart[i];
+        rankdata[i]["peakmemafter"] = peakmemrankend[i];
+      }
+
+      output["rank_stats"] = rankdata;
+
+      std::string odir = readSetting<std::string>(settings, "output");
+      if (odir == "-") {
+        // use stdout
+        logInfo(rank) << "Results: " << output.dump(4);
+      } else {
+        size_t hash = std::hash<nlohmann::json>()(output);
+
+        // this is not ideal, but it avoids the dependency on std::filesystem
+        std::ofstream outputFile(odir + "/" + std::to_string(hash) + ".json");
+        outputFile << output;
+      }
+    }
+
+    if (vertexweights != nullptr) {
+      delete[] vertexweights;
+    }
+    if (edgeweights != nullptr) {
+      delete[] edgeweights;
+    }
+    if (nodeweights != nullptr) {
+      delete[] nodeweights;
+    }
+
+    logInfo(rank) << "Done";
+  }
+  MPI_Finalize();
+
+  return 0;
 }
\ No newline at end of file
diff --git a/test/simple-test/puml.cpp b/test/simple-test/puml.cpp
index ff7770e..b57b75c 100644
--- a/test/simple-test/puml.cpp
+++ b/test/simple-test/puml.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: 2024 Technical University of Munich
+//
+// SPDX-License-Identifier: BSD-3-Clause
 /**
  * @file
  *  This file is part of PUML
@@ -25,122 +28,122 @@
 #include "Partition.h"
 #include "PartitionGraph.h"
 
-template<typename T>
+template <typename T>
 void printElement(std::stringstream& stream, const T& data) {
-	if constexpr (std::is_array_v<T>) {
-		for (std::size_t i = 0; i < std::extent_v<T, 0>; ++i) {
-			if (i > 0) {
-				stream << ",";
-			}
-			printElement(stream, data[i]);
-		}
-	}
-	else {
-		stream << data;
-	}
+  if constexpr (std::is_array_v<T>) {
+    for (std::size_t i = 0; i < std::extent_v<T, 0>; ++i) {
+      if (i > 0) {
+        stream << ",";
+      }
+      printElement(stream, data[i]);
+    }
+  } else {
+    stream << data;
+  }
 }
 
 /*template<>
-static void printElement<PUML::TETPUML::cell_t>(std::stringstream& stream, const PUML::TETPUML::cell_t& data) {
-	PUML::Downward::vertices();
+static void printElement<PUML::TETPUML::cell_t>(std::stringstream& stream, const
+PUML::TETPUML::cell_t& data) { PUML::Downward::vertices();
 }*/
 
-template<>
-void printElement<PUML::TETPUML::vertex_t>(std::stringstream& stream, const PUML::TETPUML::vertex_t& data) {
-	const auto& cref = data.coordinate();
-	double coords[3] = {cref[0], cref[1], cref[2]};
-	printElement(stream, coords);
+template <>
+void printElement<PUML::TETPUML::vertex_t>(std::stringstream& stream,
+                                           const PUML::TETPUML::vertex_t& data) {
+  const auto& cref = data.coordinate();
+  double coords[3] = {cref[0], cref[1], cref[2]};
+  printElement(stream, coords);
 }
 
-template<typename T>
+template <typename T>
 static void printArray(const T* data, std::size_t size) {
-	int rank;
-	int rsize;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &rsize);
-
-	unsigned long long countSoFar = 0;
-
-	if (rank > 0) {
-		MPI_Recv(&countSoFar, 1, MPI_UNSIGNED_LONG_LONG, rank - 1, 15, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	}
-
-	logInfo(0) << "Rank" << rank;
-
-	std::stringstream stream;
-	for (std::size_t i = 0; i < size; ++i) {
-		stream << countSoFar << ": ";
-		printElement(stream, data[i]);
-		logInfo(0) << stream.str().c_str();
-		stream.str("");
-		++countSoFar;
-	}
-
-	if (rank < rsize - 1) {
-		MPI_Send(&countSoFar, 1, MPI_UNSIGNED_LONG_LONG, rank + 1, 15, MPI_COMM_WORLD);
-	}
-
-	MPI_Barrier(MPI_COMM_WORLD);
+  int rank;
+  int rsize;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &rsize);
+
+  unsigned long long countSoFar = 0;
+
+  if (rank > 0) {
+    MPI_Recv(
+        &countSoFar, 1, MPI_UNSIGNED_LONG_LONG, rank - 1, 15, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+  }
+
+  logInfo(0) << "Rank" << rank;
+
+  std::stringstream stream;
+  for (std::size_t i = 0; i < size; ++i) {
+    stream << countSoFar << ": ";
+    printElement(stream, data[i]);
+    logInfo(0) << stream.str().c_str();
+    stream.str("");
+    ++countSoFar;
+  }
+
+  if (rank < rsize - 1) {
+    MPI_Send(&countSoFar, 1, MPI_UNSIGNED_LONG_LONG, rank + 1, 15, MPI_COMM_WORLD);
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
 }
 
-template<typename T>
+template <typename T>
 static void printArray(const std::vector<T>& data) {
-	printArray(data.data(), data.size());
+  printArray(data.data(), data.size());
 }
 
-int main(int argc, char* argv[])
-{
-	MPI_Init(&argc, &argv);
+int main(int argc, char* argv[]) {
+  MPI_Init(&argc, &argv);
 
-	int rank;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  int rank;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
-	utils::Args args;
-	args.addAdditionalOption("in", "the PUML mesh file");
+  utils::Args args;
+  args.addAdditionalOption("in", "the PUML mesh file");
 
-	switch (args.parse(argc, argv)) {
-	case utils::Args::Error:
-		MPI_Abort(MPI_COMM_WORLD, -1);
-		break;
-	case utils::Args::Help:
-		MPI_Finalize();
-		return 1;
-	}
+  switch (args.parse(argc, argv)) {
+  case utils::Args::Error:
+    MPI_Abort(MPI_COMM_WORLD, -1);
+    break;
+  case utils::Args::Help:
+    MPI_Finalize();
+    return 1;
+  }
 
-	PUML::TETPUML puml;
+  PUML::TETPUML puml;
 
-	std::string infile = args.getAdditionalArgument<const char*>("in");
+  std::string infile = args.getAdditionalArgument<const char*>("in");
 
-	// Read the mesh
-	logInfo(rank) << "Reading mesh";
-	puml.open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
+  // Read the mesh
+  logInfo(rank) << "Reading mesh";
+  puml.open((infile + ":/connect").c_str(), (infile + ":/geometry").c_str());
 
-	logInfo(rank) << "Reading other data (i.e. groups, boundaries)";
-	puml.addData<int>((infile  + ":/group").c_str(), PUML::CELL, {});
-	puml.addData<int>((infile  + ":/boundary").c_str(), PUML::CELL, {});
+  logInfo(rank) << "Reading other data (i.e. groups, boundaries)";
+  puml.addData<int>((infile + ":/group").c_str(), PUML::CELL, {});
+  puml.addData<int>((infile + ":/boundary").c_str(), PUML::CELL, {});
 
-	std::vector<unsigned long long> test(puml.numOriginalCells(), 0x5555555555555555ULL);
-	puml.addDataArray<unsigned long long>(test.data(), PUML::CELL, {});
+  std::vector<unsigned long long> test(puml.numOriginalCells(), 0x5555555555555555ULL);
+  puml.addDataArray<unsigned long long>(test.data(), PUML::CELL, {});
 
-	// Generate the mesh information
-	logInfo(rank) << "Generating mesh information";
-	puml.generateMesh();
+  // Generate the mesh information
+  logInfo(rank) << "Generating mesh information";
+  puml.generateMesh();
 
-	// Write test
-	logInfo(rank) << "Printing test information";
-	const std::vector<PUML::TETPUML::cell_t> &cells = puml.cells();
+  // Write test
+  logInfo(rank) << "Printing test information";
+  const std::vector<PUML::TETPUML::cell_t>& cells = puml.cells();
 
-	const std::vector<PUML::TETPUML::vertex_t> &vertices = puml.vertices();
-	printArray(vertices);
+  const std::vector<PUML::TETPUML::vertex_t>& vertices = puml.vertices();
+  printArray(vertices);
 
-	const int* groups = reinterpret_cast<const int*>(puml.cellData(0));
-	printArray(groups, cells.size());
-	const unsigned long long* testt = reinterpret_cast<const unsigned long long*>(puml.cellData(2));
-	printArray(testt, cells.size());
+  const int* groups = reinterpret_cast<const int*>(puml.cellData(0));
+  printArray(groups, cells.size());
+  const unsigned long long* testt = reinterpret_cast<const unsigned long long*>(puml.cellData(2));
+  printArray(testt, cells.size());
 
-	logInfo(rank) << "Done";
+  logInfo(rank) << "Done";
 
-	MPI_Finalize();
+  MPI_Finalize();
 
-	return 0;
+  return 0;
 }