diff --git a/src/serac/numerics/functional/boundary_integral_kernels.hpp b/src/serac/numerics/functional/boundary_integral_kernels.hpp
index 2c53b45b4..3959ac96c 100644
--- a/src/serac/numerics/functional/boundary_integral_kernels.hpp
+++ b/src/serac/numerics/functional/boundary_integral_kernels.hpp
@@ -153,18 +153,14 @@ auto batch_apply_qf(lambda qf, const tensor& positions, const tens
   return outputs;
 }
 
-template
-void evaluation_kernel_impl(FunctionSignature, const std::vector& inputs,
+/// @trial_elements the element type for each trial space
+template
+void evaluation_kernel_impl(trial_element_type trial_elements, test_element, const std::vector& inputs,
                             double* outputs, const double* positions, const double* jacobians, lambda_type qf,
                             [[maybe_unused]] derivative_type* qf_derivatives, uint32_t num_elements,
-                            std::integer_sequence)
+                            camp::int_seq)
 {
-  using test_element = finite_element;
-
-  /// @brief the element type for each trial space
-  static constexpr tuple...> trial_elements{};
-
   // mfem provides this information as opaque arrays of doubles,
   // so we reinterpret the pointer with
   constexpr int dim = dimension_of(geom) + 1;
@@ -331,9 +327,11 @@ std::function&, double*, bool)> evaluation
     signature s, lambda_type qf, const double* positions, const double* jacobians,
     std::shared_ptr qf_derivatives, uint32_t num_elements)
 {
+  auto trial_elements = trial_elements_tuple(s);
+  auto test = get_test_element(s);
   return [=](const std::vector& inputs, double* outputs, bool /* update state */) {
-    evaluation_kernel_impl(s, inputs, outputs, positions, jacobians, qf, qf_derivatives.get(),
-                           num_elements, s.index_seq);
+    evaluation_kernel_impl(trial_elements, test, inputs, outputs, positions, jacobians, qf,
+                           qf_derivatives.get(), num_elements, s.index_seq);
   };
 }
diff --git a/src/serac/numerics/functional/detail/hexahedron_H1.inl b/src/serac/numerics/functional/detail/hexahedron_H1.inl
index 25d3f5f37..2a5ef7479 100644
--- a/src/serac/numerics/functional/detail/hexahedron_H1.inl
+++ b/src/serac/numerics/functional/detail/hexahedron_H1.inl
@@ -167,7 +167,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     // we want to compute the following:
     //
@@ -228,8 +228,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q * q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/hexahedron_Hcurl.inl b/src/serac/numerics/functional/detail/hexahedron_Hcurl.inl
index 93d0d8e7d..b6277937b 100644
--- a/src/serac/numerics/functional/detail/hexahedron_Hcurl.inl
+++ b/src/serac/numerics/functional/detail/hexahedron_Hcurl.inl
@@ -334,7 +334,7 @@ struct finite_element> {
   }
 
   template
-  static auto interpolate(const dof_type& element_values, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& element_values, const TensorProductQuadratureRule&)
   {
     constexpr bool apply_weights = false;
     constexpr tensor B1 = calculate_B1();
@@ -395,9 +395,9 @@ struct finite_element> {
   }
 
   template
-  static void integrate(const tensor, q * q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual,
-                        [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          [[maybe_unused]] int step = 1)
   {
     constexpr bool apply_weights = true;
     constexpr tensor B1 = calculate_B1();
diff --git a/src/serac/numerics/functional/detail/hexahedron_L2.inl b/src/serac/numerics/functional/detail/hexahedron_L2.inl
index ec7c09b1b..51efeaeec 100644
--- a/src/serac/numerics/functional/detail/hexahedron_L2.inl
+++ b/src/serac/numerics/functional/detail/hexahedron_L2.inl
@@ -171,7 +171,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     // we want to compute the following:
     //
@@ -232,8 +232,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q * q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/qoi.inl b/src/serac/numerics/functional/detail/qoi.inl
index 80f74ce01..189b7f205 100644
--- a/src/serac/numerics/functional/detail/qoi.inl
+++ b/src/serac/numerics/functional/detail/qoi.inl
@@ -24,15 +24,15 @@ struct finite_element {
   SERAC_HOST_DEVICE static constexpr double shape_functions(double /* xi */) { return 1.0; }
 
   template
-  static void integrate(const tensor&, const TensorProductQuadratureRule&, dof_type*,
-                        [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor&, const TensorProductQuadratureRule&, dof_type*,
+                                          [[maybe_unused]] int step = 1)
   {
     return;  // integrating zeros is a no-op
   }
 
   template
-  static void integrate(const tensor& qf_output, const TensorProductQuadratureRule&,
-                        dof_type* element_total, [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor& qf_output, const TensorProductQuadratureRule&,
+                                          dof_type* element_total, [[maybe_unused]] int step = 1)
   {
     if constexpr (geometry == mfem::Geometry::SEGMENT) {
       static_assert(Q == q);
@@ -77,8 +77,9 @@ struct finite_element {
   // this overload is used for boundary integrals, since they pad the
   // output to be a tuple with a hardcoded `zero` flux term
   template
-  static void integrate(const tensor, Q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_total, [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, Q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_total,
+                                          [[maybe_unused]] int step = 1)
   {
     if constexpr (is_zero{}) {
       return;
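Most of the hunks in this patch only add SERAC_HOST_DEVICE to the per-element interpolate/integrate routines so they can be called from GPU kernels. As a reminder of what such an annotation macro typically looks like (serac's actual definition lives in its accelerator/infrastructure headers and may differ in detail), a minimal sketch:

```cpp
// Illustrative sketch only -- not serac's exact definition of SERAC_HOST_DEVICE.
#if defined(__CUDACC__)
#define SERAC_HOST_DEVICE __host__ __device__
#else
#define SERAC_HOST_DEVICE
#endif

// Any static member function reached from a GPU kernel (interpolate, integrate,
// batch_apply_shape_fn, ...) must carry the annotation; without it, nvcc refuses
// to call the function from device code.
struct example_element {
  SERAC_HOST_DEVICE static double shape_function(double xi) { return 1.0 - xi; }
};
```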
diff --git a/src/serac/numerics/functional/detail/quadrilateral_H1.inl b/src/serac/numerics/functional/detail/quadrilateral_H1.inl
index 953bab991..452bd6e17 100644
--- a/src/serac/numerics/functional/detail/quadrilateral_H1.inl
+++ b/src/serac/numerics/functional/detail/quadrilateral_H1.inl
@@ -202,7 +202,7 @@ struct finite_element > {
   // A(dy, qx)   := B(qx, dx) * X_e(dy, dx)
   // X_q(qy, qx) := B(qy, dy) * A(dy, qx)
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     static constexpr bool apply_weights = false;
     static constexpr auto B = calculate_B();
@@ -245,8 +245,9 @@ struct finite_element > {
   // flux can be one of: {zero, tensor, tensor, tensor,
   // tensor}
   template
-  static void integrate(const tensor, q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/quadrilateral_Hcurl.inl b/src/serac/numerics/functional/detail/quadrilateral_Hcurl.inl
index 420567561..6e332f27e 100644
--- a/src/serac/numerics/functional/detail/quadrilateral_Hcurl.inl
+++ b/src/serac/numerics/functional/detail/quadrilateral_Hcurl.inl
@@ -285,7 +285,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const dof_type& element_values, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& element_values, const TensorProductQuadratureRule&)
   {
     constexpr bool apply_weights = false;
     constexpr tensor B1 = calculate_B1();
@@ -323,9 +323,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual,
-                        [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          [[maybe_unused]] int step = 1)
   {
     constexpr bool apply_weights = true;
     constexpr tensor B1 = calculate_B1();
diff --git a/src/serac/numerics/functional/detail/quadrilateral_L2.inl b/src/serac/numerics/functional/detail/quadrilateral_L2.inl
index 2f2008017..09d653b5f 100644
--- a/src/serac/numerics/functional/detail/quadrilateral_L2.inl
+++ b/src/serac/numerics/functional/detail/quadrilateral_L2.inl
@@ -201,7 +201,7 @@ struct finite_element > {
   // A(dy, qx)   := B(qx, dx) * X_e(dy, dx)
   // X_q(qy, qx) := B(qy, dy) * A(dy, qx)
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     static constexpr bool apply_weights = false;
     static constexpr auto B = calculate_B();
@@ -244,8 +244,9 @@ struct finite_element > {
   // flux can be one of: {zero, tensor, tensor, tensor,
   // tensor}
   template
-  static void integrate(const tensor, q * q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q * q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/segment_H1.inl b/src/serac/numerics/functional/detail/segment_H1.inl
index 4dbe8f0e5..5651be12d 100644
--- a/src/serac/numerics/functional/detail/segment_H1.inl
+++ b/src/serac/numerics/functional/detail/segment_H1.inl
@@ -94,7 +94,7 @@ struct finite_element > {
   }
 
   template
-  static auto batch_apply_shape_fn(int jx, tensor input, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto batch_apply_shape_fn(int jx, tensor input, const TensorProductQuadratureRule&)
   {
     static constexpr bool apply_weights = false;
     static constexpr auto B = calculate_B();
@@ -121,7 +121,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     static constexpr bool apply_weights = false;
     static constexpr auto B = calculate_B();
@@ -155,9 +155,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual,
-                        [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          [[maybe_unused]] int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/segment_L2.inl b/src/serac/numerics/functional/detail/segment_L2.inl
index af85ff6a1..214e757dc 100644
--- a/src/serac/numerics/functional/detail/segment_L2.inl
+++ b/src/serac/numerics/functional/detail/segment_L2.inl
@@ -121,7 +121,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule&)
   {
     static constexpr bool apply_weights = false;
     static constexpr auto B = calculate_B();
@@ -155,9 +155,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q>& qf_output,
-                        const TensorProductQuadratureRule&, dof_type* element_residual,
-                        [[maybe_unused]] int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q>& qf_output,
+                                          const TensorProductQuadratureRule&, dof_type* element_residual,
+                                          [[maybe_unused]] int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/tetrahedron_H1.inl b/src/serac/numerics/functional/detail/tetrahedron_H1.inl
index cf66e4a2f..d3ea86061 100644
--- a/src/serac/numerics/functional/detail/tetrahedron_H1.inl
+++ b/src/serac/numerics/functional/detail/tetrahedron_H1.inl
@@ -361,7 +361,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
   {
     constexpr auto xi = GaussLegendreNodes();
@@ -384,8 +384,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, nqpts(q)>& qf_output,
-                        const TensorProductQuadratureRule&, tensor* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, nqpts(q)>& qf_output,
+                                          const TensorProductQuadratureRule&,
+                                          tensor* element_residual, int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/tetrahedron_L2.inl b/src/serac/numerics/functional/detail/tetrahedron_L2.inl
index 97f6467bb..df30f3409 100644
--- a/src/serac/numerics/functional/detail/tetrahedron_L2.inl
+++ b/src/serac/numerics/functional/detail/tetrahedron_L2.inl
@@ -366,7 +366,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
   {
     constexpr auto xi = GaussLegendreNodes();
@@ -389,8 +389,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, nqpts(q)>& qf_output,
-                        const TensorProductQuadratureRule&, tensor* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, nqpts(q)>& qf_output,
+                                          const TensorProductQuadratureRule&,
+                                          tensor* element_residual, int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
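The quadrilateral and hexahedron interpolate() routines above rely on the sum-factorization identity spelled out in their comments (A(dy, qx) := B(qx, dx) * X_e(dy, dx), then X_q(qy, qx) := B(qy, dy) * A(dy, qx)). A stand-alone sketch of that two-pass contraction with plain arrays instead of serac's tensor type (n = 1D nodes, q = 1D quadrature points; illustrative only):

```cpp
#include <array>
#include <cstddef>

// X_e(dy, dx): nodal values; B(qx, dx): 1D basis values at the 1D quadrature points.
// Two small contractions replace one large interpolation matrix applied to the element.
template <std::size_t q, std::size_t n>
std::array<std::array<double, q>, q> interpolate_quad(const std::array<std::array<double, n>, n>& X_e,
                                                      const std::array<std::array<double, n>, q>& B)
{
  std::array<std::array<double, q>, n> A{};  // A(dy, qx) = B(qx, dx) * X_e(dy, dx)
  for (std::size_t dy = 0; dy < n; dy++) {
    for (std::size_t qx = 0; qx < q; qx++) {
      for (std::size_t dx = 0; dx < n; dx++) {
        A[dy][qx] += B[qx][dx] * X_e[dy][dx];
      }
    }
  }

  std::array<std::array<double, q>, q> X_q{};  // X_q(qy, qx) = B(qy, dy) * A(dy, qx)
  for (std::size_t qy = 0; qy < q; qy++) {
    for (std::size_t qx = 0; qx < q; qx++) {
      for (std::size_t dy = 0; dy < n; dy++) {
        X_q[qy][qx] += B[qy][dy] * A[dy][qx];
      }
    }
  }
  return X_q;
}
```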
diff --git a/src/serac/numerics/functional/detail/triangle_H1.inl b/src/serac/numerics/functional/detail/triangle_H1.inl
index 22456d767..fe2bc560d 100644
--- a/src/serac/numerics/functional/detail/triangle_H1.inl
+++ b/src/serac/numerics/functional/detail/triangle_H1.inl
@@ -266,7 +266,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
   {
     constexpr auto xi = GaussLegendreNodes();
     static constexpr int num_quadrature_points = q * (q + 1) / 2;
@@ -290,8 +290,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q*(q + 1) / 2>& qf_output,
-                        const TensorProductQuadratureRule&, tensor* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q*(q + 1) / 2>& qf_output,
+                                          const TensorProductQuadratureRule&,
+                                          tensor* element_residual, int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/detail/triangle_L2.inl b/src/serac/numerics/functional/detail/triangle_L2.inl
index aa92a8eaf..51f64af5e 100644
--- a/src/serac/numerics/functional/detail/triangle_L2.inl
+++ b/src/serac/numerics/functional/detail/triangle_L2.inl
@@ -275,7 +275,7 @@ struct finite_element > {
   }
 
   template
-  static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
+  SERAC_HOST_DEVICE static auto interpolate(const tensor& X, const TensorProductQuadratureRule&)
   {
     constexpr auto xi = GaussLegendreNodes();
     static constexpr int num_quadrature_points = q * (q + 1) / 2;
@@ -299,8 +299,9 @@ struct finite_element > {
   }
 
   template
-  static void integrate(const tensor, q*(q + 1) / 2>& qf_output,
-                        const TensorProductQuadratureRule&, tensor* element_residual, int step = 1)
+  SERAC_HOST_DEVICE static void integrate(const tensor, q*(q + 1) / 2>& qf_output,
+                                          const TensorProductQuadratureRule&,
+                                          tensor* element_residual, int step = 1)
   {
     if constexpr (is_zero{} && is_zero{}) {
       return;
diff --git a/src/serac/numerics/functional/domain_integral_kernels.hpp b/src/serac/numerics/functional/domain_integral_kernels.hpp
index ed7415fc0..00e6db9ba 100644
--- a/src/serac/numerics/functional/domain_integral_kernels.hpp
+++ b/src/serac/numerics/functional/domain_integral_kernels.hpp
@@ -10,7 +10,10 @@
 #include "serac/numerics/functional/function_signature.hpp"
 #include "serac/numerics/functional/differentiate_wrt.hpp"
 
+#include
+#include
 #include
+#include
 
 namespace serac {
@@ -24,40 +27,40 @@ namespace domain_integral {
  * trial space
  */
 template
-struct QFunctionArgument;
+SERAC_HOST_DEVICE struct QFunctionArgument;
 
 /// @overload
 template
-struct QFunctionArgument, Dimension > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > {
   using type = tuple >;  ///< what will be passed to the q-function
 };
 
 /// @overload
 template
-struct QFunctionArgument, Dimension > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > {
   using type = tuple, tensor >;  ///< what will be passed to the q-function
 };
 
 /// @overload
 template
-struct QFunctionArgument, Dimension > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > {
   using type = tuple >;  ///< what will be passed to the q-function
 };
 
 /// @overload
 template
-struct QFunctionArgument, Dimension > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > {
   using type = tuple, tensor >;  ///< what will be passed to the q-function
 };
 
 /// @overload
 template
-struct QFunctionArgument, Dimension<2> > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension<2> > {
   using type = tuple, double>;  ///< what will be passed to the q-function
 };
 
 /// @overload
 template
-struct QFunctionArgument, Dimension<3> > {
+SERAC_HOST_DEVICE struct QFunctionArgument, Dimension<3> > {
   using type = tuple, tensor >;  ///< what will be passed to the q-function
 };
@@ -99,7 +102,7 @@ auto get_derivative_type(lambda qf, qpt_data_type&& qpt_data)
 };
 
 template
-auto batch_apply_qf_no_qdata(lambda qf, const tensor x, const T&... inputs)
+SERAC_HOST_DEVICE auto batch_apply_qf_no_qdata(lambda qf, const tensor x, const T&... inputs)
 {
   using return_type = decltype(qf(tensor{}, T{}[0]...));
   tensor outputs{};
@@ -114,8 +117,8 @@ auto batch_apply_qf_no_qdata(lambda qf, const tensor x, const T&
 }
 
 template
-auto batch_apply_qf(lambda qf, const tensor x, qpt_data_type* qpt_data, bool update_state,
-                    const T&... inputs)
+SERAC_HOST_DEVICE auto batch_apply_qf(lambda qf, const tensor x, qpt_data_type* qpt_data,
+                                      bool update_state, const T&... inputs)
 {
   using return_type = decltype(qf(tensor{}, qpt_data[0], T{}[0]...));
   tensor outputs{};
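The two hunks above annotate the batched q-function wrappers. Their job is simply to lift a pointwise material model onto arrays of per-quadrature-point inputs; a simplified stand-alone sketch of that pattern (not the serac implementation, which additionally threads quadrature-point state and dual-number types through):

```cpp
#include <array>
#include <cstddef>

// qf is a pointwise model: (position, input at one quadrature point) -> output.
// This wrapper applies it across all n quadrature points of one element.
template <std::size_t n, typename qfunction_type, typename position_type, typename input_type>
auto batch_apply(qfunction_type qf, const std::array<position_type, n>& x, const std::array<input_type, n>& u)
{
  using output_type = decltype(qf(x[0], u[0]));
  std::array<output_type, n> outputs{};
  for (std::size_t i = 0; i < n; i++) {
    outputs[i] = qf(x[i], u[i]);  // evaluate the material model at quadrature point i
  }
  return outputs;
}
```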
@@ -134,44 +137,42 @@ auto batch_apply_qf(lambda qf, const tensor x, qpt_data_type* qp
   return outputs;
 }
 
-template
-void evaluation_kernel_impl(FunctionSignature, const std::vector& inputs,
-                            double* outputs, const double* positions, const double* jacobians, lambda_type qf,
+template
+void evaluation_kernel_impl(trial_element_tuple_type trial_elements, test_element_type,
+                            const std::vector& inputs, double* outputs, const double* positions,
+                            const double* jacobians, lambda_type qf,
                             [[maybe_unused]] axom::ArrayView qf_state,
                             [[maybe_unused]] derivative_type* qf_derivatives, uint32_t num_elements, bool update_state,
-                            std::integer_sequence)
+                            camp::int_seq)
 {
-  using test_element = finite_element;
-
-  /// @brief the element type for each trial space
-  static constexpr tuple...> trial_elements{};
-
   // mfem provides this information as opaque arrays of doubles,
   // so we reinterpret the pointer with
-  auto r = reinterpret_cast(outputs);
-  auto x = reinterpret_cast::type*>(positions);
-  auto J = reinterpret_cast::type*>(jacobians);
-  static constexpr TensorProductQuadratureRule rule{};
+  auto r = reinterpret_cast(outputs);
+  auto x = reinterpret_cast::type*>(positions);
+  auto J = reinterpret_cast::type*>(jacobians);
+  TensorProductQuadratureRule rule{};
 
-  static constexpr int qpts_per_elem = num_quadrature_points(geom, Q);
+  [[maybe_unused]] auto qpts_per_elem = num_quadrature_points(geom, Q);
 
   [[maybe_unused]] tuple u = {
       reinterpret_cast(trial_elements))::dof_type*>(inputs[indices])...};
 
   // for each element in the domain
-  for (uint32_t e = 0; e < num_elements; e++) {
+  for (uint32_t e = 0; e < num_elements; ++e) {
     // load the jacobians and positions for each quadrature point in this element
     auto J_e = J[e];
     auto x_e = x[e];
 
+    [[maybe_unused]] static constexpr trial_element_tuple_type trial_element_tuple{};
     // batch-calculate values / derivatives of each trial space, at each quadrature point
     [[maybe_unused]] tuple qf_inputs = {promote_each_to_dual_when(
-        get(trial_elements).interpolate(get(u)[e], rule))...};
+        get(trial_element_tuple).interpolate(get(u)[e], rule))...};
 
     // use J_e to transform values / derivatives on the parent element
     // to the to the corresponding values / derivatives on the physical element
-    (parent_to_physical(trial_elements).family>(get(qf_inputs), J_e), ...);
+    (parent_to_physical(trial_element_tuple).family>(get(qf_inputs), J_e), ...);
 
     // (batch) evalute the q-function at each quadrature point
     //
@@ -188,25 +189,27 @@ void evaluation_kernel_impl(FunctionSignature, const std::vecto
     // use J to transform sources / fluxes on the physical element
     // back to the corresponding sources / fluxes on the parent element
-    physical_to_parent(qf_outputs, J_e);
+    physical_to_parent(qf_outputs, J_e);
 
     // write out the q-function derivatives after applying the
     // physical_to_parent transformation, so that those transformations
     // won't need to be applied in the action_of_gradient and element_gradient kernels
     if constexpr (differentiation_index != serac::NO_DIFFERENTIATION) {
       for (int q = 0; q < leading_dimension(qf_outputs); q++) {
-        qf_derivatives[e * qpts_per_elem + uint32_t(q)] = get_gradient(qf_outputs[q]);
+        qf_derivatives[e * uint32_t(qpts_per_elem) + uint32_t(q)] = get_gradient(qf_outputs[q]);
       }
     }
 
     // (batch) integrate the material response against the test-space basis functions
-    test_element::integrate(get_value(qf_outputs), rule, &r[e]);
+    test_element_type::integrate(get_value(qf_outputs), rule, &r[e]);
   }
+
+  return;
 }
 
 //clang-format off
 template
-auto chain_rule(const S& dfdx, const T& dx)
+SERAC_HOST_DEVICE auto chain_rule(const S& dfdx, const T& dx)
 {
   if constexpr (is_QOI) {
     return serac::chain_rule(serac::get<0>(dfdx), serac::get<0>(dx)) +
@@ -223,7 +226,7 @@ auto chain_rule(const S& dfdx, const T& dx)
 //clang-format on
 
 template
-auto batch_apply_chain_rule(derivative_type* qf_derivatives, const tensor& inputs)
+SERAC_HOST_DEVICE auto batch_apply_chain_rule(derivative_type* qf_derivatives, const tensor& inputs)
 {
   using return_type = decltype(chain_rule(derivative_type{}, T{}));
   tensor outputs{};
@@ -265,14 +268,14 @@ void action_of_gradient_kernel(const double* dU, double* dR, derivatives_type* q
   using test_element = finite_element;
   using trial_element = finite_element;
 
-  static constexpr bool is_QOI = (test::family == Family::QOI);
-  static constexpr int num_qpts = num_quadrature_points(g, Q);
+  constexpr bool is_QOI = (test::family == Family::QOI);
+  constexpr int num_qpts = num_quadrature_points(g, Q);
 
   // mfem provides this information in 1D arrays, so we reshape it
   // into strided multidimensional arrays before using
-  auto du = reinterpret_cast(dU);
-  auto dr = reinterpret_cast(dR);
-  static constexpr TensorProductQuadratureRule rule{};
+  auto du = reinterpret_cast(dU);
+  auto dr = reinterpret_cast(dR);
+  constexpr TensorProductQuadratureRule rule{};
 
   // for each element in the domain
   for (uint32_t e = 0; e < num_elements; e++) {
@@ -351,9 +354,11 @@ std::function&, double*, bool)> evaluation
     std::shared_ptr > qf_state, std::shared_ptr qf_derivatives, uint32_t num_elements)
 {
+  auto trial_elements = trial_elements_tuple(s);
+  auto test = get_test_element(s);
   return [=](const std::vector& inputs, double* outputs, bool update_state) {
-    domain_integral::evaluation_kernel_impl(s, inputs, outputs, positions, jacobians, qf,
-                                            (*qf_state)[geom], qf_derivatives.get(), num_elements,
+    domain_integral::evaluation_kernel_impl(trial_elements, test, inputs, outputs, positions, jacobians,
+                                            qf, (*qf_state)[geom], qf_derivatives.get(), num_elements,
                                             update_state, s.index_seq);
   };
 }
diff --git a/src/serac/numerics/functional/finite_element.hpp b/src/serac/numerics/functional/finite_element.hpp
index eecc4b07a..28174f703 100644
--- a/src/serac/numerics/functional/finite_element.hpp
+++ b/src/serac/numerics/functional/finite_element.hpp
@@ -244,7 +244,7 @@ struct QOI {
  * @param jacobians the jacobians of the isoparametric map from parent to physical space of each quadrature point
  */
 template
-void parent_to_physical(tensor& qf_input, const tensor& jacobians)
+SERAC_HOST_DEVICE void parent_to_physical(tensor& qf_input, const tensor& jacobians)
 {
   [[maybe_unused]] constexpr int VALUE = 0;
   [[maybe_unused]] constexpr int DERIVATIVE = 1;
@@ -285,7 +285,7 @@ void parent_to_physical(tensor& qf_input, const tensor
-void physical_to_parent(tensor& qf_output, const tensor& jacobians)
+SERAC_HOST_DEVICE void physical_to_parent(tensor& qf_output, const tensor& jacobians)
 {
   [[maybe_unused]] constexpr int SOURCE = 0;
   [[maybe_unused]] constexpr int FLUX = 1;
diff --git a/src/serac/numerics/functional/function_signature.hpp b/src/serac/numerics/functional/function_signature.hpp
index 07c2df520..e5ce5d9bb 100644
--- a/src/serac/numerics/functional/function_signature.hpp
+++ b/src/serac/numerics/functional/function_signature.hpp
@@ -1,4 +1,6 @@
 #pragma once
+#include
+#include "serac/numerics/functional/finite_element.hpp"
 
 template
 struct FunctionSignature;
@@ -17,5 +19,34 @@ struct FunctionSignature {
   static constexpr int num_args = sizeof...(input_types);
 
   /// integer sequence used to make iterating over arguments easier
-  static constexpr auto index_seq = std::make_integer_sequence{};
+  static constexpr auto index_seq = camp::make_int_seq{};
 };
+
+/**
+ * @brief This helper function template expands the variadic parameter trials and uses
+ * the pack to construct a tuple of finite elements.
+ *
+ * Expansion of the template parameter trials must occur in a separate template
+ * function from evaluation_kernel_impl. The __host__ __device__ marker on the
+ * lambda declared in evaluation_kernel_impl prevents use of multiple variadic
+ * template parameters in evaluation_kernel_impl. See section 14.7.3 of the
+ * CUDA programming guide, item 9.
+ */
+template
+auto trial_elements_tuple(FunctionSignature)
+{
+  return serac::tuple...>{};
+}
+
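trial_elements_tuple above is the heart of the refactor: the parameter-pack expansion over trial spaces now happens in a plain helper, and evaluation_kernel only captures the already-constructed element objects, which keeps the __host__ __device__ lambda free of multiple variadic packs. A minimal stand-alone sketch of the same pattern with simplified stand-in types (not serac's actual classes):

```cpp
#include <tuple>

template <typename T>
struct Signature;

template <typename test_space, typename... trial_spaces>
struct Signature<test_space(trial_spaces...)> {};

template <typename space>
struct element {};  // stand-in for serac::finite_element<geom, space>

// expand the trial pack in a dedicated helper, analogous to trial_elements_tuple()
template <typename test_space, typename... trial_spaces>
auto make_trial_elements(Signature<test_space(trial_spaces...)>)
{
  return std::tuple<element<trial_spaces>...>{};
}

// analogous to get_test_element()
template <typename test_space, typename... trial_spaces>
auto make_test_element(Signature<test_space(trial_spaces...)>)
{
  return element<test_space>{};
}

// the factory captures the pre-built objects by value, so the lambda body
// contains no pack expansion of its own
template <typename signature_type, typename kernel_type>
auto make_evaluation_kernel(signature_type s, kernel_type kernel)
{
  auto trial_elements = make_trial_elements(s);
  auto test_element   = make_test_element(s);
  return [=](const double* inputs, double* outputs) { kernel(trial_elements, test_element, inputs, outputs); };
}
```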
+/**
+ * @brief This helper function template returns a finite element based on the output
+ * type of FunctionSignature.
+ *
+ * See the comments for trial_elements_tuple to understand why this is needed in
+ * domain_integral_kernels::evaluation_kernel and boundary_integral_kernels::evaluation_kernel.
+ */
+template
+auto get_test_element(FunctionSignature)
+{
+  return serac::finite_element{};
+}
diff --git a/src/serac/numerics/functional/tensor.hpp b/src/serac/numerics/functional/tensor.hpp
index 373bdb305..bb89f83d9 100644
--- a/src/serac/numerics/functional/tensor.hpp
+++ b/src/serac/numerics/functional/tensor.hpp
@@ -1308,7 +1308,7 @@ auto matrix_sqrt(const tensor& A)
  * @param B the right operand
  */
 template
-auto contract(const tensor& A, const tensor& B)
+SERAC_HOST_DEVICE auto contract(const tensor& A, const tensor& B)
 {
   constexpr int Adims[] = {m, n...};
   constexpr int Bdims[] = {p, q};
@@ -1366,7 +1366,7 @@ auto contract(const tensor& A, const tensor& B)
 /// @overload
 template
-auto contract(const zero&, const T&)
+SERAC_HOST_DEVICE auto contract(const zero&, const T&)
 {
   return zero{};
 }
diff --git a/src/serac/physics/heat_transfer.hpp b/src/serac/physics/heat_transfer.hpp
index 14a3377a6..d8563ed72 100644
--- a/src/serac/physics/heat_transfer.hpp
+++ b/src/serac/physics/heat_transfer.hpp
@@ -67,7 +67,7 @@ const TimesteppingOptions default_static_options = {TimestepMethod::QuasiStatic}
 /**
  * @brief An object containing the solver for a thermal conduction PDE
  *
- * This is a generic linear thermal diffusion oeprator of the form
+ * This is a generic linear thermal diffusion operator of the form
  *
  * \f[
  * \mathbf{M} \frac{\partial \mathbf{u}}{\partial t} = -\kappa \mathbf{K} \mathbf{u} + \mathbf{f}
 * \f]
@@ -285,6 +285,64 @@ class HeatTransfer, std::integer_sequ
     cycle_ += 1;
   }
 
+  /**
+   * @brief Functor representing the integrand of a thermal material. Material type must be
+   * a functor as well.
+   */
+  template
+  struct ThermalMaterialIntegrand {
+    /**
+     * @brief Construct a ThermalMaterialIntegrand functor with material model of type `MaterialType`.
+     * @param[in] material A functor representing the material model. Should be a functor, or a class/struct with
+     * public operator() method. Must NOT be a generic lambda, or serac will not compile due to static asserts below.
+     */
+    ThermalMaterialIntegrand(MaterialType material) : material_(material) {}
+
+    // Due to nvcc's lack of support for extended generic lambdas (i.e. functions of the form
+    // auto lambda = [] __host__ __device__ (auto) {}; ), MaterialType cannot be an extended
+    // generic lambda. The static asserts below check this.
+  private:
+    class DummyArgumentType {
+    };
+    static_assert(!std::is_invocable::value);
+    static_assert(!std::is_invocable::value);
+    static_assert(!std::is_invocable::value);
+
+  public:
+    /**
+     * @brief Evaluate integrand
+     */
+    template
+    auto operator()(X x, T temperature, dT_dt dtemp_dt, Shape shape, Params... params)
+    {
+      // Get the value and the gradient from the input tuple
+      auto [u, du_dX] = temperature;
+      auto [p, dp_dX] = shape;
+      auto du_dt = get(dtemp_dt);
+
+      auto I_plus_dp_dX = I + dp_dX;
+      auto inv_I_plus_dp_dX = inv(I_plus_dp_dX);
+      auto det_I_plus_dp_dX = det(I_plus_dp_dX);
+
+      // Note that the current configuration x = X + p, where X is the original reference
+      // configuration and p is the shape displacement. We need the gradient with
+      // respect to the perturbed reference configuration x = X + p for the material model. Therefore, we calculate
+      // du/dx = du/dX * dX/dx = du/dX * (dx/dX)^-1 = du/dX * (I + dp/dX)^-1
+
+      auto du_dx = dot(du_dX, inv_I_plus_dp_dX);
+
+      auto [heat_capacity, heat_flux] = material_(x + p, u, du_dx, params...);
+
+      // Note that the return is integrated in the perturbed reference
+      // configuration, hence the det(I + dp_dx) = det(dx/dX)
+      return serac::tuple{heat_capacity * du_dt * det_I_plus_dp_dX,
+                          -1.0 * dot(inv_I_plus_dp_dX, heat_flux) * det_I_plus_dp_dX};
+    }
+
+  private:
+    MaterialType material_;
+  };
+
   /**
    * @brief Set the thermal material model for the physics solver
    *
@@ -311,33 +369,9 @@ class HeatTransfer, std::integer_sequ
   template
   void setMaterial(DependsOn, MaterialType material)
   {
-    residual_->AddDomainIntegral(
-        Dimension{}, DependsOn<0, 1, 2, active_parameters + NUM_STATE_VARS...>{},
-        [material](auto x, auto temperature, auto dtemp_dt, auto shape, auto... params) {
-          // Get the value and the gradient from the input tuple
-          auto [u, du_dX] = temperature;
-          auto [p, dp_dX] = shape;
-          auto du_dt = get(dtemp_dt);
-
-          auto I_plus_dp_dX = I + dp_dX;
-          auto inv_I_plus_dp_dX = inv(I_plus_dp_dX);
-          auto det_I_plus_dp_dX = det(I_plus_dp_dX);
-
-          // Note that the current configuration x = X + p, where X is the original reference
-          // configuration and p is the shape displacement. We need the gradient with
-          // respect to the perturbed reference configuration x = X + p for the material model. Therefore, we calculate
-          // du/dx = du/dX * dX/dx = du/dX * (dx/dX)^-1 = du/dX * (I + dp/dX)^-1
-
-          auto du_dx = dot(du_dX, inv_I_plus_dp_dX);
-
-          auto [heat_capacity, heat_flux] = material(x + p, u, du_dx, params...);
-
-          // Note that the return is integrated in the perturbed reference
-          // configuration, hence the det(I + dp_dx) = det(dx/dX)
-          return serac::tuple{heat_capacity * du_dt * det_I_plus_dp_dX,
-                              -1.0 * dot(inv_I_plus_dp_dX, heat_flux) * det_I_plus_dp_dX};
-        },
-        mesh_);
+    ThermalMaterialIntegrand integrand(material);
+    residual_->AddDomainIntegral(Dimension{}, DependsOn<0, 1, 2, active_parameters + NUM_STATE_VARS...>{},
+                                 integrand, mesh_);
   }
 
   /// @overload
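For context on the heat_transfer.hpp change above: nvcc rejects extended __host__ __device__ lambdas that are also generic, so the integrand moves from a generic lambda into the named ThermalMaterialIntegrand functor with a templated operator(). The same pattern in isolation, with hypothetical names rather than serac's API:

```cpp
// What nvcc rejects: an extended generic lambda such as
//   auto qf = [material] __host__ __device__ (auto x, auto state) { ... };
// What the patch uses instead: a named functor whose operator() is a member template.
template <typename MaterialType>
struct IntegrandWrapper {
  explicit IntegrandWrapper(MaterialType material) : material_(material) {}

  // the member template plays the role of the generic lambda's `auto` parameters
  template <typename X, typename State>
  auto operator()(X x, State state) const
  {
    return material_(x, state);  // defer to the user-supplied pointwise model
  }

  MaterialType material_;
};

// usage sketch (hypothetical call, mirroring setMaterial above):
//   residual.AddDomainIntegral(..., IntegrandWrapper<MaterialType>(material), mesh);
```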