diff --git a/CMakeLists.txt b/CMakeLists.txt index 6657266a5..5cdd182e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,11 @@ if(ENABLE_CUDA AND ${CMAKE_VERSION} VERSION_LESS 3.18.0) message(FATAL_ERROR "Serac requires CMake version 3.18.0+ when CUDA is enabled.") endif() +# N.B. leave compilers unspecified when configuring CMake to ensure that +# the clang/clang++ binaries appropriate for enzyme are chosen +set(CMAKE_C_COMPILER "${LLVM_BIN_DIR}/clang") +set(CMAKE_CXX_COMPILER "${LLVM_BIN_DIR}/clang++") + project(serac LANGUAGES CXX C) # MPI is required in Serac. diff --git a/cmake/thirdparty/SetupSeracThirdParty.cmake b/cmake/thirdparty/SetupSeracThirdParty.cmake index a0109a3df..532541a3d 100644 --- a/cmake/thirdparty/SetupSeracThirdParty.cmake +++ b/cmake/thirdparty/SetupSeracThirdParty.cmake @@ -559,3 +559,20 @@ if (NOT SERAC_THIRD_PARTY_LIBRARIES_FOUND) endforeach() endif() endif() + + +#------------------------------------------------------------------------------ +# Enzyme +#------------------------------------------------------------------------------ +if (FALSE) +include(FetchContent) + +FetchContent_Declare( + enzyme + URL https://github.com/EnzymeAD/Enzyme/archive/refs/tags/v0.0.110.tar.gz + SOURCE_SUBDIR enzyme +) +FetchContent_MakeAvailable(enzyme) +else() +add_subdirectory("/home/sam/code/Enzyme/official/enzyme" ${PROJECT_BINARY_DIR}/tmp/enzyme) +endif () diff --git a/src/serac/numerics/functional/CMakeLists.txt b/src/serac/numerics/functional/CMakeLists.txt index b7edf3741..d60bd3f41 100644 --- a/src/serac/numerics/functional/CMakeLists.txt +++ b/src/serac/numerics/functional/CMakeLists.txt @@ -69,8 +69,15 @@ blt_add_library( HEADERS ${functional_headers} ${functional_detail_headers} SOURCES ${functional_sources} DEPENDS_ON ${functional_depends} - ) +) +# without this, I get +# "error while loading shared libraries: libomp.so: cannot open +# shared object file: No such file or directory" +target_link_libraries(serac_functional PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(serac_functional PUBLIC ClangEnzymeFlags) + +target_compile_options(serac_functional PUBLIC -mllvm -enzyme-loose-types) install(FILES ${functional_headers} DESTINATION include/serac/numerics/functional ) install(FILES ${functional_detail_headers} DESTINATION include/serac/numerics/functional/detail ) diff --git a/src/serac/numerics/functional/detail/triangle_H1.inl b/src/serac/numerics/functional/detail/triangle_H1.inl index 37c1600fa..fe3bda06d 100644 --- a/src/serac/numerics/functional/detail/triangle_H1.inl +++ b/src/serac/numerics/functional/detail/triangle_H1.inl @@ -240,7 +240,7 @@ struct finite_element > { } template - static auto batch_apply_shape_fn(int j, tensor input, const TensorProductQuadratureRule&) + static auto batch_apply_shape_fn(int j, tensor input, const TensorProductQuadratureRule&) { using source_t = decltype(get<0>(get<0>(in_t{})) + dot(get<1>(get<0>(in_t{})), tensor{})); using flux_t = decltype(get<0>(get<1>(in_t{})) + dot(get<1>(get<1>(in_t{})), tensor{})); diff --git a/src/serac/numerics/functional/domain_integral_kernels.hpp b/src/serac/numerics/functional/domain_integral_kernels.hpp index 9f13cd42f..ba7754b85 100644 --- a/src/serac/numerics/functional/domain_integral_kernels.hpp +++ b/src/serac/numerics/functional/domain_integral_kernels.hpp @@ -9,6 +9,7 @@ #include "serac/numerics/functional/quadrature_data.hpp" #include "serac/numerics/functional/function_signature.hpp" #include "serac/numerics/functional/differentiate_wrt.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" #include #include @@ -90,24 +91,36 @@ template & arg_tuple) { - return apply_qf_helper(qf, t, x_q, qpt_data, arg_tuple, - std::make_integer_sequence(sizeof...(T))>{}); + return apply_qf_helper(qf, t, x_q, qpt_data, arg_tuple, std::make_integer_sequence(sizeof...(T))>{}); } template auto get_derivative_type(lambda qf, qpt_data_type&& qpt_data) { using qf_arguments = serac::tuple >::type...>; - return get_gradient(apply_qf(qf, double{}, serac::tuple, tensor >{}, qpt_data, - make_dual_wrt(qf_arguments{}))); + using output_type = decltype(apply_qf(qf, double{}, serac::tuple, tensor >{}, qpt_data, qf_arguments{})); + return typename impl::nested< output_type, decltype(type(qf_arguments{})) >::type{}; }; -template + +template +SERAC_HOST_DEVICE auto parent_to_physical(const tuple& qf_input, const tensor & invJ) { + return tuple{get<0>(qf_input), dot(get<1>(qf_input), invJ)}; +} + +template +SERAC_HOST_DEVICE auto physical_to_parent(const tuple& qf_output, const tensor & invJ, double detJ) { + // assumes family == Family::H1 for now + return tuple{get<0>(qf_output) * detJ, dot(get<1>(qf_output), transpose(invJ)) * detJ}; +} + +template < typename lambda, int dim, int n, typename... T> SERAC_HOST_DEVICE auto batch_apply_qf_no_qdata(lambda qf, double t, const tensor& x, const tensor& J, const T&... inputs) { using position_t = serac::tuple, tensor >; using return_type = decltype(qf(double{}, position_t{}, T{}[0]...)); + tensor outputs{}; for (int i = 0; i < n; i++) { tensor x_q; @@ -118,11 +131,39 @@ SERAC_HOST_DEVICE auto batch_apply_qf_no_qdata(lambda qf, double t, const tensor } x_q[j] = x(j, i); } - outputs[i] = qf(t, serac::tuple{x_q, J_q}, inputs[i]...); + double detJ_q = det(J_q); + tensor invJ_q = inv(J_q); + auto qf_output = qf(t, serac::tuple{x_q, J_q}, parent_to_physical(inputs[i], invJ_q) ...); + outputs[i] = physical_to_parent(qf_output, invJ_q, detJ_q); } return outputs; } +template < uint32_t differentiation_index, typename derivative_type, typename lambda, int dim, int n, typename... T> +SERAC_HOST_DEVICE auto batch_apply_qf_derivative(derivative_type * doutputs, lambda qf, double t, const tensor& x, + const tensor& J, const tensor &... inputs) +{ + for (int i = 0; i < n; i++) { + tensor x_q; + tensor J_q; + for (int j = 0; j < dim; j++) { + for (int k = 0; k < dim; k++) { + J_q[j][k] = J(k, j, i); + } + x_q[j] = x(j, i); + } + double detJ_q = det(J_q); + tensor invJ_q = inv(J_q); + auto func = [&](const auto & ... input){ + return physical_to_parent(qf(t, serac::tuple{x_q, J_q}, parent_to_physical(input, invJ_q) ...), invJ_q, detJ_q); + }; + + doutputs[i] = jacfwd(func, inputs[i]...); + } + + return doutputs; +} + template SERAC_HOST_DEVICE auto batch_apply_qf(lambda qf, double t, const tensor& x, const tensor& J, qpt_data_type* qpt_data, bool update_state, @@ -153,7 +194,9 @@ template void evaluation_kernel_impl(trial_element_tuple trial_elements, test_element, double t, - const std::vector& inputs, double* outputs, const double* positions, + const std::vector& inputs_e, + double* outputs, + const double* positions, const double* jacobians, lambda_type qf, [[maybe_unused]] axom::ArrayView qf_state, [[maybe_unused]] derivative_type* qf_derivatives, const int* elements, @@ -169,7 +212,7 @@ void evaluation_kernel_impl(trial_element_tuple trial_elements, test_element, do [[maybe_unused]] auto qpts_per_elem = num_quadrature_points(geom, Q); [[maybe_unused]] tuple u = { - reinterpret_cast(trial_elements))::dof_type*>(inputs[indices])...}; + reinterpret_cast(trial_elements))::dof_type*>(inputs_e[indices])...}; // for each element in the domain for (uint32_t e = 0; e < num_elements; ++e) { @@ -179,37 +222,21 @@ void evaluation_kernel_impl(trial_element_tuple trial_elements, test_element, do //[[maybe_unused]] static constexpr trial_element_tuple trial_element_tuple{}; // batch-calculate values / derivatives of each trial space, at each quadrature point - [[maybe_unused]] tuple qf_inputs = {promote_each_to_dual_when( - get(trial_elements).interpolate(get(u)[elements[e]], rule))...}; - - // use J_e to transform values / derivatives on the parent element - // to the to the corresponding values / derivatives on the physical element - (parent_to_physical(trial_elements).family>(get(qf_inputs), J_e), ...); + tuple qf_inputs = {get(trial_elements).interpolate(get(u)[elements[e]], rule)...}; // (batch) evalute the q-function at each quadrature point // // note: the weird immediately-invoked lambda expression is // a workaround for a bug in GCC(<12.0) where it fails to // decide which function overload to use, and crashes - auto qf_outputs = [&]() { - if constexpr (std::is_same_v) { - return batch_apply_qf_no_qdata(qf, t, x_e, J_e, get(qf_inputs)...); - } else { - return batch_apply_qf(qf, t, x_e, J_e, &qf_state(e, 0), update_state, get(qf_inputs)...); - } - }(); - - // use J to transform sources / fluxes on the physical element - // back to the corresponding sources / fluxes on the parent element - physical_to_parent(qf_outputs, J_e); + // TODO: reenable internal_variables + auto qf_outputs = batch_apply_qf_no_qdata(qf, t, x_e, J_e, get(qf_inputs)...); // write out the q-function derivatives after applying the // physical_to_parent transformation, so that those transformations // won't need to be applied in the action_of_gradient and element_gradient kernels if constexpr (differentiation_index != serac::NO_DIFFERENTIATION) { - for (int q = 0; q < leading_dimension(qf_outputs); q++) { - qf_derivatives[e * uint32_t(qpts_per_elem) + uint32_t(q)] = get_gradient(qf_outputs[q]); - } + batch_apply_qf_derivative(qf_derivatives + e * uint32_t(qpts_per_elem), qf, t, x_e, J_e, get(qf_inputs)...); } // (batch) integrate the material response against the test-space basis functions @@ -229,6 +256,17 @@ SERAC_HOST_DEVICE auto chain_rule(const S& dfdx, const T& dx) } if constexpr (!is_QOI) { +#if 0 + +serac::tuple< + serac::tuple< serac::tensor, 2>, serac::tensor, 2> >, + serac::tuple< serac::tensor, 2, 2>, serac::tensor, 2, 2>> +>, +serac::tuple< serac::tensor, serac::tensor > + +#endif + + return serac::tuple{serac::chain_rule(serac::get<0>(serac::get<0>(dfdx)), serac::get<0>(dx)) + serac::chain_rule(serac::get<1>(serac::get<0>(dfdx)), serac::get<1>(dx)), serac::chain_rule(serac::get<0>(serac::get<1>(dfdx)), serac::get<0>(dx)) + diff --git a/src/serac/numerics/functional/domain_integral_kernels_old.hpp b/src/serac/numerics/functional/domain_integral_kernels_old.hpp new file mode 100644 index 000000000..9f13cd42f --- /dev/null +++ b/src/serac/numerics/functional/domain_integral_kernels_old.hpp @@ -0,0 +1,402 @@ +// Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and +// other Serac Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +#pragma once + +#include "serac/infrastructure/accelerator.hpp" +#include "serac/numerics/functional/quadrature_data.hpp" +#include "serac/numerics/functional/function_signature.hpp" +#include "serac/numerics/functional/differentiate_wrt.hpp" + +#include +#include +#include +#include + +namespace serac { + +namespace domain_integral { + +/** + * @tparam space the user-specified trial space + * @tparam dimension describes whether the problem is 1D, 2D, or 3D + * + * @brief a struct used to encode what type of arguments will be passed to a domain integral q-function, for the given + * trial space + */ +template +SERAC_HOST_DEVICE struct QFunctionArgument; + +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > { + using type = tuple >; ///< what will be passed to the q-function +}; + +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > { + using type = tuple, tensor >; ///< what will be passed to the q-function +}; + +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > { + using type = tuple >; ///< what will be passed to the q-function +}; +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension > { + using type = tuple, tensor >; ///< what will be passed to the q-function +}; + +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension<2> > { + using type = tuple, double>; ///< what will be passed to the q-function +}; + +/// @overload +template +SERAC_HOST_DEVICE struct QFunctionArgument, Dimension<3> > { + using type = tuple, tensor >; ///< what will be passed to the q-function +}; + +/// @brief layer of indirection needed to unpack the entries of the argument tuple +SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING +template +SERAC_HOST_DEVICE auto apply_qf_helper(lambda&& qf, double t, coords_type&& x_q, qpt_data_type&& qpt_data, + const T& arg_tuple, std::integer_sequence) +{ + if constexpr (std::is_same::type, Nothing>::value) { + return qf(t, x_q, serac::get(arg_tuple)...); + } else { + return qf(t, x_q, qpt_data, serac::get(arg_tuple)...); + } +} + +/** + * @brief Actually calls the q-function + * This is an indirection layer to provide a transparent call site usage regardless of whether + * quadrature point (state) information is required + * @param[in] qf The quadrature function functor object + * @param[in] x_q The physical coordinates of the quadrature point + * @param[in] arg_tuple The values and derivatives at the quadrature point, as a dual + * @param[inout] qpt_data The state information at the quadrature point + */ +template +SERAC_HOST_DEVICE auto apply_qf(lambda&& qf, double t, coords_type&& x_q, qpt_data_type&& qpt_data, + const serac::tuple& arg_tuple) +{ + return apply_qf_helper(qf, t, x_q, qpt_data, arg_tuple, + std::make_integer_sequence(sizeof...(T))>{}); +} + +template +auto get_derivative_type(lambda qf, qpt_data_type&& qpt_data) +{ + using qf_arguments = serac::tuple >::type...>; + return get_gradient(apply_qf(qf, double{}, serac::tuple, tensor >{}, qpt_data, + make_dual_wrt(qf_arguments{}))); +}; + +template +SERAC_HOST_DEVICE auto batch_apply_qf_no_qdata(lambda qf, double t, const tensor& x, + const tensor& J, const T&... inputs) +{ + using position_t = serac::tuple, tensor >; + using return_type = decltype(qf(double{}, position_t{}, T{}[0]...)); + tensor outputs{}; + for (int i = 0; i < n; i++) { + tensor x_q; + tensor J_q; + for (int j = 0; j < dim; j++) { + for (int k = 0; k < dim; k++) { + J_q[j][k] = J(k, j, i); + } + x_q[j] = x(j, i); + } + outputs[i] = qf(t, serac::tuple{x_q, J_q}, inputs[i]...); + } + return outputs; +} + +template +SERAC_HOST_DEVICE auto batch_apply_qf(lambda qf, double t, const tensor& x, + const tensor& J, qpt_data_type* qpt_data, bool update_state, + const T&... inputs) +{ + using position_t = serac::tuple, tensor >; + using return_type = decltype(qf(double{}, position_t{}, qpt_data[0], T{}[0]...)); + tensor outputs{}; + for (int i = 0; i < n; i++) { + tensor x_q; + tensor J_q; + for (int j = 0; j < dim; j++) { + for (int k = 0; k < dim; k++) { + J_q[j][k] = J(k, j, i); + } + x_q[j] = x(j, i); + } + auto qdata = qpt_data[i]; + outputs[i] = qf(t, serac::tuple{x_q, J_q}, qdata, inputs[i]...); + if (update_state) { + qpt_data[i] = qdata; + } + } + return outputs; +} + +template +void evaluation_kernel_impl(trial_element_tuple trial_elements, test_element, double t, + const std::vector& inputs, double* outputs, const double* positions, + const double* jacobians, lambda_type qf, + [[maybe_unused]] axom::ArrayView qf_state, + [[maybe_unused]] derivative_type* qf_derivatives, const int* elements, + uint32_t num_elements, bool update_state, camp::int_seq) +{ + // mfem provides this information as opaque arrays of doubles, + // so we reinterpret the pointer with + auto r = reinterpret_cast(outputs); + auto x = reinterpret_cast::type*>(positions); + auto J = reinterpret_cast::type*>(jacobians); + TensorProductQuadratureRule rule{}; + + [[maybe_unused]] auto qpts_per_elem = num_quadrature_points(geom, Q); + + [[maybe_unused]] tuple u = { + reinterpret_cast(trial_elements))::dof_type*>(inputs[indices])...}; + + // for each element in the domain + for (uint32_t e = 0; e < num_elements; ++e) { + // load the jacobians and positions for each quadrature point in this element + auto J_e = J[e]; + auto x_e = x[e]; + + //[[maybe_unused]] static constexpr trial_element_tuple trial_element_tuple{}; + // batch-calculate values / derivatives of each trial space, at each quadrature point + [[maybe_unused]] tuple qf_inputs = {promote_each_to_dual_when( + get(trial_elements).interpolate(get(u)[elements[e]], rule))...}; + + // use J_e to transform values / derivatives on the parent element + // to the to the corresponding values / derivatives on the physical element + (parent_to_physical(trial_elements).family>(get(qf_inputs), J_e), ...); + + // (batch) evalute the q-function at each quadrature point + // + // note: the weird immediately-invoked lambda expression is + // a workaround for a bug in GCC(<12.0) where it fails to + // decide which function overload to use, and crashes + auto qf_outputs = [&]() { + if constexpr (std::is_same_v) { + return batch_apply_qf_no_qdata(qf, t, x_e, J_e, get(qf_inputs)...); + } else { + return batch_apply_qf(qf, t, x_e, J_e, &qf_state(e, 0), update_state, get(qf_inputs)...); + } + }(); + + // use J to transform sources / fluxes on the physical element + // back to the corresponding sources / fluxes on the parent element + physical_to_parent(qf_outputs, J_e); + + // write out the q-function derivatives after applying the + // physical_to_parent transformation, so that those transformations + // won't need to be applied in the action_of_gradient and element_gradient kernels + if constexpr (differentiation_index != serac::NO_DIFFERENTIATION) { + for (int q = 0; q < leading_dimension(qf_outputs); q++) { + qf_derivatives[e * uint32_t(qpts_per_elem) + uint32_t(q)] = get_gradient(qf_outputs[q]); + } + } + + // (batch) integrate the material response against the test-space basis functions + test_element::integrate(get_value(qf_outputs), rule, &r[elements[e]]); + } + + return; +} + +//clang-format off +template +SERAC_HOST_DEVICE auto chain_rule(const S& dfdx, const T& dx) +{ + if constexpr (is_QOI) { + return serac::chain_rule(serac::get<0>(dfdx), serac::get<0>(dx)) + + serac::chain_rule(serac::get<1>(dfdx), serac::get<1>(dx)); + } + + if constexpr (!is_QOI) { + return serac::tuple{serac::chain_rule(serac::get<0>(serac::get<0>(dfdx)), serac::get<0>(dx)) + + serac::chain_rule(serac::get<1>(serac::get<0>(dfdx)), serac::get<1>(dx)), + serac::chain_rule(serac::get<0>(serac::get<1>(dfdx)), serac::get<0>(dx)) + + serac::chain_rule(serac::get<1>(serac::get<1>(dfdx)), serac::get<1>(dx))}; + } +} +//clang-format on + +template +SERAC_HOST_DEVICE auto batch_apply_chain_rule(derivative_type* qf_derivatives, const tensor& inputs) +{ + using return_type = decltype(chain_rule(derivative_type{}, T{})); + tensor outputs{}; + for (int i = 0; i < n; i++) { + outputs[i] = chain_rule(qf_derivatives[i], inputs[i]); + } + return outputs; +} + +/** + * @brief The base kernel template used to create create custom directional derivative + * kernels associated with finite element calculations + * + * @tparam test The type of the test function space + * @tparam trial The type of the trial function space + * The above spaces can be any combination of {H1, Hcurl, Hdiv (TODO), L2 (TODO), QOI} + * + * Template parameters other than the test and trial spaces are used for customization + optimization + * and are erased through the @p std::function members of @p DomainIntegral + * @tparam g The shape of the element (only quadrilateral and hexahedron are supported at present) + * @tparam Q parameter describing number of quadrature points (see num_quadrature_points() function for more details) + * @tparam derivatives_type Type representing the derivative of the q-function w.r.t. its input arguments + * + * @note lambda does not appear as a template argument, as the directional derivative is + * inherently just a linear transformation + * + * @param[in] dU The full set of per-element DOF values (primary input) + * @param[inout] dR The full set of per-element residuals (primary output) + * @param[in] derivatives_ptr The address at which derivatives of the q-function with + * respect to its arguments are stored + * @param[in] J_ The Jacobians of the element transformations at all quadrature points + * @see mfem::GeometricFactors + * @param[in] num_elements The number of elements in the mesh + */ + +template +void action_of_gradient_kernel(const double* dU, double* dR, derivatives_type* qf_derivatives, const int* elements, + std::size_t num_elements) +{ + using test_element = finite_element; + using trial_element = finite_element; + + constexpr bool is_QOI = (test::family == Family::QOI); + constexpr int num_qpts = num_quadrature_points(g, Q); + + // mfem provides this information in 1D arrays, so we reshape it + // into strided multidimensional arrays before using + auto du = reinterpret_cast(dU); + auto dr = reinterpret_cast(dR); + constexpr TensorProductQuadratureRule rule{}; + + // for each element in the domain + for (uint32_t e = 0; e < num_elements; e++) { + // (batch) interpolate each quadrature point's value + auto qf_inputs = trial_element::interpolate(du[elements[e]], rule); + + // (batch) evalute the q-function at each quadrature point + auto qf_outputs = batch_apply_chain_rule(qf_derivatives + e * num_qpts, qf_inputs); + + // (batch) integrate the material response against the test-space basis functions + test_element::integrate(qf_outputs, rule, &dr[elements[e]]); + } +} + +/** + * @brief The base kernel template used to compute tangent element entries that can be assembled + * into a tangent matrix + * + * @tparam test The type of the test function space + * @tparam trial The type of the trial function space + * The above spaces can be any combination of {H1, Hcurl, Hdiv (TODO), L2 (TODO), QOI} + * + * Template parameters other than the test and trial spaces are used for customization + optimization + * and are erased through the @p std::function members of @p Integral + * @tparam g The shape of the element (only quadrilateral and hexahedron are supported at present) + * @tparam Q parameter describing number of quadrature points (see num_quadrature_points() function for more details) + * @tparam derivatives_type Type representing the derivative of the q-function w.r.t. its input arguments + * + * + * @param[inout] dk 3-dimensional array storing the element gradient matrices + * @param[in] derivatives_ptr pointer to data describing the derivatives of the q-function with respect to its arguments + * @param[in] J_ The Jacobians of the element transformations at all quadrature points + * @see mfem::GeometricFactors + * @param[in] num_elements The number of elements in the mesh + */ +template +void element_gradient_kernel(ExecArrayView dK, derivatives_type* qf_derivatives, + const int* elements, std::size_t num_elements) +{ + // quantities of interest have no flux term, so we pad the derivative + // tuple with a "zero" type in the second position to treat it like the standard case + constexpr bool is_QOI = test::family == Family::QOI; + using padded_derivative_type = std::conditional_t, derivatives_type>; + + using test_element = finite_element; + using trial_element = finite_element; + + constexpr int nquad = num_quadrature_points(g, Q); + + static constexpr TensorProductQuadratureRule rule{}; + + // for each element in the domain + for (uint32_t e = 0; e < num_elements; e++) { + auto* output_ptr = reinterpret_cast(&dK(elements[e], 0, 0)); + + tensor derivatives{}; + for (int q = 0; q < nquad; q++) { + if constexpr (is_QOI) { + get<0>(derivatives(q)) = qf_derivatives[e * nquad + uint32_t(q)]; + } else { + derivatives(q) = qf_derivatives[e * nquad + uint32_t(q)]; + } + } + + for (int J = 0; J < trial_element::ndof; J++) { + auto source_and_flux = trial_element::batch_apply_shape_fn(J, derivatives, rule); + test_element::integrate(source_and_flux, rule, output_ptr + J, trial_element::ndof); + } + } +} + +template +auto evaluation_kernel(signature s, lambda_type qf, const double* positions, const double* jacobians, + std::shared_ptr > qf_state, + std::shared_ptr qf_derivatives, const int* elements, uint32_t num_elements) +{ + auto trial_elements = trial_elements_tuple(s); + auto test_element = get_test_element(s); + return [=](double time, const std::vector& inputs, double* outputs, bool update_state) { + domain_integral::evaluation_kernel_impl( + trial_elements, test_element, time, inputs, outputs, positions, jacobians, qf, (*qf_state)[geom], + qf_derivatives.get(), elements, num_elements, update_state, s.index_seq); + }; +} + +template +std::function jacobian_vector_product_kernel( + signature, std::shared_ptr qf_derivatives, const int* elements, uint32_t num_elements) +{ + return [=](const double* du, double* dr) { + using test_space = typename signature::return_type; + using trial_space = typename std::tuple_element::type; + action_of_gradient_kernel(du, dr, qf_derivatives.get(), elements, num_elements); + }; +} + +template +std::function)> element_gradient_kernel( + signature, std::shared_ptr qf_derivatives, const int* elements, uint32_t num_elements) +{ + return [=](ExecArrayView K_elem) { + using test_space = typename signature::return_type; + using trial_space = typename std::tuple_element::type; + element_gradient_kernel(K_elem, qf_derivatives.get(), elements, num_elements); + }; +} + +} // namespace domain_integral + +} // namespace serac diff --git a/src/serac/numerics/functional/enzyme_declarations.hpp b/src/serac/numerics/functional/enzyme_declarations.hpp new file mode 100644 index 000000000..ef6ba8cf6 --- /dev/null +++ b/src/serac/numerics/functional/enzyme_declarations.hpp @@ -0,0 +1,12 @@ +#pragma once + +int enzyme_dup; +int enzyme_dupnoneed; +int enzyme_out; +int enzyme_const; + +template < typename return_type, typename ... T > +return_type __enzyme_fwddiff(void*, T ... ); + +template < typename return_type, typename ... T > +return_type __enzyme_autodiff(void*, T ... ); diff --git a/src/serac/numerics/functional/enzyme_wrapper.hpp b/src/serac/numerics/functional/enzyme_wrapper.hpp new file mode 100644 index 000000000..8ea92c9b7 --- /dev/null +++ b/src/serac/numerics/functional/enzyme_wrapper.hpp @@ -0,0 +1,313 @@ +#pragma once + +#include + +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" + +namespace serac { + +namespace impl { + + constexpr int vsize(const zero &) { return 0; } + + constexpr int vsize(const double &) { return 1; } + + template < int n > + constexpr int vsize(const tensor< double, n > &) { return n; } + + template < int m, int n > + constexpr int vsize(const tensor< double, m, n > &) { return m * n; } + + template < typename T0, typename T1 > + constexpr int vsize(const tuple< T0, T1 > &) { return vsize(T0{}) + vsize(T1{}); } + + template < typename T0, typename T1 > + struct nested; + + template <> + struct nested< double, double >{ using type = double; }; + + template < typename T > + struct nested< zero, T >{ using type = zero; }; + + template < int ... n > + struct nested< double, tensor >{ using type = tensor; }; + + template < typename T0, typename T1 > + struct nested< double, tuple >{ using type = tuple; }; + +//////////////////////////////////////////////////////////////////////////////// + + template < int ... n > + struct nested< tensor, double >{ using type = tensor; }; + + template < int ... m, int ... n > + struct nested< tensor, tensor >{ using type = tensor; }; + +//////////////////////////////////////////////////////////////////////////////// + + // sam: with tuples-of-tuples, figuring out which indices correspond to the directional + // derivatives gets pretty nasty, so I'm commenting these out to prevent jacfwd from + // compiling in that case + // + template < typename S0, typename S1, typename T > + struct nested< tuple< S0, S1 >, T >{ + using type = tuple< typename nested::type, typename nested::type >; + }; + + template < typename S0, typename S1, typename T0, typename T1 > + struct nested< tuple< S0, S1 >, tuple< T0, T1 > >{ + using type = tuple< + tuple< typename nested::type, typename nested::type >, + tuple< typename nested::type, typename nested::type > + >; + }; + +//////////////////////////////////////////////////////////////////////////////// + + template< typename output_type, typename function, typename ... arg_types > + void wrapper(output_type & output, const function & f, const arg_types & ... args) { + output = f(args...); + } + +//////////////////////////////////////////////////////////////////////////////// + + template < typename function, typename input_type > + __attribute__((always_inline)) + auto jvp(const function & f, const input_type & x) { + using output_type = decltype(f(x)); + void * func_ptr = reinterpret_cast(wrapper< output_type, function, input_type >); + return [=](const input_type & dx) { + output_type unused{}; + output_type df{}; + __enzyme_fwddiff(func_ptr, + enzyme_dupnoneed, &unused, &df, + enzyme_const, reinterpret_cast(&f), + enzyme_dup, &x, &dx + ); + return df; + }; + } + +//////////////////////////////////////////////////////////////////////////////// + + template < typename function, typename input_type > + __attribute__((always_inline)) + auto jacfwd(const function & f, const input_type & x) { + using output_type = decltype(f(x)); + using jac_type = typename impl::nested::type; + void * func_ptr = reinterpret_cast(wrapper< output_type, function, input_type >); + + jac_type J{}; + input_type dx{}; + + double * dx_ptr = reinterpret_cast(&dx); + + // tuple input and tuple output + if constexpr (is_tuple{} && is_tuple{}) { + + static_assert((tuple_size{} == 2) && (tuple_size{} == 2), "error: jacfwd() currently only supports tuples of 2 values"); + + constexpr int m0 = impl::vsize(get<0>(output_type{})); + constexpr int m1 = impl::vsize(get<1>(output_type{})); + constexpr int n0 = impl::vsize(get<0>(input_type{})); + constexpr int n1 = impl::vsize(get<1>(input_type{})); + + for (int j = 0; j < (n0 + n1); j++) { + dx_ptr[j] = 1.0; + + //std::cout << dx << std::endl; + + output_type unused{}; + output_type df_dxj{}; + __enzyme_fwddiff(func_ptr, + enzyme_dupnoneed, &unused, &df_dxj, + enzyme_const, reinterpret_cast(&f), + enzyme_dup, &x, &dx + ); + + //std::cout << df_dxj << std::endl; + + double * df0_dxj_ptr = reinterpret_cast(&get<0>(df_dxj)); + double * df1_dxj_ptr = reinterpret_cast(&get<1>(df_dxj)); + + if (j < n0) { + int j0 = j; + double * J00_ptr = reinterpret_cast(&get<0>(get<0>(J))); + double * J10_ptr = reinterpret_cast(&get<0>(get<1>(J))); + for (int i0 = 0; i0 < m0; i0++) { + J00_ptr[i0 * n0 + j0] = df0_dxj_ptr[i0]; + } + + for (int i1 = 0; i1 < m1; i1++) { + J10_ptr[i1 * n0 + j0] = df1_dxj_ptr[i1]; + } + } else { + int j1 = j - n0; + double * J01_ptr = reinterpret_cast(&get<1>(get<0>(J))); + double * J11_ptr = reinterpret_cast(&get<1>(get<1>(J))); + for (int i0 = 0; i0 < m0; i0++) { + J01_ptr[i0 * n1 + j1] = df0_dxj_ptr[i0]; + } + + for (int i1 = 0; i1 < m1; i1++) { + J11_ptr[i1 * n1 + j1] = df1_dxj_ptr[i1]; + } + } + + //std::cout << J << std::endl; + + dx_ptr[j] = 0.0; + } + + return J; + + } else { + + constexpr int m = impl::vsize(output_type{}); + constexpr int n = impl::vsize(input_type{}); + + double * J_ptr = reinterpret_cast(&J); + + for (int j = 0; j < n; j++) { + dx_ptr[j] = 1.0; + + //std::cout << dx << std::endl; + + output_type unused{}; + output_type df_dxj{}; + __enzyme_fwddiff(func_ptr, + enzyme_dupnoneed, &unused, &df_dxj, + enzyme_const, reinterpret_cast(&f), + enzyme_dup, &x, &dx + ); + + //std::cout << df_dxj << std::endl; + + double * df_dxj_ptr = reinterpret_cast(&df_dxj); + for (int i = 0; i < m; i++) { + J_ptr[i * n + j] = df_dxj_ptr[i]; + } + + //std::cout << J << std::endl; + + dx_ptr[j] = 0.0; + } + + return J; + + } + + + } + +} + +//////////////////////////////////////////////////////////////////////////////// + +template < int i, typename function, typename T0 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0) { + if constexpr (i == 0) { return impl::jvp(f, arg0); } +} + +template < int i, typename function, typename T0, typename T1 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0, const T1 & arg1) { + if constexpr (i == 0) { return impl::jvp([&](T0 x){ return f(x, arg1); }, arg0); } + if constexpr (i == 1) { return impl::jvp([&](T1 x){ return f(arg0, x); }, arg1); } +} + +template < int i, typename function, typename T0, typename T1, typename T2 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2) { + if constexpr (i == 0) { return impl::jvp([&](const T0 & x){ return f(x, arg1, arg2); }, arg0); } + if constexpr (i == 1) { return impl::jvp([&](const T1 & x){ return f(arg0, x, arg2); }, arg1); } + if constexpr (i == 2) { return impl::jvp([&](const T2 & x){ return f(arg0, arg1, x); }, arg2); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3) { + if constexpr (i == 0) { return impl::jvp([&](const T0 & x){ return f(x, arg1, arg2, arg3); }, arg0); } + if constexpr (i == 1) { return impl::jvp([&](const T1 & x){ return f(arg0, x, arg2, arg3); }, arg1); } + if constexpr (i == 2) { return impl::jvp([&](const T2 & x){ return f(arg0, arg1, x, arg3); }, arg2); } + if constexpr (i == 3) { return impl::jvp([&](const T3 & x){ return f(arg0, arg1, arg2, x); }, arg3); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3, typename T4 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3, const T4 & arg4) { + if constexpr (i == 0) { return impl::jvp([&](const T0 & x){ return f(x, arg1, arg2, arg3, arg4); }, arg0); } + if constexpr (i == 1) { return impl::jvp([&](const T1 & x){ return f(arg0, x, arg2, arg3, arg4); }, arg1); } + if constexpr (i == 2) { return impl::jvp([&](const T2 & x){ return f(arg0, arg1, x, arg3, arg4); }, arg2); } + if constexpr (i == 3) { return impl::jvp([&](const T3 & x){ return f(arg0, arg1, arg2, x, arg4); }, arg3); } + if constexpr (i == 4) { return impl::jvp([&](const T4 & x){ return f(arg0, arg1, arg2, arg3, x); }, arg4); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3, typename T4, typename T5 > +__attribute__((always_inline)) +auto jvp(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3, const T4 & arg4, const T5 & arg5) { + if constexpr (i == 0) { return impl::jvp([&](const T0 & x){ return f(x, arg1, arg2, arg3, arg4, arg5); }, arg0); } + if constexpr (i == 1) { return impl::jvp([&](const T1 & x){ return f(arg0, x, arg2, arg3, arg4, arg5); }, arg1); } + if constexpr (i == 2) { return impl::jvp([&](const T2 & x){ return f(arg0, arg1, x, arg3, arg4, arg5); }, arg2); } + if constexpr (i == 3) { return impl::jvp([&](const T3 & x){ return f(arg0, arg1, arg2, x, arg4, arg5); }, arg3); } + if constexpr (i == 4) { return impl::jvp([&](const T4 & x){ return f(arg0, arg1, arg2, arg3, x, arg5); }, arg4); } + if constexpr (i == 5) { return impl::jvp([&](const T5 & x){ return f(arg0, arg1, arg2, arg3, arg4, x); }, arg5); } +} + +//////////////////////////////////////////////////////////////////////////////// + +template < int i, typename function, typename T0 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0) { + if constexpr (i == 0) { return impl::jacfwd(f, arg0); } +} + +template < int i, typename function, typename T0, typename T1 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1) { + if constexpr (i == 0) { return impl::jacfwd([&](T0 x){ return f(x, arg1); }, arg0); } + if constexpr (i == 1) { return impl::jacfwd([&](T1 x){ return f(arg0, x); }, arg1); } +} + +template < int i, typename function, typename T0, typename T1, typename T2 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2) { + if constexpr (i == 0) { return impl::jacfwd([&](T0 x){ return f(x, arg1, arg2); }, arg0); } + if constexpr (i == 1) { return impl::jacfwd([&](T1 x){ return f(arg0, x, arg2); }, arg1); } + if constexpr (i == 2) { return impl::jacfwd([&](T2 x){ return f(arg0, arg1, x); }, arg2); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3) { + if constexpr (i == 0) { return impl::jacfwd([&](T0 x){ return f(x, arg1, arg2, arg3); }, arg0); } + if constexpr (i == 1) { return impl::jacfwd([&](T1 x){ return f(arg0, x, arg2, arg3); }, arg1); } + if constexpr (i == 2) { return impl::jacfwd([&](T2 x){ return f(arg0, arg1, x, arg3); }, arg2); } + if constexpr (i == 3) { return impl::jacfwd([&](T3 x){ return f(arg0, arg1, arg2, x); }, arg3); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3, typename T4 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3, const T4 & arg4) { + if constexpr (i == 0) { return impl::jacfwd([&](T0 x){ return f(x, arg1, arg2, arg3, arg4); }, arg0); } + if constexpr (i == 1) { return impl::jacfwd([&](T1 x){ return f(arg0, x, arg2, arg3, arg4); }, arg1); } + if constexpr (i == 2) { return impl::jacfwd([&](T2 x){ return f(arg0, arg1, x, arg3, arg4); }, arg2); } + if constexpr (i == 3) { return impl::jacfwd([&](T3 x){ return f(arg0, arg1, arg2, x, arg4); }, arg3); } + if constexpr (i == 4) { return impl::jacfwd([&](T4 x){ return f(arg0, arg1, arg2, arg3, x); }, arg4); } +} + +template < int i, typename function, typename T0, typename T1, typename T2, typename T3, typename T4, typename T5 > +__attribute__((always_inline)) +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1, const T2 & arg2, const T3 & arg3, const T4 & arg4, const T5 & arg5) { + if constexpr (i == 0) { return impl::jacfwd([&](T0 x){ return f(x, arg1, arg2, arg3, arg4, arg5); }, arg0); } + if constexpr (i == 1) { return impl::jacfwd([&](T1 x){ return f(arg0, x, arg2, arg3, arg4, arg5); }, arg1); } + if constexpr (i == 2) { return impl::jacfwd([&](T2 x){ return f(arg0, arg1, x, arg3, arg4, arg5); }, arg2); } + if constexpr (i == 3) { return impl::jacfwd([&](T3 x){ return f(arg0, arg1, arg2, x, arg4, arg5); }, arg3); } + if constexpr (i == 4) { return impl::jacfwd([&](T4 x){ return f(arg0, arg1, arg2, arg3, x, arg5); }, arg4); } + if constexpr (i == 5) { return impl::jacfwd([&](T5 x){ return f(arg0, arg1, arg2, arg3, arg4, x); }, arg5); } +} + +} diff --git a/src/serac/numerics/functional/tensor.hpp b/src/serac/numerics/functional/tensor.hpp index b3733d20e..db4e9687c 100644 --- a/src/serac/numerics/functional/tensor.hpp +++ b/src/serac/numerics/functional/tensor.hpp @@ -1810,6 +1810,32 @@ SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor& df_dx, c return total; } +/** + * @overload + * @note for a vector-valued function of a tensor, the chain rule contracts over all indices of dx + */ +template +SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor< tensor< double, n ... >, m >& df_dx, const tensor& dx) +{ + tensor total{}; + for (int i = 0; i < m; i++) { + total[i] = chain_rule(df_dx[i], dx); + } + return total; +} + +template +SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor< tensor< double, n ... >, m1, m2 >& df_dx, const tensor& dx) +{ + tensor total{}; + for (int i1 = 0; i1 < m1; i1++) { + for (int i2 = 0; i2 < m2; i2++) { + total[i1][i2] = chain_rule(df_dx[i1][i2], dx); + } + } + return total; +} + /** * @overload * @note for a vector-valued function of a tensor, the chain rule contracts over all indices of dx diff --git a/src/serac/numerics/functional/tests/CMakeLists.txt b/src/serac/numerics/functional/tests/CMakeLists.txt index bb0f64633..aed5bf936 100644 --- a/src/serac/numerics/functional/tests/CMakeLists.txt +++ b/src/serac/numerics/functional/tests/CMakeLists.txt @@ -68,3 +68,23 @@ if(ENABLE_CUDA) DEPENDS_ON gtest serac_functional serac_state ${functional_depends}) endif() + +target_compile_options(functional_basic_h1_scalar PUBLIC -mllvm -enzyme-loose-types) + +add_executable(enzyme_test enzyme_test.cpp) +target_link_libraries(enzyme_test PUBLIC serac_functional) + +add_executable(enzyme_jvp_test enzyme_jvp_test.cpp) +target_link_libraries(enzyme_jvp_test PUBLIC serac_functional) + +add_executable(enzyme_test_tuples enzyme_test_tuples.cpp) +target_link_libraries(enzyme_test_tuples PUBLIC serac_functional) + +add_executable(enzyme_test_tuples_of_tuples enzyme_test_tuples_of_tuples.cpp) +target_link_libraries(enzyme_test_tuples_of_tuples PUBLIC serac_functional) + +add_executable(enzyme_reproducer enzyme_reproducer.cpp) +target_link_libraries(enzyme_reproducer PUBLIC serac_functional) + +add_executable(enzyme_reproducer_rbr enzyme_reproducer_rbr.cpp) +target_link_libraries(enzyme_reproducer_rbr PUBLIC serac_functional) diff --git a/src/serac/numerics/functional/tests/enzyme_jvp_perf_test.cxx b/src/serac/numerics/functional/tests/enzyme_jvp_perf_test.cxx new file mode 100644 index 000000000..eb77915c2 --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_jvp_perf_test.cxx @@ -0,0 +1,69 @@ +#include + +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" +#include "serac/physics/materials/solid_material.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" + +using namespace serac; + +template < int i, typename material_model, typename ... arg_types > +auto precompute_gradient(material_model mat, const std::vector< arg_types > & ... args) { + using output_type = decltype(mat(args[0] ...)); + uint32_t n = + +} + +template < int i, typename material_model, typename ... arg_types, typename darg_type > +double jvp_precomputed_gradient( std::vector< arg_types > & ) { + +} + +int main() { + + auto f = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return tuple{dot(du_dx, z * u), z * (du_dx + transpose(du_dx)) - outer(u, u)}; + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto f_jvp0 = [](double z, + double dz, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return tuple{dot(du_dx, u) * dz, (du_dx + transpose(du_dx)) * dz}; + }; + + auto f_jvp1 = [](double z, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & ddisplacement) { + auto [u, du_dx] = displacement; + auto [du, ddu_dx] = ddisplacement; + vec3 df1 = dot(du_dx, du) * z + dot(ddu_dx, u) * z; + mat3 df2 = outer(du, u) - outer(u, du) + (ddu_dx + transpose(ddu_dx)) * z; + return tuple{df1, df2}; + }; + +//////////////////////////////////////////////////////////////////////////////// + + double eps = 1.0e-6; + + double z = 3.0; + double dz = 1.4; + + auto displacement = tuple { + tensor{{1.0, 1.0, 1.0}}, + tensor{{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}} + }; + + auto ddisplacement = tuple { + tensor{{0.1, 0.8, -0.3}}, + tensor{{{0.2, -0.4, 0.2}, {0.1, 0.8, 0.5}, {0.3, 0.7, 1.8}}} + }; + + std::cout << "expected: " << f_jvp0(z, dz, displacement) << std::endl; + std::cout << "enzyme: " << jvp<0>(f, z, displacement)(dz) << std::endl; + std::cout << "finite_difference: " << ((f(z + eps * dz, displacement) - f(z, displacement)) / eps) << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_jvp_test.cpp b/src/serac/numerics/functional/tests/enzyme_jvp_test.cpp new file mode 100644 index 000000000..04204cfa4 --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_jvp_test.cpp @@ -0,0 +1,57 @@ +#include + +#include "serac/numerics/functional/dual.hpp" +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" + +using namespace serac; + +int main() { + + auto f = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return tuple{dot(du_dx, z * u), z * (du_dx + transpose(du_dx)) - outer(u, u)}; + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto f_jvp0 = [](double z, + double dz, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return tuple{dot(du_dx, u) * dz, (du_dx + transpose(du_dx)) * dz}; + }; + + auto f_jvp1 = [](double z, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement, + const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & ddisplacement) { + auto [u, du_dx] = displacement; + auto [du, ddu_dx] = ddisplacement; + vec3 df1 = dot(du_dx, du) * z + dot(ddu_dx, u) * z; + mat3 df2 = outer(du, u) - outer(u, du) + (ddu_dx + transpose(ddu_dx)) * z; + return tuple{df1, df2}; + }; + +//////////////////////////////////////////////////////////////////////////////// + + double eps = 1.0e-6; + + double z = 3.0; + double dz = 1.4; + + auto displacement = tuple { + tensor{{1.0, 1.0, 1.0}}, + tensor{{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}} + }; + + auto ddisplacement = tuple { + tensor{{0.1, 0.8, -0.3}}, + tensor{{{0.2, -0.4, 0.2}, {0.1, 0.8, 0.5}, {0.3, 0.7, 1.8}}} + }; + + std::cout << "expected: " << f_jvp0(z, dz, displacement) << std::endl; + std::cout << "enzyme: " << jvp<0>(f, z, displacement)(dz) << std::endl; + std::cout << "finite_difference: " << ((f(z + eps * dz, displacement) - f(z, displacement)) / eps) << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_reproducer.cpp b/src/serac/numerics/functional/tests/enzyme_reproducer.cpp new file mode 100644 index 000000000..ba8155a5c --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_reproducer.cpp @@ -0,0 +1,142 @@ +#include +#include + +//////////////////////////////////////////////////////////////////////////////// + +template < typename T, int m, int n > +struct tensor { + T & operator()(int i, int j) { return values[i][j]; } + const T & operator()(int i, int j) const { return values[i][j]; } + T values[m][n]; +}; + +template < int m, int n > +tensor operator*(double scale, const tensor & A) { + tensor scaled{}; + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + scaled(i,j) = scale * A(i,j); + } + } + return scaled; +} + +template < typename T, int m, int n > +std::ostream& operator<<(std::ostream & out, const tensor & A) { + out << "{"; + for (int i = 0; i < m; i++) { + out << "{"; + for (int j = 0; j < n; j++) { + out << A(i,j); + if (j != n - 1) { out << ", "; } + } + out << "}"; + if (i != m - 1) { out << ", "; } + } + out << "}"; + return out; +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace impl { + + constexpr int vsize(const double &) { return 1; } + + template < int m, int n > + constexpr int vsize(const tensor< double, m, n > &) { return m * n; } + + template < int m, int n, int p, int q > + constexpr int vsize(const tensor< tensor< double, p, q >, m, n > &) { return m * n * p * q; } + + template < typename T1, typename T2 > + struct outer_prod; + + template < int m, int n > + struct outer_prod< double, tensor< double, m, n > >{ + using type = tensor; + }; + + template < int m, int n > + struct outer_prod< tensor< double, m, n >, double >{ + using type = tensor; + }; + + template < int m, int n, int p, int q > + struct outer_prod< tensor< double, m, n >, tensor< double, p, q > >{ + using type = tensor, m, n>; + }; +} + +//////////////////////////////////////////////////////////////////////////////// + +template< typename output_type, typename function, typename ... arg_types > +void wrapper(output_type & output, const function & f, const arg_types & ... args) { + output = f(args...); +} + +template < typename function, typename input_type > +auto jacfwd(const function & f, const input_type & x) { + using output_type = decltype(f(x)); + using jac_type = typename impl::outer_prod::type; + void * func_ptr = reinterpret_cast(wrapper< output_type, function, input_type >); + + constexpr int m = impl::vsize(output_type{}); + jac_type J{}; + double * J_ptr = reinterpret_cast(&J); + + constexpr int n = impl::vsize(input_type{}); + input_type dx{}; + double * dx_ptr = reinterpret_cast(&dx); + + for (int j = 0; j < n; j++) { + dx_ptr[j] = 1.0; + + output_type unused{}; + output_type df_dxj{}; + __enzyme_fwddiff(func_ptr, + enzyme_dupnoneed, &unused, &df_dxj, + enzyme_const, reinterpret_cast(&f), + enzyme_dup, &x, &dx + ); + + double * df_dxj_ptr = reinterpret_cast(&df_dxj); + for (int i = 0; i < m; i++) { + J_ptr[i * n + j] = df_dxj_ptr[i]; + } + + dx_ptr[j] = 0.0; + } + + return J; +} + +template < int i, typename function, typename T0, typename T1 > +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1) { + if constexpr (i == 0) { return jacfwd([&](T0 x){ return f(x, arg1); }, arg0); } + if constexpr (i == 1) { return jacfwd([&](T1 x){ return f(arg0, x); }, arg1); } +} + +//////////////////////////////////////////////////////////////////////////////// + +int main() { + + auto f = [=](double z, const tensor< double, 3, 3 > & du_dx) { return z * du_dx; }; + + double z = 3.0; + tensor du_dx = {{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}}; + + std::cout << "f(x, du_dx): " << f(z, du_dx) << std::endl; + std::cout << "expected: {{3., 6., 9.}, {6., 9., 3.}, {3., 1.5, 0.6}}" << std::endl; + std::cout << std::endl; + + auto df_darg0 = jacfwd<0>(f, z, du_dx); + std::cout << "df_dz: " << df_darg0 << std::endl; + std::cout << "expected: {{1., 2., 3.}, {2., 3., 1.}, {1., 0.5, 0.2}}" << std::endl; + std::cout << std::endl; + + auto df_darg1 = jacfwd<1>(f, z, du_dx); + std::cout << "df_d(du_dx): " << df_darg1 << std::endl; + std::cout << "expected: {{{{3., 0, 0}, {0, 0, 0}, {0, 0, 0}}, {{0, 3., 0}, {0, 0, 0}, {0, 0, 0}}, {{0, 0, 3.}, {0, 0, 0}, {0, 0, 0}}}, {{{0, 0, 0}, {3., 0, 0}, {0, 0, 0}}, {{0, 0, 0}, {0, 3., 0}, {0, 0, 0}}, {{0, 0, 0}, {0, 0, 3.}, {0, 0, 0}}}, {{{0, 0, 0}, {0, 0, 0}, {3., 0, 0}}, {{0, 0, 0}, {0, 0, 0}, {0, 3., 0}}, {{0, 0, 0}, {0, 0, 0}, {0, 0, 3.}}}}" << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_reproducer_rbr.cpp b/src/serac/numerics/functional/tests/enzyme_reproducer_rbr.cpp new file mode 100644 index 000000000..ac75d954e --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_reproducer_rbr.cpp @@ -0,0 +1,141 @@ +#include +#include + +extern int enzyme_active_return; + +//////////////////////////////////////////////////////////////////////////////// + +template < typename T, int m, int n > +struct tensor { + T & operator()(int i, int j) { return values[i][j]; } + const T & operator()(int i, int j) const { return values[i][j]; } + T values[m][n]; +}; + +template < int m, int n > +tensor operator*(double scale, const tensor & A) { + tensor scaled{}; + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + scaled(i,j) = scale * A(i,j); + } + } + return scaled; +} + +template < typename T, int m, int n > +std::ostream& operator<<(std::ostream & out, const tensor & A) { + out << "{"; + for (int i = 0; i < m; i++) { + out << "{"; + for (int j = 0; j < n; j++) { + out << A(i,j); + if (j != n - 1) { out << ", "; } + } + out << "}"; + if (i != m - 1) { out << ", "; } + } + out << "}"; + return out; +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace impl { + + constexpr int vsize(const double &) { return 1; } + + template < int m, int n > + constexpr int vsize(const tensor< double, m, n > &) { return m * n; } + + template < int m, int n, int p, int q > + constexpr int vsize(const tensor< tensor< double, p, q >, m, n > &) { return m * n * p * q; } + + template < typename T1, typename T2 > + struct outer_prod; + + template < int m, int n > + struct outer_prod< double, tensor< double, m, n > >{ + using type = tensor; + }; + + template < int m, int n > + struct outer_prod< tensor< double, m, n >, double >{ + using type = tensor; + }; + + template < int m, int n, int p, int q > + struct outer_prod< tensor< double, m, n >, tensor< double, p, q > >{ + using type = tensor, m, n>; + }; +} + +//////////////////////////////////////////////////////////////////////////////// + +template< typename T, typename ... arg_types > +auto wrapper(const T & f, const arg_types & ... args) { + return f(args...); +} + +template < typename function, typename input_type > +auto jacfwd(const function & f, const input_type & x) { + using output_type = decltype(f(x)); + using jac_type = typename impl::outer_prod::type; + void * func_ptr = reinterpret_cast(wrapper< function, input_type >); + + constexpr int m = impl::vsize(output_type{}); + jac_type J{}; + double * J_ptr = reinterpret_cast(&J); + + constexpr int n = impl::vsize(input_type{}); + input_type dx{}; + double * dx_ptr = reinterpret_cast(&dx); + + for (int j = 0; j < n; j++) { + dx_ptr[j] = 1.0; + + output_type df_dxj = __enzyme_fwddiff(func_ptr, + enzyme_const, reinterpret_cast(&f), + enzyme_dup, &x, &dx + ); + + double * df_dxj_ptr = reinterpret_cast(&df_dxj); + for (int i = 0; i < m; i++) { + J_ptr[i * n + j] = df_dxj_ptr[i]; + } + + dx_ptr[j] = 0.0; + } + + return J; +} + +template < int i, typename function, typename T0, typename T1 > +auto jacfwd(const function & f, const T0 & arg0, const T1 & arg1) { + if constexpr (i == 0) { return jacfwd([&](T0 x){ return f(x, arg1); }, arg0); } + if constexpr (i == 1) { return jacfwd([&](T1 x){ return f(arg0, x); }, arg1); } +} + +//////////////////////////////////////////////////////////////////////////////// + +int main() { + + auto f = [=](double z, const tensor< double, 3, 3 > & du_dx) { return z * du_dx; }; + + double z = 3.0; + tensor du_dx = {{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}}; + + std::cout << "f(x, du_dx): " << f(z, du_dx) << std::endl; + std::cout << "expected: {{3., 6., 9.}, {6., 9., 3.}, {3., 1.5, 0.6}}" << std::endl; + std::cout << std::endl; + + auto df_darg0 = jacfwd<0>(f, z, du_dx); + std::cout << "df_dz: " << df_darg0 << std::endl; + std::cout << "expected: {{1., 2., 3.}, {2., 3., 1.}, {1., 0.5, 0.2}}" << std::endl; + std::cout << std::endl; + + auto df_darg1 = jacfwd<1>(f, z, du_dx); + std::cout << "df_d(du_dx): " << df_darg1 << std::endl; + std::cout << "expected: {{{{3., 0, 0}, {0, 0, 0}, {0, 0, 0}}, {{0, 3., 0}, {0, 0, 0}, {0, 0, 0}}, {{0, 0, 3.}, {0, 0, 0}, {0, 0, 0}}}, {{{0, 0, 0}, {3., 0, 0}, {0, 0, 0}}, {{0, 0, 0}, {0, 3., 0}, {0, 0, 0}}, {{0, 0, 0}, {0, 0, 3.}, {0, 0, 0}}}, {{{0, 0, 0}, {0, 0, 0}, {3., 0, 0}}, {{0, 0, 0}, {0, 0, 0}, {0, 3., 0}}, {{0, 0, 0}, {0, 0, 0}, {0, 0, 3.}}}}" << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_test.cpp b/src/serac/numerics/functional/tests/enzyme_test.cpp new file mode 100644 index 000000000..94441ab7c --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_test.cpp @@ -0,0 +1,113 @@ +#include + +#include "serac/numerics/functional/dual.hpp" +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" + +template < typename T > +struct undefined; + +using namespace serac; + +int main() { + + auto f = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return z * (du_dx + transpose(du_dx)) - outer(u, u); + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfdz = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return (du_dx + transpose(du_dx)); + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfdu = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto u = get<0>(displacement); + tensor output{}; + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k) = - (u(i) * (j == k) + u(j) * (i == k)); + } + } + } + return output; + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfddudx = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > &) { + tensor output{}; + for (int l = 0; l < 3; l++) { + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k,l) = z * ((i==k) * (j==l) + (j==k) * (i==l)); + } + } + } + } + return output; + }; + +//////////////////////////////////////////////////////////////////////////////// + + double z = 3.0; + auto displacement = tuple { + tensor{{1.0, 1.0, 1.0}}, + tensor{{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}} + }; + + auto df_dz = jacfwd<0>(f, z, displacement); + std::cout << "df_dz: " << df_dz << std::endl; + std::cout << "expected: " << dfdz(z, displacement) << std::endl; + std::cout << std::endl; + + auto df_ddisp = jacfwd<1>(f, z, displacement); + std::cout << "df_du: "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + std::cout << get<0>(df_ddisp(i,j)); + if (j != 2) { std::cout << ","; } + } + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfdu(z, displacement) << std::endl; + std::cout << std::endl; + + std::cout << "df_d(du_dx): "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + std::cout << get<1>(df_ddisp(i,j)); + if (j != 2) { std::cout << ","; } + } + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfddudx(z, displacement) << std::endl; + std::cout << std::endl; + + //tuple< double, vec3 > x; + //serac::tuple< + // serac::tuple, + // serac::tuple + //> + //auto x_dx = make_dual(x); + //undefined< decltype(get_gradient(x_dx)) > foo; + + //serac::tuple< + // serac::tuple>, + // serac::tuple, serac::tensor, 3> + //> + //using derivative_type = impl::nested< tuple< double, vec3 >, tuple< double, vec3 > >::type; + //undefined< derivative_type > foo; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_test_tuples.cpp b/src/serac/numerics/functional/tests/enzyme_test_tuples.cpp new file mode 100644 index 000000000..49f9e942c --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_test_tuples.cpp @@ -0,0 +1,92 @@ +#include + +#include + +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" + +using namespace serac; + +int main() { + + auto f = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return z * (du_dx + transpose(du_dx)) - outer(u, u); + }; + + auto dfdz = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return (du_dx + transpose(du_dx)); + }; + + auto dfdu = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto u = get<0>(displacement); + tensor output{}; + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k) = - (u(i) * (j == k) + u(j) * (i == k)); + } + } + } + return output; + }; + + auto dfddudx = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > &) { + tensor output{}; + for (int l = 0; l < 3; l++) { + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k,l) = z * ((i==k) * (j==l) + (j==k) * (i==l)); + } + } + } + } + return output; + }; + + double z = 3.0; + auto displacement = tuple { + tensor{{1.0, 1.0, 1.0}}, + tensor{{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}} + }; + + auto df_dz = jacfwd<0>(f, z, displacement); + std::cout << "df_dz: " << df_dz << std::endl; + std::cout << "expected: " << dfdz(z, displacement) << std::endl; + std::cout << std::endl; + + auto df_ddisp = jacfwd<1>(f, z, displacement); + std::cout << "df_du: "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << "{"; + for (int j = 0; j < 3; j++) { + std::cout << get<0>(df_ddisp(i,j)); + if (j != 2) { std::cout << ","; } + } + std::cout << "}"; + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfdu(z, displacement) << std::endl; + std::cout << std::endl; + + std::cout << "df_d(du_dx): "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << "{"; + for (int j = 0; j < 3; j++) { + std::cout << get<1>(df_ddisp(i,j)); + if (j != 2) { std::cout << ","; } + } + std::cout << "}"; + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfddudx(z, displacement) << std::endl; + std::cout << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/enzyme_test_tuples_of_tuples.cpp b/src/serac/numerics/functional/tests/enzyme_test_tuples_of_tuples.cpp new file mode 100644 index 000000000..4222dda9b --- /dev/null +++ b/src/serac/numerics/functional/tests/enzyme_test_tuples_of_tuples.cpp @@ -0,0 +1,147 @@ +#include + +#include + +#include "serac/numerics/functional/tuple.hpp" +#include "serac/numerics/functional/tensor.hpp" +#include "serac/numerics/functional/enzyme_wrapper.hpp" + +using namespace serac; + +int main() { + + auto fg = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return tuple{dot(du_dx, z * u), z * (du_dx + transpose(du_dx)) - outer(u, u)}; + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfdz = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return dot(du_dx, u); + }; + + auto dgdz = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto [u, du_dx] = displacement; + return (du_dx + transpose(du_dx)); + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfdu = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + return get<1>(displacement) * z; + }; + + auto dgdu = [](double, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto u = get<0>(displacement); + tensor output{}; + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k) = - (u(i) * (j == k) + u(j) * (i == k)); + } + } + } + return output; + }; + +//////////////////////////////////////////////////////////////////////////////// + + auto dfddudx = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > & displacement) { + auto u = get<0>(displacement); + tensor output{}; + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k) = z * u[k] * (i == j); + } + } + } + return output; + }; + + auto dgddudx = [](double z, const tuple< tensor< double, 3 >, tensor< double, 3, 3 > > &) { + tensor output{}; + for (int l = 0; l < 3; l++) { + for (int k = 0; k < 3; k++) { + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + output(i,j,k,l) = z * ((i==k) * (j==l) + (j==k) * (i==l)); + } + } + } + } + return output; + }; + +//////////////////////////////////////////////////////////////////////////////// + + double z = 3.0; + auto displacement = tuple { + tensor{{1.0, 1.0, 1.0}}, + tensor{{{1.0, 2.0, 3.0}, {2.0, 3.0, 1.0}, {1.0, 0.5, 0.2}}} + }; + + auto [df_dz, dg_dz] = jacfwd<0>(fg, z, displacement); + std::cout << "df_dz: " << df_dz << std::endl; + std::cout << "expected: " << dfdz(z, displacement) << std::endl; + std::cout << std::endl; + + std::cout << "df_dz: " << dg_dz << std::endl; + std::cout << "expected: " << dgdz(z, displacement) << std::endl; + std::cout << std::endl; + + auto [df_ddisp, dg_ddisp] = jacfwd<1>(fg, z, displacement); + std::cout << "df_du: "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << get<0>(df_ddisp)(i); + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfdu(z, displacement) << std::endl; + std::cout << std::endl; + + std::cout << "df_d(du_dx): "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << get<1>(df_ddisp)(i); + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dfddudx(z, displacement) << std::endl; + std::cout << std::endl; + + std::cout << "dg_du: "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << "{"; + for (int j = 0; j < 3; j++) { + std::cout << get<0>(dg_ddisp)(i,j); + if (j != 2) { std::cout << ","; } + } + std::cout << "}"; + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dgdu(z, displacement) << std::endl; + std::cout << std::endl; + + + std::cout << "dg_d(du_dx): "; + std::cout << "{"; + for (int i = 0; i < 3; i++) { + std::cout << "{"; + for (int j = 0; j < 3; j++) { + std::cout << get<1>(dg_ddisp)(i,j); + if (j != 2) { std::cout << ","; } + } + std::cout << "}"; + if (i != 2) { std::cout << ","; } + } + std::cout << "}" << std::endl; + std::cout << "expected: " << dgddudx(z, displacement) << std::endl; + std::cout << std::endl; + +} diff --git a/src/serac/numerics/functional/tests/functional_basic_h1_scalar.cpp b/src/serac/numerics/functional/tests/functional_basic_h1_scalar.cpp index 8238748b5..6db8f20a2 100644 --- a/src/serac/numerics/functional/tests/functional_basic_h1_scalar.cpp +++ b/src/serac/numerics/functional/tests/functional_basic_h1_scalar.cpp @@ -55,11 +55,12 @@ void thermal_test_impl(std::unique_ptr& mesh) residual.AddDomainIntegral( Dimension{}, DependsOn<0>{}, - [=](double /*t*/, auto position, auto temperature) { + [=](double /*t*/, tuple< tensor, tensor > position, tuple< double, tensor > temperature) { auto [X, dX_dxi] = position; auto [u, du_dx] = temperature; - auto source = d00 * u + dot(d01, du_dx) - 0.0 * (100 * X[0] * X[1]); - auto flux = d10 * u + dot(d11, du_dx); + double source = d00 * u + dot(d01, du_dx) - 0.0 * (100 * X[0] * X[1]); + tensor flux = d10 * u + dot(d11, du_dx); + flux[0] += X[1]; return serac::tuple{source, flux}; }, *mesh); diff --git a/src/serac/physics/state/finite_element_vector.hpp b/src/serac/physics/state/finite_element_vector.hpp index f35d9c898..ac82fac50 100644 --- a/src/serac/physics/state/finite_element_vector.hpp +++ b/src/serac/physics/state/finite_element_vector.hpp @@ -18,7 +18,7 @@ #include "mfem.hpp" #include "serac/infrastructure/variant.hpp" -#include "serac/numerics/functional/functional.hpp" +#include "serac/numerics/functional/finite_element.hpp" namespace serac {