-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #367 from bluescarni/pr/fast_math_vec
Low-precision vector math support
- Loading branch information
Showing
29 changed files
with
2,060 additions
and
1,642 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
// Copyright 2020, 2021, 2022, 2023 Francesco Biscani ([email protected]), Dario Izzo ([email protected]) | ||
// | ||
// This file is part of the heyoka library. | ||
// | ||
// This Source Code Form is subject to the terms of the Mozilla | ||
// Public License v. 2.0. If a copy of the MPL was not distributed | ||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
#include <algorithm> | ||
#include <cmath> | ||
#include <ios> | ||
#include <iostream> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include <boost/math/constants/constants.hpp> | ||
#include <boost/program_options.hpp> | ||
|
||
#include <fmt/core.h> | ||
|
||
#include <spdlog/spdlog.h> | ||
#include <spdlog/stopwatch.h> | ||
|
||
#include <heyoka/expression.hpp> | ||
#include <heyoka/kw.hpp> | ||
#include <heyoka/llvm_state.hpp> | ||
#include <heyoka/logging.hpp> | ||
#include <heyoka/math/kepE.hpp> | ||
#include <stdexcept> | ||
|
||
using namespace heyoka; | ||
|
||
int main(int argc, char *argv[]) | ||
{ | ||
namespace po = boost::program_options; | ||
|
||
double ecc{}; | ||
unsigned seed{}; | ||
bool fast_math{}; | ||
|
||
po::options_description desc("Options"); | ||
|
||
desc.add_options()("help", "produce help message")("ecc", po::value<double>(&ecc)->default_value(0.1), | ||
"eccentricity")( | ||
"seed", po::value<unsigned>(&seed)->default_value(42u), | ||
"random seed")("fast-math", po::value<bool>(&fast_math)->default_value(true), "fast math mode"); | ||
|
||
po::variables_map vm; | ||
po::store(po::parse_command_line(argc, argv, desc), vm); | ||
po::notify(vm); | ||
|
||
if (vm.count("help") != 0u) { | ||
std::cout << desc << "\n"; | ||
return 0; | ||
} | ||
|
||
if (!std::isfinite(ecc) || ecc < 0 || ecc >= 1) { | ||
throw std::invalid_argument(fmt::format("Invalid eccentricity value: {}", ecc)); | ||
} | ||
|
||
constexpr auto N = 1'000'000ul; | ||
|
||
std::cout << std::boolalpha; | ||
std::cout << "Eccentricity: " << ecc << '\n'; | ||
std::cout << "fast_math : " << fast_math << '\n'; | ||
std::cout << "N : " << N << "\n\n"; | ||
|
||
// RNG setup. | ||
std::mt19937 rng(seed); | ||
std::uniform_real_distribution<double> Mdist(0, 2 * boost::math::constants::pi<double>()); | ||
|
||
// Data setup. | ||
std::vector<double> e_vec, M_vec, out_vec, out_vec_batch; | ||
e_vec.resize(N, ecc); | ||
M_vec.resize(N); | ||
out_vec.resize(N); | ||
out_vec_batch.resize(N); | ||
std::generate(M_vec.begin(), M_vec.end(), [&rng, &Mdist]() { return Mdist(rng); }); | ||
|
||
// cfunc setup. | ||
auto [e, M] = make_vars("e", "M"); | ||
|
||
llvm_state s{kw::fast_math = fast_math}; | ||
const auto batch_size = recommended_simd_size<double>(); | ||
add_cfunc<double>(s, "f_scalar", {kepE(e, M)}, kw::vars = {e, M}); | ||
add_cfunc<double>(s, "f_batch", {kepE(e, M)}, kw::vars = {e, M}, kw::batch_size = batch_size); | ||
s.compile(); | ||
|
||
auto *f_sc = reinterpret_cast<void (*)(double *, const double *, const double *, const double *)>( | ||
s.jit_lookup("f_scalar")); | ||
auto *f_ba | ||
= reinterpret_cast<void (*)(double *, const double *, const double *, const double *)>(s.jit_lookup("f_batch")); | ||
|
||
// Fetch the logger. | ||
create_logger(); | ||
set_logger_level_trace(); | ||
auto logger = spdlog::get("heyoka"); | ||
|
||
// Scalar runtime. | ||
spdlog::stopwatch sw; | ||
|
||
for (auto i = 0ull; i < N; ++i) { | ||
double ins[] = {e_vec[i], M_vec[i]}; | ||
f_sc(out_vec.data() + i, ins, nullptr, nullptr); | ||
} | ||
|
||
logger->trace("Scalar run took: {}s", sw); | ||
|
||
std::vector<double> batch_buffer(batch_size * 2ul); | ||
auto *batch_b_ptr = batch_buffer.data(); | ||
|
||
sw.reset(); | ||
|
||
for (auto i = 0ull; i < N - N % batch_size; i += batch_size) { | ||
std::copy(e_vec.data() + i, e_vec.data() + i + batch_size, batch_b_ptr); | ||
std::copy(M_vec.data() + i, M_vec.data() + i + batch_size, batch_b_ptr + batch_size); | ||
f_ba(out_vec_batch.data() + i, batch_b_ptr, nullptr, nullptr); | ||
} | ||
|
||
logger->trace("Batch run took: {}s", sw); | ||
|
||
std::cout.precision(16); | ||
for (auto i = 0u; i < 20u; ++i) { | ||
std::cout << out_vec[i] << " vs " << out_vec_batch[i] << '\n'; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
.. _tut_single_precision: | ||
|
||
Computations in single precision | ||
================================ | ||
|
||
.. versionadded:: 3.2.0 | ||
|
||
In previous tutorials we saw how heyoka, in addition to the standard | ||
`double precision <https://en.wikipedia.org/wiki/Double-precision_floating-point_format>`__, | ||
also supports computations in :ref:`extended precision <tut_extended_precision>` and | ||
:ref:`arbitrary precision <tut_arbitrary_precision>`. Starting with version 3.2.0, heyoka | ||
supports also computations in `single precision <https://en.wikipedia.org/wiki/Single-precision_floating-point_format>`__. | ||
|
||
Single-precision computations can lead to substantial performance benefits when high accuracy is not required. | ||
In particular, single-precision :ref:`batch mode <tut_batch_mode>` can use a SIMD width twice larger | ||
than double precision, leading to an increase by a factor of 2 of the computational throughput. | ||
In scalar computations, the use of single precision reduces by half the memory usage with respect to double precision, | ||
which can help alleviating performance issues in large ODE systems. This can be particularly noticeable in applications such as | ||
:external:ref:`neural ODEs <tut_neural_ode>`. | ||
|
||
In C++, single-precision values are usually represented via the standard floating-point type ``float``. | ||
Correspondingly, and similarly to what explained in the :ref:`extended precision <tut_extended_precision>` | ||
tutorial, single-precision computations are activated by passing the ``float`` template parameter to functions | ||
and classes in the heyoka API. | ||
|
||
A simple example | ||
---------------- | ||
|
||
In order to verify that heyoka indeed is able to work in single precision, we will be monitoring the evolution of the energy constant | ||
in a low-precision numerical integration of the simple pendulum. | ||
|
||
Let us begin as usual with the definition of the dynamical equations and the creation of the integrator object: | ||
|
||
.. literalinclude:: ../tutorial/single_precision.cpp | ||
:language: c++ | ||
:lines: 18-29 | ||
|
||
In order to activate single precision, we created an integrator object of type ``taylor_adaptive<float>`` - that is, | ||
we specified ``float``, instead of the usual ``double``, as the (only) template parameter for the ``taylor_adaptive`` class template. | ||
Note that we specified a single-precision initial state via the use of the ``f`` suffix for the numerical constants. | ||
Note also that, when operating in single precision, | ||
*all* numerical values encapsulated in an integrator are represented in single precision - this includes not only the state vector, | ||
but also the time coordinate, the tolerance, the Taylor coefficients, etc. Similarly to double-precision integrators, the default value | ||
of the tolerance is the machine epsilon of ``float``. | ||
|
||
Next, we define a small helper function that will allow us to monitor the evolution of the energy constant | ||
throughout the integration: | ||
|
||
.. literalinclude:: ../tutorial/single_precision.cpp | ||
:language: c++ | ||
:lines: 31-37 | ||
|
||
Before starting the integration, we compute and store the initial energy for later use: | ||
|
||
.. literalinclude:: ../tutorial/single_precision.cpp | ||
:language: c++ | ||
:lines: 39-40 | ||
|
||
We can now begin a step-by-step integration. At the end of each step, we will be computing | ||
and printing to screen the relative energy error: | ||
|
||
.. literalinclude:: ../tutorial/single_precision.cpp | ||
:language: c++ | ||
:lines: 42-49 | ||
|
||
.. code-block:: console | ||
Relative energy error: 1.48183e-07 | ||
Relative energy error: 5.29227e-08 | ||
Relative energy error: 6.08611e-08 | ||
Relative energy error: 1.79937e-07 | ||
Relative energy error: 1.74645e-07 | ||
Relative energy error: 2.24921e-07 | ||
Relative energy error: 2.4609e-07 | ||
Relative energy error: 1.1643e-07 | ||
Relative energy error: 1.79937e-07 | ||
Relative energy error: 1.40245e-07 | ||
Relative energy error: 2.54029e-07 | ||
Relative energy error: 1.84899e-07 | ||
Relative energy error: 1.83245e-07 | ||
Relative energy error: 1.56122e-07 | ||
Relative energy error: 2.22275e-07 | ||
Relative energy error: 1.61414e-07 | ||
Relative energy error: 2.11691e-07 | ||
Relative energy error: 2.88428e-07 | ||
Relative energy error: 2.93721e-07 | ||
Relative energy error: 1.82583e-07 | ||
The console output indeed confirms that energy is conserved at the level of the epsilon of the | ||
single-precision format (that is, :math:`\sim 10^{-7}`). | ||
|
||
Other classes and functions | ||
--------------------------- | ||
|
||
Besides the adaptive integrator, several other classes and functions in heyoka can be used in single precision. | ||
|
||
The :ref:`event classes <tut_events>`, for instance, can be constructed in single precision by passing ``float`` | ||
as the template parameter (instead of ``double``). Note that the precision of an event | ||
must match the precision of the integrator object in which the event is used, otherwise an error will be produced | ||
at compilation time. | ||
|
||
Full code listing | ||
----------------- | ||
|
||
.. literalinclude:: ../tutorial/single_precision.cpp | ||
:language: c++ | ||
:lines: 9- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.