diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eb180d01..a8913ae7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -482,6 +482,24 @@ test:nvcc-package: needs: - build:nvcc-cmake-minimum +test:cpp-wrapper: + stage: test + extends: + - .rocm:cmake-minimum + - .rules:test + needs: + - build:rocm-cmake-minimum + script: + - cd $CI_PROJECT_DIR/build + - $SUDO_CMD dpkg -i rocrand*.deb + - cmake + -S $CI_PROJECT_DIR/test/cpp_wrapper/ + -B $CI_PROJECT_DIR/build_cpp_wrapper_test + - cmake --build $CI_PROJECT_DIR/build_cpp_wrapper_test + - cd $CI_PROJECT_DIR/build_cpp_wrapper_test + - ctest --output-on-failure + - $SUDO_CMD dpkg -r rocrand-benchmarks rocrand-clients rocrand-dev rocrand-tests rocrand + .test:install: stage: test extends: diff --git a/CHANGELOG.md b/CHANGELOG.md index 56172edb..fc8dd28e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,15 @@ Documentation for rocRAND is available at * C++ wrapper: * `lfsr113_engine` now also supports being constructed with a seed of type `unsigned long long`, not only `uint4`. * added optional order parameter to constructor of `mt19937_engine` +* Added the following functions for the `ROCRAND_RNG_PSEUDO_MTGP32` generator: + * `rocrand_normal2` + * `rocrand_normal_double2` + * `rocrand_log_normal2` + * `rocrand_log_normal_double2` +* Added `rocrand_create_generator_host_blocking` which dispatches without stream semantics. +* Added host-side generator for `ROCRAND_RNG_PSEUDO_MTGP32`. +* Added offset and skipahead functionality to LFSR113 generator. +* Added dynamic ordering for architecture `gfx1102`. ### Changes @@ -39,6 +48,8 @@ Documentation for rocRAND is available at * Building rocRAND should be faster on machines with multiple CPU cores as the library has been split to multiple compilation units. * C++ wrapper: the `min()` and `max()` member functions of the generators and distributions are now `static constexpr`. 
+* Renamed and unified the existing `ROCRAND_DETAIL_.*_BM_NOT_IN_STATE` to `ROCRAND_DETAIL_BM_NOT_IN_STATE`. +* Static & dynamic library: moved all internal symbols to namespaces to avoid potential symbol name collisions when linking. ### Deprecations @@ -47,12 +58,17 @@ Documentation for rocRAND is available at * `rocrand_device::threefry2x64_20_engine::threefry2x64_20_state` * `rocrand_device::threefry4x32_20_engine::threefry4x32_20_state` * `rocrand_device::threefry4x64_20_engine::threefry4x64_20_state` +* Deprecated internal header: `src/rng/distribution/distributions.hpp` +* Deprecated internal header: `src/rng/device_engines.hpp` ### Removals * Removed references to and workarounds for deprecated hcc. * Support for HIP-CPU +### Known issues +* SOBOL64 and SCRAMBLED_SOBOL64 generate Poisson-distributed `unsigned long long int` numbers instead of `unsigned int`. This will be fixed in the next major release. + ## (Unreleased) rocRAND-3.0.0 for ROCm 6.0.0 ### Additions diff --git a/benchmark/tuning/benchmark_tuning.cpp b/benchmark/tuning/benchmark_tuning.cpp index e1ec817b..42c8fa0b 100644 --- a/benchmark/tuning/benchmark_tuning.cpp +++ b/benchmark/tuning/benchmark_tuning.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -51,32 +51,28 @@ int main(int argc, char** argv) add_common_benchmark_rocrand_info(); std::vector benchmarks; - benchmark_tuning::add_all_benchmarks_for_generator( - benchmarks, - config); - benchmark_tuning::add_all_benchmarks_for_generator( - benchmarks, - config); - benchmark_tuning::add_all_benchmarks_for_generator( - benchmarks, - config); - benchmark_tuning::add_all_benchmarks_for_generator( - benchmarks, - config); - benchmark_tuning::add_all_benchmarks_for_generator( + benchmark_tuning::add_all_benchmarks_for_generator< + benchmark_tuning::lfsr113_generator_template>(benchmarks, config); + benchmark_tuning::add_all_benchmarks_for_generator< + benchmark_tuning::mrg31k3p_generator_template>(benchmarks, config); + benchmark_tuning::add_all_benchmarks_for_generator< + benchmark_tuning::mrg32k3a_generator_template>(benchmarks, config); + benchmark_tuning::add_all_benchmarks_for_generator< + benchmark_tuning::mt19937_generator_template>(benchmarks, config); + benchmark_tuning::add_all_benchmarks_for_generator( benchmarks, config); benchmark_tuning::add_all_benchmarks_for_generator< - benchmark_tuning::rocrand_philox4x32_10_template>(benchmarks, config); + benchmark_tuning::philox4x32_10_generator_template>(benchmarks, config); benchmark_tuning::add_all_benchmarks_for_generator< - benchmark_tuning::rocrand_threefry2x32_20_template>(benchmarks, config); + benchmark_tuning::threefry2x32_20_generator_template>(benchmarks, config); benchmark_tuning::add_all_benchmarks_for_generator< - benchmark_tuning::rocrand_threefry2x64_20_template>(benchmarks, config); + benchmark_tuning::threefry2x64_20_generator_template>(benchmarks, config); benchmark_tuning::add_all_benchmarks_for_generator< - benchmark_tuning::rocrand_threefry4x32_20_template>(benchmarks, config); + 
benchmark_tuning::threefry4x32_20_generator_template>(benchmarks, config); benchmark_tuning::add_all_benchmarks_for_generator< - benchmark_tuning::rocrand_threefry4x64_20_template>(benchmarks, config); - benchmark_tuning::add_all_benchmarks_for_generator( + benchmark_tuning::threefry4x64_20_generator_template>(benchmarks, config); + benchmark_tuning::add_all_benchmarks_for_generator( benchmarks, config); diff --git a/benchmark/tuning/benchmark_tuning.hpp b/benchmark/tuning/benchmark_tuning.hpp index 41163ff5..271ebaed 100644 --- a/benchmark/tuning/benchmark_tuning.hpp +++ b/benchmark/tuning/benchmark_tuning.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -56,7 +56,7 @@ struct distribution_input template class GeneratorTemplate> using distribution_input_t = typename distribution_input::type; -using rocrand_host::detail::generator_config; +using rocrand_impl::host::generator_config; /// @brief Provides a way to opt out from benchmarking certain configs for certain generators and types /// @note See benchmarked_generators.hpp for specializations @@ -140,14 +140,15 @@ class generator_benchmark_factory else if constexpr(std::is_integral_v) { using uniform_distribution_t - = uniform_distribution>; + = rocrand_impl::host::uniform_distribution>; add_benchmarks_impl(); if constexpr(std::is_same_v) { // The poisson distribution is only supported for unsigned int. - using poisson_distribution_t - = rocrand_poisson_distribution; + using poisson_distribution_t = rocrand_impl::host::poisson_distribution< + rocrand_impl::host::DISCRETE_METHOD_ALIAS>; add_benchmarks_impl(); } } @@ -155,22 +156,23 @@ class generator_benchmark_factory { // float, double and half support these distributions only. 
using uniform_distribution_t - = uniform_distribution>; + = rocrand_impl::host::uniform_distribution>; add_benchmarks_impl(); constexpr rocrand_rng_type rng_type - = rocrand_host::detail::gen_template_type_v; + = rocrand_impl::host::gen_template_type_v; - using normal_distribution_t - = normal_distribution, - normal_distribution_max_input_width>; + using normal_distribution_t = rocrand_impl::host::normal_distribution< + T, + distribution_input_t, + rocrand_impl::host::normal_distribution_max_input_width>; add_benchmarks_impl(); - using log_normal_distribution_t - = log_normal_distribution, - log_normal_distribution_max_input_width>; + using log_normal_distribution_t = rocrand_impl::host::log_normal_distribution< + T, + distribution_input_t, + rocrand_impl::host::log_normal_distribution_max_input_width>; add_benchmarks_impl(); } } @@ -183,7 +185,7 @@ class generator_benchmark_factory // The elements of the arrays can be controlled with CMake cache variables // BENCHMARK_TUNING_THREAD_OPTIONS and BENCHMARK_TUNING_BLOCK_OPTIONS static constexpr inline auto s_param_combinations - = cpp_utils::numeric_combinations(thread_options, block_options); + = rocrand_impl::cpp_utils::numeric_combinations(thread_options, block_options); template static std::string get_benchmark_name() @@ -220,8 +222,7 @@ class generator_benchmark_factory if constexpr(grid_size < min_benchmarked_grid_size) return; - using ConfigProvider - = rocrand_host::detail::static_config_provider; + using ConfigProvider = rocrand_impl::host::static_config_provider; if constexpr(config_filter::is_enabled( ConfigProvider::static_config)) diff --git a/benchmark/tuning/benchmark_tuning_lfsr113.cpp b/benchmark/tuning/benchmark_tuning_lfsr113.cpp index cfd517c0..d9908377 100644 --- a/benchmark/tuning/benchmark_tuning_lfsr113.cpp +++ b/benchmark/tuning/benchmark_tuning_lfsr113.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_mrg31k3p.cpp b/benchmark/tuning/benchmark_tuning_mrg31k3p.cpp index 67a06361..0be750a3 100644 --- a/benchmark/tuning/benchmark_tuning_mrg31k3p.cpp +++ b/benchmark/tuning/benchmark_tuning_mrg31k3p.cpp @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_mrg32k3a.cpp b/benchmark/tuning/benchmark_tuning_mrg32k3a.cpp index 1258b80b..9d2c8f54 100644 --- a/benchmark/tuning/benchmark_tuning_mrg32k3a.cpp +++ b/benchmark/tuning/benchmark_tuning_mrg32k3a.cpp @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_mt19937.cpp b/benchmark/tuning/benchmark_tuning_mt19937.cpp index c014dd93..2e066da7 100644 --- a/benchmark/tuning/benchmark_tuning_mt19937.cpp +++ b/benchmark/tuning/benchmark_tuning_mt19937.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_mtgp32.cpp b/benchmark/tuning/benchmark_tuning_mtgp32.cpp index 1e4c7bf3..101a4b37 100644 --- a/benchmark/tuning/benchmark_tuning_mtgp32.cpp +++ b/benchmark/tuning/benchmark_tuning_mtgp32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_philox.cpp b/benchmark/tuning/benchmark_tuning_philox.cpp index 0b07fae5..61eed079 100644 --- a/benchmark/tuning/benchmark_tuning_philox.cpp +++ b/benchmark/tuning/benchmark_tuning_philox.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_threefry2x32_20.cpp b/benchmark/tuning/benchmark_tuning_threefry2x32_20.cpp index b0af119a..f6125db6 100644 --- a/benchmark/tuning/benchmark_tuning_threefry2x32_20.cpp +++ b/benchmark/tuning/benchmark_tuning_threefry2x32_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_threefry2x64_20.cpp b/benchmark/tuning/benchmark_tuning_threefry2x64_20.cpp index dc890d3b..60a74dc2 100644 --- a/benchmark/tuning/benchmark_tuning_threefry2x64_20.cpp +++ b/benchmark/tuning/benchmark_tuning_threefry2x64_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_threefry4x32_20.cpp b/benchmark/tuning/benchmark_tuning_threefry4x32_20.cpp index a3d2bba3..4b4fe171 100644 --- a/benchmark/tuning/benchmark_tuning_threefry4x32_20.cpp +++ b/benchmark/tuning/benchmark_tuning_threefry4x32_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_threefry4x64_20.cpp b/benchmark/tuning/benchmark_tuning_threefry4x64_20.cpp index 57575258..3ab11d2b 100644 --- a/benchmark/tuning/benchmark_tuning_threefry4x64_20.cpp +++ b/benchmark/tuning/benchmark_tuning_threefry4x64_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmark_tuning_xorwow.cpp b/benchmark/tuning/benchmark_tuning_xorwow.cpp index a99a46f2..7be40f46 100644 --- a/benchmark/tuning/benchmark_tuning_xorwow.cpp +++ b/benchmark/tuning/benchmark_tuning_xorwow.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ namespace benchmark_tuning { -template void add_all_benchmarks_for_generator( +template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/benchmarked_generators.hpp b/benchmark/tuning/benchmarked_generators.hpp index db4d4194..16031a85 100644 --- a/benchmark/tuning/benchmarked_generators.hpp +++ b/benchmark/tuning/benchmarked_generators.hpp @@ -40,23 +40,31 @@ // mt19937 needs to be included, as access to threads_per_generator is needed #include "rng/mt19937.hpp" +namespace rocrand_impl::host +{ + template -class rocrand_lfsr113_template; +class lfsr113_generator_template; template -class rocrand_mrg_template; +class mrg_generator_template; -template -class rocrand_mtgp32_template; +template +class mtgp32_generator_template; template -class rocrand_philox4x32_10_template; +class philox4x32_10_generator_template; template -class rocrand_threefry_template; +class threefry_generator_template; 
template -class rocrand_xorwow_template; +class xorwow_generator_template; + +template +struct threefry_device_engine; + +} // namespace rocrand_impl::host // Further forward declarations namespace rocrand_device @@ -69,12 +77,6 @@ class threefry4x32_20_engine; class threefry4x64_20_engine; } // namespace rocrand_device -namespace rocrand_host::detail -{ -template -struct threefry_device_engine; -} // namespace rocrand_host::detail - namespace benchmark_tuning { @@ -83,96 +85,111 @@ namespace benchmark_tuning // both host and device systems template -using rocrand_lfsr113_template = ::rocrand_lfsr113_template; +using lfsr113_generator_template + = rocrand_impl::host::lfsr113_generator_template; template -using rocrand_mrg31k3p_template = :: - rocrand_mrg_template; +using mrg31k3p_generator_template + = rocrand_impl::host::mrg_generator_template; template -using rocrand_mrg32k3a_template = :: - rocrand_mrg_template; +using mrg32k3a_generator_template + = rocrand_impl::host::mrg_generator_template; template -using rocrand_mtgp32_template = ::rocrand_mtgp32_template; +using mtgp32_generator_template + = rocrand_impl::host::mtgp32_generator_template; template -using rocrand_mt19937_template = ::rocrand_mt19937_template; +using mt19937_generator_template = rocrand_impl::host::mt19937_generator_template; template -using rocrand_philox4x32_10_template - = ::rocrand_philox4x32_10_template; +using philox4x32_10_generator_template + = rocrand_impl::host::philox4x32_10_generator_template; template -using rocrand_threefry2x32_20_template = ::rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, +using threefry2x32_20_generator_template = rocrand_impl::host::threefry_generator_template< + rocrand_impl::system::device_system, + rocrand_impl::host::threefry_device_engine, ConfigProvider>; template -using rocrand_threefry2x64_20_template = ::rocrand_threefry_template< - rocrand_system_device, - 
rocrand_host::detail::threefry_device_engine, +using threefry2x64_20_generator_template = rocrand_impl::host::threefry_generator_template< + rocrand_impl::system::device_system, + rocrand_impl::host::threefry_device_engine, ConfigProvider>; template -using rocrand_threefry4x32_20_template = ::rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, +using threefry4x32_20_generator_template = rocrand_impl::host::threefry_generator_template< + rocrand_impl::system::device_system, + rocrand_impl::host::threefry_device_engine, ConfigProvider>; template -using rocrand_threefry4x64_20_template = ::rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, +using threefry4x64_20_generator_template = rocrand_impl::host::threefry_generator_template< + rocrand_impl::system::device_system, + rocrand_impl::host::threefry_device_engine, ConfigProvider>; template -using rocrand_xorwow_template = ::rocrand_xorwow_template; +using xorwow_generator_template + = rocrand_impl::host::xorwow_generator_template; template<> -struct output_type_supported : public std::false_type +struct output_type_supported + : public std::false_type {}; template<> -struct output_type_supported : public std::false_type +struct output_type_supported + : public std::false_type {}; template<> -struct output_type_supported : public std::false_type +struct output_type_supported + : public std::false_type {}; template<> -struct output_type_supported : public std::false_type +struct output_type_supported : public std::false_type {}; template<> -struct output_type_supported : public std::false_type +struct output_type_supported + : public std::false_type {}; template<> -struct output_type_supported +struct output_type_supported : public std::false_type {}; template<> -struct output_type_supported +struct output_type_supported : public std::false_type {}; template<> -struct output_type_supported +struct output_type_supported : 
public std::false_type {}; template<> -struct output_type_supported : public std::false_type +struct output_type_supported : public std::false_type {}; template -struct config_filter +struct config_filter { - static constexpr bool is_enabled(rocrand_host::detail::generator_config config) + static constexpr bool is_enabled(rocrand_impl::host::generator_config config) { // The current implementation of MTGP32 requires a fixed block size, // and the grid size is also limited. @@ -181,58 +198,59 @@ struct config_filter }; template -struct config_filter +struct config_filter { - static constexpr bool is_enabled(rocrand_host::detail::generator_config config) + static constexpr bool is_enabled(rocrand_impl::host::generator_config config) { - return (config.blocks * config.threads / rocrand_mt19937::threads_per_generator) + return (config.blocks * config.threads + / rocrand_impl::host::mt19937_octo_engine::threads_per_generator) <= mt19937_jumps_radix * mt19937_jumps_radix; } }; template<> -struct distribution_input +struct distribution_input { using type = unsigned long long; }; template<> -struct distribution_input +struct distribution_input { using type = unsigned long long; }; -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& 
benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); -extern template void add_all_benchmarks_for_generator( +extern template void add_all_benchmarks_for_generator( std::vector& benchmarks, const benchmark_config& config); } // namespace benchmark_tuning diff --git a/benchmark/tuning/distribution_traits.hpp b/benchmark/tuning/distribution_traits.hpp index 7f248628..87baebcb 100644 --- a/benchmark/tuning/distribution_traits.hpp +++ b/benchmark/tuning/distribution_traits.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -110,7 +110,7 @@ struct distribution_name }; template -struct distribution_name> +struct distribution_name> { std::string operator()() { @@ -119,7 +119,7 @@ struct distribution_name> }; template -struct distribution_name> +struct distribution_name> { std::string operator()() { @@ -128,7 +128,7 @@ struct distribution_name> }; template -struct distribution_name> +struct distribution_name> { std::string operator()() { @@ -137,7 +137,8 @@ struct distribution_name> }; template<> -struct distribution_name> +struct distribution_name< + rocrand_impl::host::poisson_distribution> { std::string operator()() { @@ -154,42 +155,44 @@ struct default_distribution }; template -struct default_distribution> +struct default_distribution> { auto operator()(const benchmark_config& /*config*/) { - return uniform_distribution{}; + return rocrand_impl::host::uniform_distribution{}; } }; template -struct default_distribution> +struct default_distribution> { auto operator()(const benchmark_config& /*config*/) { const T mean = 0; const T stddev = 1; - return normal_distribution(mean, stddev); + return rocrand_impl::host::normal_distribution(mean, stddev); } }; template -struct default_distribution> +struct default_distribution> { auto operator()(const benchmark_config& /*config*/) { const T mean = 0; const T stddev = 1; - return log_normal_distribution(mean, stddev); + return rocrand_impl::host::log_normal_distribution(mean, stddev); } }; template<> -struct default_distribution> +struct default_distribution< + rocrand_impl::host::poisson_distribution> { auto operator()(const benchmark_config& config) { - return rocrand_poisson_distribution(config.lambda); + return rocrand_impl::host::poisson_distribution( + config.lambda); } }; diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 661f9460..ade28385 100644 --- 
a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -893,7 +893,7 @@ EXCLUDE_PATTERNS = # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = rocrand_device::* \ - rocrand_host::* + rocrand_impl::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include diff --git a/docs/programmers-guide.rst b/docs/programmers-guide.rst index 0e8bcda4..035d93ec 100644 --- a/docs/programmers-guide.rst +++ b/docs/programmers-guide.rst @@ -43,7 +43,7 @@ rocRAND generators can be configured to change how results are ordered in global `ROCRAND_ORDERING_PSEUDO_DEFAULT` and `ROCRAND_ORDERING_QUASI_DEFAULT` are the default ordering for pseudo- and quasi-random number generators respectively. `ROCRAND_ORDERING_PSEUDO_DEFAULT` is currently the same as `ROCRAND_ORDERING_PSEUDO_BEST` and `ROCRAND_ORDERING_PSEUDO_LEGACY`. -`ROCRAND_ORDERING_PSEUDO_DYNAMIC` indicates that rocRAND may change the output ordering such that the best performance is obtained for a particular generator on a particular GPU. Using this ordering, the generated sequences can vary between different GPU models and rocRAND versions. More information about generating such configurations can be found at :doc:`dynamic_ordering_configuration`. +`ROCRAND_ORDERING_PSEUDO_DYNAMIC` indicates that rocRAND may change the output ordering such that the best performance is obtained for a particular generator on a particular GPU. Using this ordering, the generated sequences can vary between different GPU models and rocRAND versions. More information about generating such configurations can be found at :doc:`dynamic_ordering_configuration`. `ROCRAND_ORDERING_PSEUDO_DYNAMIC` is not supported for generators created with `rocrand_create_generator_host`. `ROCRAND_ORDERING_PSEUDO_LEGACY` indicates that rocRAND should generate values in a way that is backward compatible. 
When it is set, rocRAND generates exactly the same sequences across releases. diff --git a/library/include/rocrand/rocrand.h b/library/include/rocrand/rocrand.h index 12f84a89..0f159b13 100644 --- a/library/include/rocrand/rocrand.h +++ b/library/include/rocrand/rocrand.h @@ -164,11 +164,11 @@ rocrand_status ROCRANDAPI rocrand_create_generator(rocrand_generator * generator, rocrand_rng_type rng_type); /** - * \brief Creates a new random number generator. + * \brief Creates a new host random number generator. * * Creates a new pseudo random number generator of type \p rng_type * and returns it in \p generator. This generator is executed on the host rather than - * on a device. + * on a device, and its work is enqueued on the stream associated with the generator. * * Values for \p rng_type are: * - ROCRAND_RNG_PSEUDO_XORWOW @@ -199,6 +199,14 @@ rocrand_create_generator(rocrand_generator * generator, rocrand_rng_typ rocrand_status ROCRANDAPI rocrand_create_generator_host(rocrand_generator* generator, rocrand_rng_type rng_type); +/** + * \brief Creates a new blocking host random number generator. It behaves like + * `rocrand_create_generator_host`, except that instead of enqueuing the host function in the stream, + * execution happens synchronously with respect to the calling thread and the stream is ignored. + */ +rocrand_status ROCRANDAPI rocrand_create_generator_host_blocking(rocrand_generator* generator, + rocrand_rng_type rng_type); + /** * \brief Destroys random number generator. * diff --git a/library/include/rocrand/rocrand_common.h b/library/include/rocrand/rocrand_common.h index d7bf0e87..2f16c6e4 100644 --- a/library/include/rocrand/rocrand_common.h +++ b/library/include/rocrand/rocrand_common.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -35,14 +35,12 @@ #define ROCRAND_SQRT2 (1.4142135f) #define ROCRAND_SQRT2_DOUBLE (1.4142135623730951) +#include + #include #define ROCRAND_KERNEL __global__ static -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #if __HIP_DEVICE_COMPILE__ \ && (defined(__HIP_PLATFORM_AMD__) \ || (defined(__HIP_PLATFORM_NVCC__) && (__CUDA_ARCH__ >= 530))) @@ -115,8 +113,8 @@ namespace detail { #endif #endif -FQUALIFIERS -unsigned long long mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z) +__forceinline__ __device__ __host__ unsigned long long + mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z) { #if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_DEVICE_COMPILE__) \ && defined(ROCRAND_ENABLE_INLINE_ASM) @@ -158,41 +156,35 @@ unsigned long long mad_u64_u32(const unsigned int x, const unsigned int y, const template struct engine_boxmuller_helper { - static FQUALIFIERS - bool has_float(const Engine * engine) + static __forceinline__ __device__ __host__ bool has_float(const Engine* engine) { return engine->m_state.boxmuller_float_state != 0; } - static FQUALIFIERS - float get_float(Engine * engine) + static __forceinline__ __device__ __host__ float get_float(Engine* engine) { engine->m_state.boxmuller_float_state = 0; return engine->m_state.boxmuller_float; } - static FQUALIFIERS - void save_float(Engine * engine, float f) + static __forceinline__ __device__ __host__ void save_float(Engine* engine, float f) { engine->m_state.boxmuller_float_state = 1; engine->m_state.boxmuller_float = f; } - static FQUALIFIERS - bool has_double(const Engine * engine) + static __forceinline__ __device__ __host__ bool has_double(const Engine* engine) { return engine->m_state.boxmuller_double_state != 0; } - static FQUALIFIERS - 
float get_double(Engine * engine) + static __forceinline__ __device__ __host__ float get_double(Engine* engine) { engine->m_state.boxmuller_double_state = 0; return engine->m_state.boxmuller_double; } - static FQUALIFIERS - void save_double(Engine * engine, double d) + static __forceinline__ __device__ __host__ void save_double(Engine* engine, double d) { engine->m_state.boxmuller_double_state = 1; engine->m_state.boxmuller_double = d; @@ -200,17 +192,18 @@ struct engine_boxmuller_helper }; template -FQUALIFIERS void split_ull(T& lo, T& hi, unsigned long long int val); +__forceinline__ __device__ __host__ void split_ull(T& lo, T& hi, unsigned long long int val); template<> -FQUALIFIERS void split_ull(unsigned int& lo, unsigned int& hi, unsigned long long int val) +__forceinline__ __device__ __host__ void + split_ull(unsigned int& lo, unsigned int& hi, unsigned long long int val) { lo = val & 0xFFFFFFFF; hi = (val >> 32) & 0xFFFFFFFF; } template<> -FQUALIFIERS void +__forceinline__ __device__ __host__ void split_ull(unsigned long long int& lo, unsigned long long int& hi, unsigned long long int val) { lo = val; diff --git a/library/include/rocrand/rocrand_discrete.h b/library/include/rocrand/rocrand_discrete.h index 65f8a288..1dc591e0 100644 --- a/library/include/rocrand/rocrand_discrete.h +++ b/library/include/rocrand/rocrand_discrete.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_DISCRETE_H_ #define ROCRAND_DISCRETE_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ __host__ -#endif // FQUALIFIERS - #include #include "rocrand/rocrand_lfsr113.h" @@ -43,8 +39,6 @@ #include "rocrand/rocrand_xorwow.h" #include "rocrand/rocrand_discrete_types.h" -#include "rocrand/rocrand_normal.h" -#include "rocrand/rocrand_uniform.h" // Alias method // @@ -57,11 +51,12 @@ namespace rocrand_device { namespace detail { -FQUALIFIERS unsigned int discrete_alias(const double x, - const unsigned int size, - const unsigned int offset, - const unsigned int* __restrict__ alias, - const double* __restrict__ probability) +__forceinline__ __device__ __host__ unsigned int + discrete_alias(const double x, + const unsigned int size, + const unsigned int offset, + const unsigned int* __restrict__ alias, + const double* __restrict__ probability) { // Calculate value using Alias table @@ -73,13 +68,14 @@ FQUALIFIERS unsigned int discrete_alias(const double x, return offset + (y < probability[i] ? 
i : alias[i]); } -FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_alias(const double x, const rocrand_discrete_distribution_st& dis) { return discrete_alias(x, dis.size, dis.offset, dis.alias, dis.probability); } -FQUALIFIERS -unsigned int discrete_alias(const unsigned int r, const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_alias(const unsigned int r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_32 = ROCRAND_2POW32_INV_DOUBLE; const double x = r * inv_double_32; @@ -87,26 +83,26 @@ unsigned int discrete_alias(const unsigned int r, const rocrand_discrete_distrib } // To prevent ambiguity compile error when compiler is facing the type "unsigned long"!!! -FQUALIFIERS unsigned int discrete_alias(const unsigned long r, - const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_alias(const unsigned long r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_32 = ROCRAND_2POW32_INV_DOUBLE; const double x = r * inv_double_32; return discrete_alias(x, dis); } -FQUALIFIERS unsigned int discrete_alias(const unsigned long long int r, - const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_alias(const unsigned long long int r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_64 = ROCRAND_2POW64_INV_DOUBLE; const double x = r * inv_double_64; return discrete_alias(x, dis); } -FQUALIFIERS unsigned int discrete_cdf(const double x, - const unsigned int size, - const unsigned int offset, - const double* __restrict__ cdf) +__forceinline__ __device__ __host__ unsigned int discrete_cdf(const double x, + const unsigned int size, + const unsigned int offset, + const double* __restrict__ cdf) { // Calculate value using binary search in CDF @@ 
-130,13 +126,14 @@ FQUALIFIERS unsigned int discrete_cdf(const double x, return offset + min; } -FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis) { return discrete_cdf(x, dis.size, dis.offset, dis.cdf); } -FQUALIFIERS -unsigned int discrete_cdf(const unsigned int r, const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_cdf(const unsigned int r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_32 = ROCRAND_2POW32_INV_DOUBLE; const double x = r * inv_double_32; @@ -144,16 +141,16 @@ unsigned int discrete_cdf(const unsigned int r, const rocrand_discrete_distribut } // To prevent ambiguity compile error when compiler is facing the type "unsigned long"!!! -FQUALIFIERS unsigned int discrete_cdf(const unsigned long r, - const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_cdf(const unsigned long r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_32 = ROCRAND_2POW32_INV_DOUBLE; const double x = r * inv_double_32; return discrete_cdf(x, dis); } -FQUALIFIERS unsigned int discrete_cdf(const unsigned long long int r, - const rocrand_discrete_distribution_st& dis) +__forceinline__ __device__ __host__ unsigned int + discrete_cdf(const unsigned long long int r, const rocrand_discrete_distribution_st& dis) { constexpr double inv_double_64 = ROCRAND_2POW64_INV_DOUBLE; const double x = r * inv_double_64; @@ -180,8 +177,9 @@ FQUALIFIERS unsigned int discrete_cdf(const unsigned long long int r, * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_philox4x32_10 * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ 
unsigned int + rocrand_discrete(rocrand_state_philox4x32_10* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_alias(rocrand(state), *discrete_distribution); } @@ -198,8 +196,8 @@ unsigned int rocrand_discrete(rocrand_state_philox4x32_10 * state, const rocrand * * \return Four unsigned int values distributed according to \p discrete_distribution as \p uint4 */ -FQUALIFIERS -uint4 rocrand_discrete4(rocrand_state_philox4x32_10 * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ uint4 rocrand_discrete4( + rocrand_state_philox4x32_10* state, const rocrand_discrete_distribution discrete_distribution) { const uint4 u4 = rocrand4(state); return uint4 { @@ -222,8 +220,9 @@ uint4 rocrand_discrete4(rocrand_state_philox4x32_10 * state, const rocrand_discr * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_mrg31k3p* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_mrg31k3p* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_alias(rocrand(state), *discrete_distribution); } @@ -240,8 +239,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_mrg31k3p* st * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_mrg32k3a * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_mrg32k3a* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_alias(rocrand(state), *discrete_distribution); } @@ -258,8 +258,9 @@ unsigned int rocrand_discrete(rocrand_state_mrg32k3a * state, const rocrand_disc 
* * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_xorwow * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_xorwow* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_alias(rocrand(state), *discrete_distribution); } @@ -276,8 +277,9 @@ unsigned int rocrand_discrete(rocrand_state_xorwow * state, const rocrand_discre * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_mtgp32 * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ unsigned int + rocrand_discrete(rocrand_state_mtgp32* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -294,8 +296,9 @@ unsigned int rocrand_discrete(rocrand_state_mtgp32 * state, const rocrand_discre * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_sobol32 * state, const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_sobol32* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -312,9 +315,9 @@ unsigned int rocrand_discrete(rocrand_state_sobol32 * state, const rocrand_discr * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_scrambled_sobol32* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + 
rocrand_discrete(rocrand_state_scrambled_sobol32* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -331,8 +334,9 @@ unsigned int rocrand_discrete(rocrand_state_scrambled_sobol32* state, * * \return unsigned long long int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_sobol64* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_sobol64* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -349,8 +353,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_sobol64* st * * \return unsigned long long int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_scrambled_sobol64* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_scrambled_sobol64* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -367,9 +372,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_scrambled_sobol64* st * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS -unsigned int rocrand_discrete(rocrand_state_lfsr113* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_lfsr113* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -386,8 +391,9 @@ unsigned int rocrand_discrete(rocrand_state_lfsr113* 
state, * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry2x32_20* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_threefry2x32_20* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -404,8 +410,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry2x32_20* st * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry2x64_20* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_threefry2x64_20* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -422,8 +429,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry2x64_20* st * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry4x32_20* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ __host__ unsigned int + rocrand_discrete(rocrand_state_threefry4x32_20* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } @@ -440,8 +448,9 @@ FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry4x32_20* st * * \return unsigned int value distributed according to \p discrete_distribution */ -FQUALIFIERS unsigned int rocrand_discrete(rocrand_state_threefry4x64_20* state, - const rocrand_discrete_distribution discrete_distribution) +__forceinline__ __device__ 
__host__ unsigned int + rocrand_discrete(rocrand_state_threefry4x64_20* state, + const rocrand_discrete_distribution discrete_distribution) { return rocrand_device::detail::discrete_cdf(rocrand(state), *discrete_distribution); } diff --git a/library/include/rocrand/rocrand_kernel.h b/library/include/rocrand/rocrand_kernel.h index 7b3b2441..fa0ec8de 100644 --- a/library/include/rocrand/rocrand_kernel.h +++ b/library/include/rocrand/rocrand_kernel.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_KERNEL_H_ #define ROCRAND_KERNEL_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_lfsr113.h" #include "rocrand/rocrand_mrg31k3p.h" diff --git a/library/include/rocrand/rocrand_lfsr113.h b/library/include/rocrand/rocrand_lfsr113.h index e0717ecc..62844a69 100644 --- a/library/include/rocrand/rocrand_lfsr113.h +++ b/library/include/rocrand/rocrand_lfsr113.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,11 +21,8 @@ #ifndef ROCRAND_LFSR113_H_ #define ROCRAND_LFSR113_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_common.h" +#include "rocrand/rocrand_lfsr113_precomputed.h" /** \rocrand_internal \addtogroup rocranddevice * @@ -47,6 +44,30 @@ namespace rocrand_device { +namespace detail +{ + +__forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, uint4* z) +{ + unsigned int v[4] = {z->x, z->y, z->z, z->w}; + unsigned int r[LFSR113_N] = {0}; + for(int ij = 0; ij < LFSR113_N * LFSR113_M; ij++) + { + const int i = ij / LFSR113_M; + const int j = ij % LFSR113_M; + const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0; + for(int k = 0; k < LFSR113_N; k++) + { + r[k] ^= b & m[i * LFSR113_M * LFSR113_N + j * LFSR113_N + k]; + } + } + // Copy result into z + z->x = r[0]; + z->y = r[1]; + z->z = r[2]; + z->w = r[3]; +} +} // end namespace detail class lfsr113_engine { @@ -58,58 +79,71 @@ class lfsr113_engine }; /// Initializes the internal state of the PRNG using - /// seed value \p seed, goes to \p subsequence -th subsequence + /// seed value \p seed, goes to \p subsequence -th subsequence, + /// and skips \p offset random numbers. /// /// A subsequence is 2^55 numbers long. 
- FQUALIFIERS - lfsr113_engine(const uint4 seed = {ROCRAND_LFSR113_DEFAULT_SEED_X, - ROCRAND_LFSR113_DEFAULT_SEED_Y, - ROCRAND_LFSR113_DEFAULT_SEED_Z, - ROCRAND_LFSR113_DEFAULT_SEED_W}, - const unsigned int subsequence = 0) + __forceinline__ __device__ __host__ lfsr113_engine(const uint4 seed + = {ROCRAND_LFSR113_DEFAULT_SEED_X, + ROCRAND_LFSR113_DEFAULT_SEED_Y, + ROCRAND_LFSR113_DEFAULT_SEED_Z, + ROCRAND_LFSR113_DEFAULT_SEED_W}, + const unsigned int subsequence = 0, + const unsigned long long offset = 0) { - this->seed(seed, subsequence); + this->seed(seed, subsequence, offset); } /// Reinitializes the internal state of the PRNG using new - /// seed value \p seed_value, skips \p subsequence subsequences. + /// seed value \p seed_value, skips \p subsequence subsequences + /// and skips \p offset random numbers. /// /// A subsequence is 2^55 numbers long. - FQUALIFIERS - void seed(uint4 seed_value, const unsigned long long subsequence) + __forceinline__ __device__ __host__ void seed(uint4 seed_value, + const unsigned long long subsequence, + const unsigned long long offset = 0) { m_state.subsequence = seed_value; reset_start_subsequence(); discard_subsequence(subsequence); + discard(offset); } /// Advances the internal state to skip one number. - FQUALIFIERS - void discard() + __forceinline__ __device__ __host__ void discard() { discard_state(); } + /// Advances the internal state to skip \p offset numbers. + __forceinline__ __device__ __host__ void discard(unsigned long long offset) + { +#ifdef __HIP_DEVICE_COMPILE__ + jump(offset, d_lfsr113_jump_matrices); +#else + jump(offset, h_lfsr113_jump_matrices); +#endif + } + /// Advances the internal state to skip \p subsequence subsequences. /// A subsequence is 2^55 numbers long. 
- FQUALIFIERS - void discard_subsequence(unsigned int subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned int subsequence) { - for(unsigned int i = 0; i < subsequence; i++) - { - reset_next_subsequence(); - } +// Discard n * 2^55 samples +#ifdef __HIP_DEVICE_COMPILE__ + jump(subsequence, d_lfsr113_sequence_jump_matrices); +#else + jump(subsequence, h_lfsr113_sequence_jump_matrices); +#endif } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { unsigned int b; @@ -130,8 +164,7 @@ class lfsr113_engine protected: /// Resets the state to the start of the current subsequence. - FQUALIFIERS - void reset_start_subsequence() + __forceinline__ __device__ __host__ void reset_start_subsequence() { m_state.z.x = m_state.subsequence.x; m_state.z.y = m_state.subsequence.y; @@ -139,70 +172,40 @@ class lfsr113_engine m_state.z.w = m_state.subsequence.w; } - /// Advances the subsequence by one and sets the state to the start of that subsequence. - FQUALIFIERS - void reset_next_subsequence() + // Advances the internal state to the next state. + __forceinline__ __device__ __host__ void discard_state() { - /* The following operations make the jump ahead with - 2 ^ 55 iterations for every component of the generator. 
- The internal state after the jump, however, is slightly different - from 2 ^ 55 iterations since it ignores the state in - which are found the first bits of each components, - since they are ignored in the recurrence.The state becomes - identical to what one would with normal iterations - after a call nextValue().*/ - int z, b; - - z = m_state.subsequence.x & 0xFFFFFFFE; - b = (z << 6) ^ z; - - z = (z) ^ (z << 3) ^ (z << 4) ^ (z << 6) ^ (z << 7) ^ (z << 8) ^ (z << 10) ^ (z << 11) - ^ (z << 13) ^ (z << 14) ^ (z << 16) ^ (z << 17) ^ (z << 18) ^ (z << 22) ^ (z << 24) - ^ (z << 25) ^ (z << 26) ^ (z << 28) ^ (z << 30); - - z ^= ((b >> 1) & 0x7FFFFFFF) ^ ((b >> 3) & 0x1FFFFFFF) ^ ((b >> 5) & 0x07FFFFFF) - ^ ((b >> 6) & 0x03FFFFFF) ^ ((b >> 7) & 0x01FFFFFF) ^ ((b >> 9) & 0x007FFFFF) - ^ ((b >> 13) & 0x0007FFFF) ^ ((b >> 14) & 0x0003FFFF) ^ ((b >> 15) & 0x0001FFFF) - ^ ((b >> 17) & 0x00007FFF) ^ ((b >> 18) & 0x00003FFF) ^ ((b >> 20) & 0x00000FFF) - ^ ((b >> 21) & 0x000007FF) ^ ((b >> 23) & 0x000001FF) ^ ((b >> 24) & 0x000000FF) - ^ ((b >> 25) & 0x0000007F) ^ ((b >> 26) & 0x0000003F) ^ ((b >> 27) & 0x0000001F) - ^ ((b >> 30) & 0x00000003); - m_state.subsequence.x = z; - - z = m_state.subsequence.y & 0xFFFFFFF8; - b = z ^ (z << 1); - b ^= (b << 2); - b ^= (b << 4); - b ^= (b << 8); - - b <<= 8; - b ^= (z << 22) ^ (z << 25) ^ (z << 27); - if((z & 0x80000000) != 0) - b ^= 0xABFFF000; - if((z & 0x40000000) != 0) - b ^= 0x55FFF800; - z = b ^ ((z >> 7) & 0x01FFFFFF) ^ ((z >> 20) & 0x00000FFF) ^ ((z >> 21) & 0x000007FF); - m_state.subsequence.y = z; - - z = m_state.subsequence.z & 0xFFFFFFF0; - b = (z << 13) ^ z; - z = ((b >> 3) & 0x1FFFFFFF) ^ ((b >> 17) & 0x00007FFF) ^ (z << 10) ^ (z << 11) ^ (z << 25); - m_state.subsequence.z = z; - - z = m_state.subsequence.w & 0xFFFFFF80; - b = (z << 3) ^ z; - z = (z << 14) ^ (z << 16) ^ (z << 20) ^ ((b >> 5) & 0x07FFFFFF) ^ ((b >> 9) & 0x007FFFFF) - ^ ((b >> 11) & 0x001FFFFF); - m_state.subsequence.w = z; - - reset_start_subsequence(); + 
this->next(); } - // Advances the internal state to the next state. - FQUALIFIERS - void discard_state() + __forceinline__ __device__ __host__ void + jump(unsigned long long v, + const unsigned int (&jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE]) { - this->next(); + // x~(n + v) = (A^v mod m)x~n mod m + // The matrix (A^v mod m) can be precomputed for selected values of v. + // + // For LFSR113_JUMP_LOG2 = 2 + // lfsr113_jump_matrices contains precomputed matrices: + // A^1, A^4, A^16... + // + // For LFSR113_JUMP_LOG2 = 2 and LFSR113_SEQUENCE_JUMP_LOG2 = 55 + // lfsr113_sequence_jump_matrices contains precomputed matrices: + // A^(1 * 2^55), A^(4 * 2^55), A^(16 * 2^55)... + // + // Intermediate powers can be calculated as multiplication of the powers above. + + unsigned int mi = 0; + while(v > 0) + { + const unsigned int is = static_cast(v) & ((1 << LFSR113_JUMP_LOG2) - 1); + for(unsigned int i = 0; i < is; i++) + { + detail::mul_mat_vec_inplace(jump_matrices[mi], &m_state.z); + } + mi++; + v >>= LFSR113_JUMP_LOG2; + } } protected: @@ -231,12 +234,31 @@ typedef rocrand_device::lfsr113_engine rocrand_state_lfsr113; * \param subsequence - Subsequence to start at * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ void + rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113* state) { *state = rocrand_state_lfsr113(seed, subsequence); } +/** + * \brief Initializes LFSR113 state. + * + * Initializes the LFSR113 generator \p state with the given + * \p seed, \p subsequence, and \p offset. 
+ * + * \param seed - Value to use as a seed + * \param subsequence - Subsequence to start at + * \param offset - Absolute offset into subsequence + * \param state - Pointer to state to initialize + */ +__forceinline__ __device__ __host__ void rocrand_init(const uint4 seed, + const unsigned int subsequence, + const unsigned long long offset, + rocrand_state_lfsr113* state) +{ + *state = rocrand_state_lfsr113(seed, subsequence, offset); +} + /** * \brief Returns uniformly distributed random unsigned int value * from [0; 2^32 - 1] range. @@ -249,12 +271,55 @@ void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_stat * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113* state) { return state->next(); } +/** + * \brief Updates LFSR113 state to skip ahead by \p offset elements. + * + * Updates the LFSR113 state in \p state to skip ahead by \p offset elements. + * + * \param offset - Number of elements to skip + * \param state - Pointer to state to update + */ +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_lfsr113* state) +{ + return state->discard(offset); +} + +/** + * \brief Updates LFSR113 state to skip ahead by \p subsequence subsequences. + * + * Updates the LFSR113 \p state to skip ahead by \p subsequence subsequences. + * Each subsequence is 2^55 numbers long. + * + * \param subsequence - Number of subsequences to skip + * \param state - Pointer to state to update + */ +__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned int subsequence, + rocrand_state_lfsr113* state) +{ + return state->discard_subsequence(subsequence); +} + +/** + * \brief Updates LFSR113 state to skip ahead by \p sequence sequences. + * + * Updates the LFSR113 \p state to skip ahead by \p sequence sequences. 
+ * For LFSR113 each sequence is 2^55 numbers long (equal to the size of a subsequence). + * + * \param sequence - Number of sequences to skip + * \param state - Pointer to state to update + */ +__forceinline__ __device__ __host__ void skipahead_sequence(unsigned int sequence, + rocrand_state_lfsr113* state) +{ + return state->discard_subsequence(sequence); +} + /** @} */ // end of group rocranddevice #endif // ROCRAND_LFSR113_H_ diff --git a/library/include/rocrand/rocrand_lfsr113_precomputed.h b/library/include/rocrand/rocrand_lfsr113_precomputed.h new file mode 100644 index 00000000..34de13ad --- /dev/null +++ b/library/include/rocrand/rocrand_lfsr113_precomputed.h @@ -0,0 +1,4406 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef ROCRAND_LFSR113_PRECOMPUTED_H_ +#define ROCRAND_LFSR113_PRECOMPUTED_H_ + +// Auto-generated file. Do not edit! 
+// Generated by tools/lfsr113_precomputed_generator + +#define LFSR113_N 4 +#define LFSR113_M 32 +#define LFSR113_SIZE (LFSR113_M * LFSR113_N * LFSR113_N) +#define LFSR113_JUMP_MATRICES 32 +#define LFSR113_JUMP_LOG2 2 + +// clang-format off +static const __device__ unsigned int d_lfsr113_jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE] = { + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 0, 0, + 133120, 0, 0, 0, 266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 8390656, 0, 0, 0, 16781312, 0, 0, 0, 33562625, 0, 0, 0, + 67125250, 0, 0, 0, 134250500, 0, 0, 0, 268501000, 0, 0, 0, 537002000, 0, 0, 0, + 1074004000, 0, 0, 0, 2148008001, 0, 0, 0, 1048706, 0, 0, 0, 2097412, 0, 0, 0, + 4194824, 0, 0, 0, 8389648, 0, 0, 0, 16779296, 0, 0, 0, 33558593, 0, 0, 0, + 67117186, 0, 0, 0, 134234372, 0, 0, 0, 268468744, 0, 0, 0, 536937488, 0, 0, 0, + 1073874976, 0, 0, 0, 2147749953, 0, 0, 0, 532610, 0, 0, 0, 1065220, 0, 0, 0, + 2130440, 0, 0, 0, 4260880, 0, 0, 0, 131104, 0, 0, 0, 262208, 0, 0, 0, + 524416, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195328, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 
0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 16640, + }, + { + 0, 0, 0, 0, 134300932, 0, 0, 0, 268601864, 0, 0, 0, 537203728, 0, 0, 0, + 1074407456, 0, 0, 0, 2148814913, 0, 0, 0, 2662530, 0, 0, 0, 5325060, 0, 0, 0, + 10650120, 0, 0, 0, 21300240, 0, 0, 0, 42600481, 0, 0, 0, 85200962, 0, 0, 0, + 170401925, 0, 0, 0, 340803850, 0, 0, 0, 681607700, 0, 0, 0, 1363215400, 0, 0, 0, + 2726430800, 0, 0, 0, 1157894304, 0, 0, 0, 2315788608, 0, 0, 0, 336609920, 0, 0, 0, + 673219840, 0, 0, 0, 1346439680, 0, 0, 0, 2692879361, 0, 0, 0, 1090791426, 0, 0, 0, + 2181582852, 0, 0, 0, 68198408, 0, 0, 0, 2098452, 0, 0, 0, 4196904, 0, 0, 0, + 8393808, 0, 0, 0, 16787616, 0, 0, 0, 33575233, 0, 0, 0, 67150466, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 
0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 826542200, 0, 0, 0, 1653084401, 0, 0, 0, 3306168803, 0, 0, 0, + 2317370310, 0, 0, 0, 339773324, 0, 0, 0, 679546648, 0, 0, 0, 1359093296, 0, 0, 0, + 2718186592, 0, 0, 0, 1141405888, 0, 0, 0, 2282811777, 0, 0, 0, 270656258, 0, 0, 0, + 541312516, 0, 0, 0, 1082625032, 0, 0, 0, 2165250065, 0, 0, 0, 35532835, 0, 0, 0, + 71065670, 0, 0, 0, 142131340, 0, 0, 0, 284262680, 0, 0, 0, 568525360, 0, 0, 0, + 1137050721, 0, 0, 0, 2274101442, 0, 0, 0, 253235589, 0, 0, 0, 506471179, 0, 0, 0, + 1012942358, 0, 0, 0, 2025884716, 0, 0, 0, 3234140193, 0, 0, 0, 2173313091, 0, 0, 0, + 51658887, 0, 0, 0, 103317775, 0, 0, 0, 206635550, 0, 0, 0, 413271100, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8421376, 0, 0, + 0, 16842752, 0, 0, 0, 33685504, 0, 0, 0, 67371008, 0, 0, 0, 
134742017, 0, 0, + 0, 269484034, 0, 0, 0, 538968069, 0, 0, 0, 1077936138, 0, 0, 0, 2155872276, 0, 0, + 0, 16777256, 0, 0, 0, 33554512, 0, 0, 0, 67109024, 0, 0, 0, 134218049, 0, 0, + 0, 268436098, 0, 0, 0, 536872197, 0, 0, 0, 1073744394, 0, 0, 0, 2147488788, 0, 0, + 0, 10280, 0, 0, 0, 20560, 0, 0, 0, 41120, 0, 0, 0, 82240, 0, 0, + 0, 164480, 0, 0, 0, 328960, 0, 0, 0, 657920, 0, 0, 0, 1315840, 0, 0, + 0, 2631680, 0, 0, 0, 5263360, 0, 0, 0, 2105344, 0, 0, 0, 4210688, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 2499687121, 0, 0, 0, 704406946, 0, 0, 0, 1408813893, 0, 0, 0, + 2817627786, 0, 0, 0, 1340288277, 0, 0, 0, 2680576554, 0, 0, 0, 1066185813, 0, 0, 0, + 2132371627, 0, 0, 0, 4264743254, 0, 0, 0, 4234519213, 0, 0, 0, 4174071131, 0, 0, 0, + 4053174967, 0, 0, 0, 
3811382638, 0, 0, 0, 3327797980, 0, 0, 0, 2360628665, 0, 0, 0, + 426290034, 0, 0, 0, 852580069, 0, 0, 0, 1705160138, 0, 0, 0, 3410320276, 0, 0, 0, + 2525673256, 0, 0, 0, 756379216, 0, 0, 0, 1512758433, 0, 0, 0, 3025516867, 0, 0, 0, + 1756066438, 0, 0, 0, 3512132877, 0, 0, 0, 911472843, 0, 0, 0, 1822945686, 0, 0, 0, + 3645891373, 0, 0, 0, 2996815450, 0, 0, 0, 1698663604, 0, 0, 0, 3397327208, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 591396869, 0, 0, + 0, 1182793738, 0, 0, 0, 2365587477, 0, 0, 0, 436207659, 0, 0, 0, 872415319, 0, 0, + 0, 1744830638, 0, 0, 0, 3489661276, 0, 0, 0, 2684355257, 0, 0, 0, 1073743218, 0, 0, + 0, 2147486436, 0, 0, 0, 5576, 0, 0, 0, 11152, 0, 0, 0, 22304, 0, 0, + 0, 44608, 0, 0, 0, 89216, 0, 0, 0, 178432, 0, 0, 0, 356864, 0, 0, + 0, 713728, 0, 0, 0, 1427456, 0, 0, 0, 2854912, 0, 0, 0, 5709824, 0, 0, + 0, 11419648, 0, 0, 0, 22839296, 0, 0, 0, 45678592, 0, 0, 0, 91357184, 0, 0, + 0, 182714369, 0, 0, 0, 365428738, 0, 0, 0, 147849217, 0, 0, 0, 295698434, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 490817699, 0, 0, 0, 981635398, 0, 0, 0, 1963270797, 0, 0, 0, 3926541594, 0, + 0, 0, 3558115892, 0, 0, 0, 2821264488, 0, 0, 0, 1347561681, 0, 0, 0, 2695123362, 0, + 0, 0, 1095279429, 0, 0, 0, 2190558858, 0, 0, 0, 86150421, 0, 0, 0, 172300842, 0, + 0, 0, 344601685, 0, 0, 0, 689203370, 0, 0, 0, 1378406740, 0, 0, 0, 3104893450, 0, + 0, 0, 1914819604, 0, 0, 0, 3829639209, 0, 0, 0, 3364311122, 0, 0, 0, 2433654948, 0, + 0, 0, 572342600, 0, 0, 0, 1144685201, 0, 0, 0, 2289370402, 0, 0, 0, 283773509, 0, + 0, 0, 567547018, 0, 0, 0, 1135094036, 0, 0, 0, 2270188072, 0, 0, 0, 245408849, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 
2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 2271571420, 0, 0, 0, 248175545, 0, 0, 0, 496351090, 0, 0, 0, + 992702181, 0, 0, 0, 1985404363, 0, 0, 0, 3970808727, 0, 0, 0, 3646650159, 0, 0, 0, + 2998333022, 0, 0, 0, 1701698748, 0, 0, 0, 3403397496, 0, 0, 0, 2511827697, 0, 0, 0, + 728688099, 0, 0, 0, 1457376199, 0, 0, 0, 2914752399, 0, 0, 0, 1534537503, 0, 0, 0, + 3069075006, 0, 0, 0, 1843182716, 0, 0, 0, 3686365432, 0, 0, 0, 3077763568, 0, 0, 0, + 1860559841, 0, 0, 0, 3721119683, 0, 0, 0, 3147272070, 0, 0, 0, 1999576845, 0, 0, 0, + 3999153690, 0, 0, 0, 3703340085, 0, 0, 0, 1042126263, 0, 0, 0, 2084252526, 0, 0, 0, + 4168505053, 0, 0, 0, 4042042811, 0, 0, 0, 3789118327, 0, 0, 0, 3283269358, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 661652741, 0, 0, + 0, 1323305483, 0, 0, 0, 2646610967, 0, 0, 0, 998254638, 0, 0, 0, 1996509277, 0, 0, + 0, 3993018554, 0, 0, 0, 3691069813, 0, 0, 0, 3087172330, 0, 0, 0, 1879377365, 0, 0, + 0, 3758754731, 0, 0, 0, 3222542166, 0, 0, 0, 2150117036, 0, 0, 0, 5266776, 0, 0, + 0, 10533552, 0, 0, 0, 21067104, 0, 0, 0, 42134208, 0, 0, 0, 84268416, 0, 0, + 0, 168536833, 0, 0, 0, 337073666, 0, 0, 0, 674147332, 0, 0, 0, 1348294664, 0, 0, + 0, 2696589329, 0, 0, 0, 1098211362, 0, 0, 0, 2196422724, 0, 0, 0, 97878152, 0, 0, + 0, 195756305, 0, 0, 0, 391512610, 0, 0, 0, 165413185, 0, 0, 0, 330826370, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2541187255, 0, 0, 0, 787407215, 0, 0, 0, 1574814430, 0, 0, 0, 3149628860, 0, + 0, 0, 2004290425, 0, 0, 0, 4008580850, 0, 0, 0, 3722194405, 0, 0, 0, 3149421515, 0, + 0, 0, 2003875734, 0, 0, 0, 4007751468, 0, 0, 0, 3720535640, 0, 0, 0, 3146103984, 0, + 0, 0, 1997240673, 0, 0, 0, 3994481347, 0, 0, 0, 3693995398, 0, 0, 0, 791460795, 0, + 0, 0, 1582921591, 0, 0, 
0, 3165843182, 0, 0, 0, 2036719068, 0, 0, 0, 4073438136, 0, + 0, 0, 3851908976, 0, 0, 0, 3408850657, 0, 0, 0, 2522734018, 0, 0, 0, 750500741, 0, + 0, 0, 1501001483, 0, 0, 0, 3002002966, 0, 0, 0, 1709038637, 0, 0, 0, 3418077275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 1739616249, 0, 0, 0, 3479232498, 0, 0, 0, 2663497700, 0, 0, 0, + 1032028104, 0, 0, 0, 2064056209, 0, 0, 0, 4128112418, 0, 0, 0, 3961257541, 0, 0, 0, + 3627547787, 0, 0, 0, 2960128279, 0, 0, 0, 1625289262, 0, 0, 0, 3250578525, 0, 0, 0, + 2206189754, 0, 0, 0, 117412213, 0, 0, 0, 234824426, 0, 0, 0, 469648853, 0, 0, 0, + 939297707, 0, 0, 0, 1878595415, 0, 0, 0, 3757190830, 0, 0, 0, 3219414364, 0, 0, 0, + 2143861433, 0, 0, 0, 4287722866, 0, 0, 0, 4280478436, 0, 0, 0, 4265989576, 0, 0, 0, + 4237011857, 0, 0, 0, 4179056419, 0, 0, 0, 2510209471, 0, 0, 0, 725451647, 0, 0, 0, + 1450903295, 0, 0, 0, 2901806591, 0, 0, 0, 1508645886, 0, 0, 0, 3017291772, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2815455249, 0, 0, + 0, 1335943203, 0, 0, 0, 2671886407, 0, 0, 0, 1048805518, 0, 0, 0, 2097611036, 0, 0, + 0, 4195222072, 0, 0, 0, 4095476849, 0, 0, 0, 3895986402, 0, 0, 0, 3497005508, 0, 0, + 0, 2699043721, 0, 0, 0, 1103120146, 0, 0, 0, 2206240292, 0, 0, 0, 117513288, 0, 0, + 0, 235026577, 0, 0, 0, 470053155, 0, 0, 0, 940106310, 0, 0, 0, 1880212621, 0, 0, + 0, 3760425243, 0, 0, 0, 3225883190, 0, 0, 0, 2156799084, 0, 
0, 0, 18630872, 0, 0, + 0, 37261744, 0, 0, 0, 74523488, 0, 0, 0, 149046977, 0, 0, 0, 298093954, 0, 0, + 0, 596187909, 0, 0, 0, 1192375818, 0, 0, 0, 703863812, 0, 0, 0, 1407727624, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2614536692, 0, 0, 0, 934106089, 0, 0, 0, 1868212179, 0, 0, 0, 3736424359, 0, + 0, 0, 3177881422, 0, 0, 0, 2060795549, 0, 0, 0, 4121591099, 0, 0, 0, 3948214903, 0, + 0, 0, 3601462510, 0, 0, 0, 2907957725, 0, 0, 0, 1520948154, 0, 0, 0, 3041896308, 0, + 0, 0, 1788825320, 0, 0, 0, 3577650640, 0, 0, 0, 2860333984, 0, 0, 0, 3475824309, 0, + 0, 0, 2656681322, 0, 0, 0, 1018395349, 0, 0, 0, 2036790698, 0, 0, 0, 4073581396, 0, + 0, 0, 3852195497, 0, 0, 0, 3409423699, 0, 0, 0, 2523880103, 0, 0, 0, 752792911, 0, + 0, 0, 1505585823, 0, 0, 0, 3011171646, 0, 0, 0, 1727375997, 0, 0, 0, 3454751994, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, + { + 0, 0, 0, 0, 1915396941, 0, 0, 0, 3830793883, 0, 0, 0, 3366620471, 0, 0, 0, + 2438273647, 0, 0, 0, 581579999, 0, 0, 0, 1163159998, 0, 0, 0, 2326319996, 0, 0, 0, + 357672696, 0, 0, 0, 715345393, 0, 0, 0, 1430690786, 0, 0, 0, 2861381572, 0, 0, 0, + 1427795848, 0, 0, 0, 2855591696, 0, 0, 0, 1416216096, 0, 0, 0, 2832432193, 0, 0, 0, + 1369897090, 0, 0, 0, 2739794180, 0, 0, 0, 1184621065, 0, 0, 0, 2369242131, 0, 0, 0, + 443516967, 0, 0, 0, 887033934, 0, 0, 0, 1774067868, 0, 0, 0, 3548135736, 0, 0, 0, + 2801304176, 0, 0, 0, 
1307641056, 0, 0, 0, 3922242189, 0, 0, 0, 3549517082, 0, 0, 0, + 2804066868, 0, 0, 0, 1313166441, 0, 0, 0, 2626332883, 0, 0, 0, 957698470, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1432822912, 0, 0, + 0, 2865645824, 0, 0, 0, 1436324352, 0, 0, 0, 2872648704, 0, 0, 0, 1450330112, 0, 0, + 0, 2900660224, 0, 0, 0, 1506353153, 0, 0, 0, 3012706307, 0, 0, 0, 1730445319, 0, 0, + 0, 3460890639, 0, 0, 0, 2626813983, 0, 0, 0, 958660670, 0, 0, 0, 1917321341, 0, 0, + 0, 3834642683, 0, 0, 0, 3374318071, 0, 0, 0, 2453668846, 0, 0, 0, 612370397, 0, 0, + 0, 1224740795, 0, 0, 0, 2449481590, 0, 0, 0, 603995885, 0, 0, 0, 1207991771, 0, 0, + 0, 2415983542, 0, 0, 0, 536999789, 0, 0, 0, 1073999578, 0, 0, 0, 2147999156, 0, 0, + 0, 1031016, 0, 0, 0, 2062032, 0, 0, 0, 1431947552, 0, 0, 0, 2863895104, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2983242506, 0, 0, 0, 1671517716, 0, 0, 0, 3343035433, 0, 0, 0, 2391103570, 0, + 0, 0, 487239844, 0, 0, 0, 974479689, 0, 0, 0, 1948959378, 0, 0, 0, 3897918757, 0, + 0, 0, 3500870219, 0, 0, 0, 2706773142, 0, 0, 0, 1118578988, 0, 0, 0, 2237157976, 0, + 0, 0, 179348657, 0, 0, 0, 358697315, 0, 0, 0, 717394631, 0, 0, 0, 3830812293, 0, + 0, 0, 3366657290, 0, 0, 0, 2438347285, 0, 0, 0, 581727274, 0, 0, 0, 1163454549, 0, + 0, 0, 2326909099, 0, 0, 0, 358850902, 0, 0, 0, 717701804, 0, 0, 0, 1435403608, 0, + 0, 0, 2870807216, 0, 0, 0, 1446647137, 0, 0, 0, 2893294274, 0, 0, 0, 1491621253, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2199499341, + 0, 0, 0, 104031387, 0, 0, 0, 208062775, 0, 0, 0, 416125551, 0, 0, 0, 832251102, + 0, 0, 0, 1664502205, 0, 0, 0, 3329004410, 0, 0, 0, 2363041525, 0, 0, 0, 431115754, + 0, 0, 0, 862231508, 0, 0, 0, 1724463016, 0, 0, 0, 3448926032, 0, 0, 0, 2602884769, + 0, 0, 0, 910802242, 0, 0, 0, 1821604484, 0, 0, 0, 3643208968, 0, 0, 0, 2991450640, + 0, 0, 0, 1687933984, 0, 0, 0, 3375867968, 0, 0, 0, 2456768640, 0, 0, 0, 618569985, + 0, 0, 0, 1237139970, 0, 
0, 0, 274937417, 0, 0, 0, 549874835, 0, 0, 0, 1099749670, + }, + { + 0, 0, 0, 0, 4038889453, 0, 0, 0, 3782811611, 0, 0, 0, 3270655926, 0, 0, 0, + 2246344557, 0, 0, 0, 197721819, 0, 0, 0, 395443639, 0, 0, 0, 790887279, 0, 0, 0, + 1581774559, 0, 0, 0, 3163549119, 0, 0, 0, 2032130942, 0, 0, 0, 4064261884, 0, 0, 0, + 3833556473, 0, 0, 0, 3372145651, 0, 0, 0, 2449324007, 0, 0, 0, 603680719, 0, 0, 0, + 1207361439, 0, 0, 0, 2414722878, 0, 0, 0, 534478461, 0, 0, 0, 1068956923, 0, 0, 0, + 2137913847, 0, 0, 0, 4275827694, 0, 0, 0, 4256688093, 0, 0, 0, 4218408890, 0, 0, 0, + 4141850484, 0, 0, 0, 3988733673, 0, 0, 0, 734196287, 0, 0, 0, 1468392575, 0, 0, 0, + 2936785150, 0, 0, 0, 1578603005, 0, 0, 0, 3157206011, 0, 0, 0, 2019444726, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2740852545, 0, 0, + 0, 1186737794, 0, 0, 0, 2373475589, 0, 0, 0, 451983883, 0, 0, 0, 903967767, 0, 0, + 0, 1807935534, 0, 0, 0, 3615871068, 0, 0, 0, 2936774840, 0, 0, 0, 1578582385, 0, 0, + 0, 3157164770, 0, 0, 0, 2019362244, 0, 0, 0, 4038724489, 0, 0, 0, 3782481683, 0, 0, + 0, 3269996070, 0, 0, 0, 2245024844, 0, 0, 0, 195082393, 0, 0, 0, 390164786, 0, 0, + 0, 780329572, 0, 0, 0, 1560659145, 0, 0, 0, 3121318290, 0, 0, 0, 1947669285, 0, 0, + 0, 3895338570, 0, 0, 0, 3495709844, 0, 0, 0, 2696452393, 0, 0, 0, 1097937490, 0, 0, + 0, 2195874980, 0, 0, 0, 96782664, 0, 0, 0, 2832696784, 0, 0, 0, 1370426272, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2797805596, 0, 0, 0, 1300643896, 0, 0, 0, 2601287792, 0, 0, 0, 907608289, 0, + 0, 0, 1815216578, 0, 0, 0, 3630433157, 0, 0, 0, 2965899019, 0, 0, 0, 1636830742, 0, + 0, 0, 3273661484, 0, 0, 0, 2252355672, 0, 0, 0, 209744048, 0, 0, 0, 419488096, 0, + 0, 0, 838976192, 0, 0, 0, 1677952385, 0, 0, 0, 3355904770, 0, 0, 0, 919418393, 0, + 0, 0, 1838836786, 0, 0, 0, 3677673572, 0, 0, 0, 3060379848, 0, 0, 0, 1825792400, 0, + 0, 0, 3651584800, 0, 0, 0, 3008202304, 0, 0, 0, 1721437312, 0, 0, 0, 3442874624, 0, + 0, 0, 2590781953, 0, 0, 0, 886596611, 0, 
0, 0, 1773193223, 0, 0, 0, 3546386446, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4196822677, + 0, 0, 0, 4098678059, 0, 0, 0, 3902388822, 0, 0, 0, 3509810348, 0, 0, 0, 2724653400, + 0, 0, 0, 1154339505, 0, 0, 0, 2308679010, 0, 0, 0, 322390725, 0, 0, 0, 644781450, + 0, 0, 0, 1289562901, 0, 0, 0, 2579125802, 0, 0, 0, 863284308, 0, 0, 0, 1726568616, + 0, 0, 0, 3453137233, 0, 0, 0, 2611307171, 0, 0, 0, 927647046, 0, 0, 0, 1855294093, + 0, 0, 0, 3710588186, 0, 0, 0, 3126209076, 0, 0, 0, 1957450856, 0, 0, 0, 3914901713, + 0, 0, 0, 3534836131, 0, 0, 0, 1598344658, 0, 0, 0, 3196689317, 0, 0, 0, 2098411338, + }, + { + 0, 0, 0, 0, 3824344628, 0, 0, 0, 3353721960, 0, 0, 0, 2412476624, 0, 0, 0, + 529985953, 0, 0, 0, 1059971907, 0, 0, 0, 2119943815, 0, 0, 0, 4239887631, 0, 0, 0, + 4184807967, 0, 0, 0, 4074648638, 0, 0, 0, 3854329981, 0, 0, 0, 3413692666, 0, 0, 0, + 2532418036, 0, 0, 0, 769868776, 0, 0, 0, 1539737553, 0, 0, 0, 3079475106, 0, 0, 0, + 1863982917, 0, 0, 0, 3727965834, 0, 0, 0, 3160964373, 0, 0, 0, 2026961450, 0, 0, 0, + 4053922901, 0, 0, 0, 3812878506, 0, 0, 0, 3330789716, 0, 0, 0, 2366612137, 0, 0, 0, + 438256979, 0, 0, 0, 876513958, 0, 0, 0, 2341456760, 0, 0, 0, 387946225, 0, 0, 0, + 775892451, 0, 0, 0, 1551784902, 0, 0, 0, 3103569805, 0, 0, 0, 1912172314, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3220345106, 0, 0, + 0, 2145722916, 0, 0, 0, 4291445832, 0, 0, 0, 4287924368, 0, 0, 0, 4280881440, 0, 0, + 0, 4266795584, 0, 0, 0, 4238623872, 0, 0, 0, 4182280448, 0, 0, 0, 4069593601, 0, 0, + 0, 3844219907, 0, 0, 0, 3393472519, 0, 0, 0, 2491977742, 0, 0, 0, 688988188, 0, 0, + 0, 1377976376, 0, 0, 0, 2755952753, 0, 0, 0, 1216938211, 0, 0, 0, 2433876422, 0, 0, + 0, 572785549, 0, 0, 0, 1145571098, 0, 0, 0, 2291142197, 0, 0, 0, 287317098, 0, 0, + 0, 574634197, 0, 0, 0, 1149268394, 0, 0, 0, 2298536789, 0, 0, 0, 302106282, 0, 0, + 0, 604212565, 0, 0, 0, 1208425131, 0, 0, 0, 805086276, 0, 0, 0, 1610172553, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3324387232, 0, 0, 0, 2353807168, 0, 0, 0, 412647041, 0, 0, 0, 825294082, 0, + 0, 0, 1650588165, 0, 0, 0, 3301176331, 0, 0, 0, 2307385367, 0, 0, 0, 319803438, 0, + 0, 0, 639606877, 0, 0, 0, 1279213754, 0, 0, 0, 2558427508, 0, 0, 0, 821887721, 0, + 0, 0, 1643775442, 0, 0, 0, 3287550884, 0, 0, 0, 2280134472, 0, 0, 0, 3388354864, 0, + 0, 0, 2481742433, 0, 0, 0, 668517571, 0, 0, 0, 1337035143, 0, 0, 0, 2674070287, 0, + 0, 0, 1053173279, 0, 0, 0, 2106346559, 0, 0, 0, 4212693118, 0, 0, 0, 4130418941, 0, + 0, 0, 3965870586, 0, 0, 0, 3636773876, 0, 0, 0, 2978580456, 0, 0, 0, 1662193616, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2034053848, + 0, 0, 0, 4068107696, 0, 0, 0, 3841248097, 0, 0, 0, 3387528899, 0, 0, 0, 2480090502, + 0, 0, 0, 665213709, 0, 0, 0, 1330427418, 0, 0, 0, 2660854837, 0, 0, 0, 1026742378, + 0, 0, 0, 2053484756, 0, 0, 0, 4106969513, 0, 0, 0, 3918971730, 0, 0, 0, 3542976165, + 0, 0, 0, 2790985034, 0, 0, 0, 1287002772, 0, 0, 0, 2574005545, 0, 0, 0, 853043794, + 0, 0, 0, 1706087588, 0, 0, 0, 3412175176, 0, 0, 0, 2529383056, 0, 0, 0, 763798816, + 0, 0, 0, 1527597633, 0, 0, 0, 3475482203, 0, 0, 0, 2655997110, 0, 0, 0, 1017026924, + }, + { + 0, 0, 0, 0, 1099834596, 0, 0, 0, 2199669192, 0, 0, 0, 104371089, 0, 0, 0, + 208742178, 0, 0, 0, 417484356, 0, 0, 0, 834968712, 0, 0, 0, 1669937425, 0, 0, 0, + 3339874850, 0, 0, 0, 2384782404, 0, 0, 0, 474597512, 0, 0, 0, 949195024, 0, 0, 0, + 1898390048, 0, 0, 0, 3796780096, 0, 0, 0, 3298592897, 0, 0, 0, 2302218499, 0, 0, 0, + 309469703, 0, 0, 0, 618939406, 0, 0, 0, 1237878812, 0, 0, 0, 2475757624, 0, 0, 0, + 656547953, 0, 0, 0, 1313095907, 0, 0, 0, 2626191815, 0, 0, 0, 957416334, 0, 0, 0, + 1914832669, 0, 0, 0, 3829665339, 0, 0, 0, 2298886291, 0, 0, 0, 302805287, 0, 0, 0, + 605610574, 0, 0, 0, 1211221148, 0, 0, 0, 2422442297, 0, 0, 0, 549917298, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 348140706, 0, 
0, + 0, 696281412, 0, 0, 0, 1392562824, 0, 0, 0, 2785125649, 0, 0, 0, 1275284003, 0, 0, + 0, 2550568007, 0, 0, 0, 806168719, 0, 0, 0, 1612337439, 0, 0, 0, 3224674878, 0, 0, + 0, 2154382460, 0, 0, 0, 13797624, 0, 0, 0, 27595248, 0, 0, 0, 55190496, 0, 0, + 0, 110380992, 0, 0, 0, 220761985, 0, 0, 0, 441523971, 0, 0, 0, 883047943, 0, 0, + 0, 1766095886, 0, 0, 0, 3532191772, 0, 0, 0, 2769416249, 0, 0, 0, 1243865203, 0, 0, + 0, 2487730406, 0, 0, 0, 680493516, 0, 0, 0, 1360987032, 0, 0, 0, 2721974065, 0, 0, + 0, 1148980834, 0, 0, 0, 2297961669, 0, 0, 0, 87035176, 0, 0, 0, 174070353, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 470870896, 0, 0, 0, 941741792, 0, 0, 0, 1883483584, 0, 0, 0, 3766967168, 0, + 0, 0, 3238967041, 0, 0, 0, 2182966786, 0, 0, 0, 70966277, 0, 0, 0, 141932555, 0, + 0, 0, 283865111, 0, 0, 0, 567730223, 0, 0, 0, 1135460447, 0, 0, 0, 2270920895, 0, + 0, 0, 246874494, 0, 0, 0, 493748989, 0, 0, 0, 987497979, 0, 0, 0, 1772675207, 0, + 0, 0, 3545350414, 0, 0, 0, 2795733533, 0, 0, 0, 1296499770, 0, 0, 0, 2592999541, 0, + 0, 0, 891031787, 0, 0, 0, 1782063574, 0, 0, 0, 3564127149, 0, 0, 0, 2833287003, 0, + 0, 0, 1371606711, 0, 0, 0, 2743213422, 0, 0, 0, 1191459548, 0, 0, 0, 2382919096, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30521863, + 0, 0, 0, 61043727, 0, 0, 0, 122087454, 0, 0, 0, 244174909, 0, 0, 0, 488349818, + 0, 0, 0, 976699637, 0, 0, 0, 1953399275, 0, 0, 0, 3906798551, 0, 0, 0, 3518629806, + 0, 0, 0, 2742292316, 0, 0, 0, 1189617336, 0, 0, 0, 2379234673, 0, 0, 0, 463502051, + 0, 0, 0, 927004102, 0, 0, 0, 1854008205, 0, 0, 0, 3708016410, 0, 0, 0, 3121065525, + 0, 0, 0, 1947163754, 0, 0, 0, 3894327508, 0, 0, 0, 3493687720, 0, 0, 0, 2692408145, + 0, 0, 0, 1089848995, 0, 0, 0, 2151298880, 0, 0, 0, 7630465, 0, 0, 0, 15260931, + }, + { + 0, 0, 0, 0, 3400190380, 0, 0, 0, 2505413465, 0, 0, 0, 715859635, 0, 0, 0, + 1431719270, 0, 0, 0, 2863438540, 0, 0, 0, 1431909784, 0, 0, 0, 
2863819568, 0, 0, 0, + 1432671840, 0, 0, 0, 2865343680, 0, 0, 0, 1435720064, 0, 0, 0, 2871440128, 0, 0, 0, + 1447912961, 0, 0, 0, 2895825923, 0, 0, 0, 1496684550, 0, 0, 0, 2993369100, 0, 0, 0, + 1691770904, 0, 0, 0, 3383541809, 0, 0, 0, 2472116322, 0, 0, 0, 649265349, 0, 0, 0, + 1298530698, 0, 0, 0, 2597061396, 0, 0, 0, 899155496, 0, 0, 0, 1798310993, 0, 0, 0, + 3596621986, 0, 0, 0, 2898276677, 0, 0, 0, 2469047078, 0, 0, 0, 643126861, 0, 0, 0, + 1286253722, 0, 0, 0, 2572507445, 0, 0, 0, 850047595, 0, 0, 0, 1700095190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2692483345, 0, 0, + 0, 1089999394, 0, 0, 0, 2179998788, 0, 0, 0, 65030280, 0, 0, 0, 130060560, 0, 0, + 0, 260121121, 0, 0, 0, 520242243, 0, 0, 0, 1040484486, 0, 0, 0, 2080968972, 0, 0, + 0, 4161937944, 0, 0, 0, 4028908593, 0, 0, 0, 3762849891, 0, 0, 0, 3230732486, 0, 0, + 0, 2166497676, 0, 0, 0, 38028056, 0, 0, 0, 76056112, 0, 0, 0, 152112225, 0, 0, + 0, 304224450, 0, 0, 0, 608448901, 0, 0, 0, 1216897803, 0, 0, 0, 2433795606, 0, 0, + 0, 572623917, 0, 0, 0, 1145247834, 0, 0, 0, 2290495669, 0, 0, 0, 286024042, 0, 0, + 0, 572048085, 0, 0, 0, 1144096170, 0, 0, 0, 673120836, 0, 0, 0, 1346241672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 27673132, 0, 0, 0, 55346265, 0, 0, 0, 110692530, 0, 0, 0, 221385060, 0, + 0, 0, 442770121, 0, 0, 0, 885540243, 0, 0, 0, 1771080487, 0, 0, 0, 3542160975, 0, + 0, 0, 2789354654, 0, 0, 0, 1283742012, 0, 0, 0, 2567484024, 0, 0, 0, 840000753, 0, + 0, 0, 1680001507, 0, 0, 0, 3360003015, 0, 0, 0, 2425038735, 0, 0, 0, 548408626, 0, + 0, 0, 1096817252, 0, 0, 0, 2193634504, 0, 0, 0, 92301712, 0, 0, 0, 184603425, 0, + 0, 0, 369206850, 0, 0, 0, 738413700, 0, 0, 0, 1476827400, 0, 0, 0, 2953654801, 0, + 0, 0, 1612342306, 0, 0, 0, 3224684613, 0, 0, 0, 2154401931, 0, 0, 0, 13836566, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33833481, + 0, 0, 0, 67666962, 0, 0, 0, 135333924, 0, 0, 0, 270667848, 0, 0, 0, 
541335697, + 0, 0, 0, 1082671394, 0, 0, 0, 2165342788, 0, 0, 0, 35718281, 0, 0, 0, 71436563, + 0, 0, 0, 142873126, 0, 0, 0, 285746252, 0, 0, 0, 571492505, 0, 0, 0, 1142985010, + 0, 0, 0, 2285970021, 0, 0, 0, 276972746, 0, 0, 0, 553945492, 0, 0, 0, 1107890985, + 0, 0, 0, 2215781970, 0, 0, 0, 136596644, 0, 0, 0, 273193289, 0, 0, 0, 546386578, + 0, 0, 0, 1092773156, 0, 0, 0, 2151712833, 0, 0, 0, 8458370, 0, 0, 0, 16916740, + }, + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1473249760, 0, 0, + 0, 2946499520, 0, 0, 0, 1598031745, 0, 0, 0, 3196063490, 0, 0, 0, 2097159684, 0, 0, + 0, 4194319368, 0, 0, 0, 4093671441, 0, 0, 0, 3892375586, 0, 0, 0, 3489783876, 0, 0, + 0, 2684600457, 0, 0, 0, 1074233618, 0, 0, 0, 2148467236, 0, 0, 0, 1967176, 0, 0, + 0, 3934352, 0, 0, 0, 7868704, 0, 0, 0, 15737408, 0, 0, 0, 31474816, 0, 0, + 0, 62949632, 0, 0, 0, 125899264, 0, 0, 0, 251798529, 0, 0, 0, 503597059, 0, 0, + 0, 1007194118, 0, 0, 0, 2014388236, 0, 0, 0, 4028776473, 0, 0, 0, 3762585651, 0, 0, + 0, 3230204006, 0, 0, 0, 2165440716, 0, 0, 0, 1442054264, 0, 0, 0, 2884108528, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1749068055, 0, 0, 0, 3498136111, 0, 0, 0, 2701304927, 0, 0, 0, 1107642558, 0, + 0, 0, 2215285117, 0, 0, 0, 135602938, 0, 0, 0, 271205877, 0, 0, 0, 542411755, 0, + 0, 0, 1084823510, 0, 0, 0, 
2169647020, 0, 0, 0, 44326744, 0, 0, 0, 88653489, 0, + 0, 0, 177306978, 0, 0, 0, 354613956, 0, 0, 0, 709227913, 0, 0, 0, 1019953669, 0, + 0, 0, 2039907338, 0, 0, 0, 4079814677, 0, 0, 0, 3864662058, 0, 0, 0, 3434356820, 0, + 0, 0, 2573746345, 0, 0, 0, 852525394, 0, 0, 0, 1705050788, 0, 0, 0, 3410101576, 0, + 0, 0, 2525235857, 0, 0, 0, 755504418, 0, 0, 0, 1511008837, 0, 0, 0, 3022017675, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67141650, + 0, 0, 0, 134283300, 0, 0, 0, 268566600, 0, 0, 0, 537133200, 0, 0, 0, 1074266400, + 0, 0, 0, 2148532800, 0, 0, 0, 2098304, 0, 0, 0, 4196609, 0, 0, 0, 8393218, + 0, 0, 0, 16786436, 0, 0, 0, 33572873, 0, 0, 0, 67145746, 0, 0, 0, 134291492, + 0, 0, 0, 268582984, 0, 0, 0, 537165968, 0, 0, 0, 1074331936, 0, 0, 0, 2148663872, + 0, 0, 0, 2360448, 0, 0, 0, 4720897, 0, 0, 0, 9441794, 0, 0, 0, 18883588, + 0, 0, 0, 37767176, 0, 0, 0, 8392706, 0, 0, 0, 16785412, 0, 0, 0, 33570825, + }, + { + 0, 0, 0, 0, 1874778401, 0, 0, 0, 3749556802, 0, 0, 0, 3204146308, 0, 0, 0, + 2113325320, 0, 0, 0, 4226650640, 0, 0, 0, 4158333984, 0, 0, 0, 4021700672, 0, 0, 0, + 3748434048, 0, 0, 0, 3201900800, 0, 0, 0, 2108834304, 0, 0, 0, 4217668608, 0, 0, 0, + 4140369920, 0, 0, 0, 3985772545, 0, 0, 0, 3676577794, 0, 0, 0, 3058188292, 0, 0, 0, + 1821409288, 0, 0, 0, 3642818577, 0, 0, 0, 2990669858, 0, 0, 0, 1686372420, 0, 0, 0, + 3372744841, 0, 0, 0, 2450522386, 0, 0, 0, 606077476, 0, 0, 0, 1212154952, 0, 0, 0, + 2424309905, 0, 0, 0, 553652514, 0, 0, 0, 767490916, 0, 0, 0, 1534981833, 0, 0, 0, + 3069963666, 0, 0, 0, 1844960036, 0, 0, 0, 3689920072, 0, 0, 0, 3084872848, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 
0, 4261413888, 0, 0, 0, 4227860480, 0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2164392972, 0, 0, 0, 33818648, 0, 0, 0, 67637296, 0, 0, 0, 135274592, 0, + 0, 0, 270549185, 0, 0, 0, 541098371, 0, 0, 0, 1082196742, 0, 0, 0, 2164393484, 0, + 0, 0, 33819672, 0, 0, 0, 67639344, 0, 0, 0, 135278688, 0, 0, 0, 270557377, 0, + 0, 0, 541114755, 0, 0, 0, 1082229511, 0, 0, 0, 2164459022, 0, 0, 0, 2198079504, 0, + 0, 0, 101191712, 0, 0, 0, 202383424, 0, 0, 0, 404766849, 0, 0, 0, 809533698, 0, + 0, 0, 1619067396, 0, 0, 0, 3238134792, 0, 0, 0, 2181302288, 0, 0, 0, 67637280, 0, + 0, 0, 135274560, 0, 0, 0, 270549121, 0, 0, 0, 541098243, 0, 0, 0, 1082196486, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2304, + 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, + 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, 0, 0, 0, 589824, + 0, 0, 0, 1179648, 0, 0, 0, 2359296, 0, 0, 0, 4718593, 0, 0, 0, 9437186, + 0, 0, 0, 18874372, 0, 0, 0, 37748744, 0, 0, 0, 75497488, 0, 0, 0, 150994976, + 0, 0, 0, 301989953, 0, 0, 0, 603979906, 0, 0, 0, 1207959812, 0, 0, 0, 2415919624, + 0, 0, 0, 536871952, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + }, + { + 0, 0, 0, 0, 882001920, 0, 0, 0, 1764003840, 0, 0, 0, 3528007680, 0, 0, 0, + 2761048065, 0, 0, 0, 1227128834, 0, 0, 0, 2454257668, 0, 0, 0, 613548040, 0, 0, 0, + 1227096080, 0, 0, 0, 2454192160, 0, 0, 0, 613417024, 0, 0, 0, 1226834048, 0, 0, 0, + 2453668096, 0, 0, 0, 612368896, 0, 0, 0, 1224737792, 0, 0, 0, 2449475584, 0, 0, 0, + 603983872, 0, 0, 0, 1207967744, 0, 0, 0, 2415935489, 0, 0, 0, 536903682, 0, 0, 0, + 1073807364, 0, 0, 0, 2147614729, 0, 0, 0, 262162, 0, 0, 0, 524324, 0, 0, 0, + 1048648, 0, 0, 
0, 2097296, 0, 0, 0, 886196512, 0, 0, 0, 1772393024, 0, 0, 0, + 3544786048, 0, 0, 0, 2794604800, 0, 0, 0, 1294242304, 0, 0, 0, 2588484608, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8390658, + 0, 0, 0, 16781316, 0, 0, 0, 33562633, 0, 0, 0, 67125266, 0, 0, 0, 134250532, + 0, 0, 0, 268501064, 0, 0, 0, 537002128, 0, 0, 0, 1074004256, 0, 0, 0, 2148008512, + 0, 0, 0, 1049728, 0, 0, 0, 2099456, 0, 0, 0, 4198913, 0, 0, 0, 8397826, + 0, 0, 0, 16795652, 0, 0, 0, 33591305, 0, 0, 0, 67182610, 0, 0, 0, 134365220, + 0, 0, 0, 268730440, 0, 0, 0, 537460880, 0, 0, 0, 1074921760, 0, 0, 0, 2149843520, + 0, 0, 0, 4719745, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195329, + }, + { + 0, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, + 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, + 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 
0, + 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, + 33554433, 0, 0, 0, 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, + 536870928, 0, 0, 0, 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, + 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 
81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 4160, 0, 0, 0, 8320, 0, 0, 0, 16640, 0, 0, 0, + 33280, 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, + 532480, 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259840, 0, 0, 0, + 8519680, 0, 0, 0, 17039360, 0, 0, 0, 34078721, 0, 0, 0, 68157442, 0, 0, 0, + 136314884, 0, 0, 0, 272629768, 0, 0, 0, 545259536, 0, 0, 0, 1090519072, 0, 0, 0, + 2181038144, 0, 0, 0, 67108992, 0, 0, 0, 134217984, 0, 0, 0, 268435968, 0, 0, 0, + 536871936, 0, 0, 0, 1073743872, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 
2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 3162112, 0, 0, 0, 6324224, 0, 0, 0, 12648448, 0, 0, 0, + 25296896, 0, 0, 0, 50593793, 0, 0, 0, 101187587, 0, 0, 0, 202375174, 0, 0, 0, + 404750348, 0, 0, 0, 809500696, 0, 0, 0, 1619001392, 0, 0, 0, 3238002785, 0, 0, 0, + 2181038274, 0, 0, 0, 67109252, 0, 0, 0, 134218504, 0, 0, 0, 268437008, 0, 0, 0, + 536874016, 0, 0, 0, 1073748032, 0, 0, 0, 2147496065, 0, 0, 0, 24834, 0, 0, 0, + 49668, 0, 0, 0, 99336, 0, 0, 0, 198672, 0, 0, 0, 397344, 0, 0, 0, + 794688, 0, 0, 0, 1589376, 0, 0, 0, 49408, 0, 0, 0, 98816, 0, 0, 0, + 197632, 0, 0, 0, 395264, 0, 0, 0, 790528, 0, 0, 0, 1581056, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8704, 0, 0, + 0, 17408, 0, 0, 0, 34816, 0, 0, 0, 69632, 0, 0, 0, 139264, 0, 0, + 0, 278528, 0, 0, 0, 557056, 0, 0, 0, 1114112, 0, 0, 0, 2228224, 0, 0, + 0, 4456448, 0, 0, 0, 8912896, 0, 0, 0, 17825792, 0, 0, 0, 35651584, 0, 0, + 0, 71303168, 0, 0, 0, 142606337, 0, 0, 0, 285212674, 0, 0, 0, 570425349, 0, 0, + 0, 1140850698, 0, 0, 0, 2281701397, 0, 0, 0, 268435498, 0, 0, 0, 536870997, 0, 0, + 0, 1073741994, 0, 0, 0, 2147483988, 0, 0, 0, 680, 0, 0, 0, 1360, 0, 0, + 0, 2720, 0, 0, 0, 5440, 0, 0, 0, 2176, 0, 0, 0, 4352, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 
1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 2554888269, 0, 0, 0, 814809242, 0, 0, 0, 1629618484, 0, 0, 0, + 3259236968, 0, 0, 0, 2223506641, 0, 0, 0, 152045986, 0, 0, 0, 304091973, 0, 0, 0, + 608183946, 0, 0, 0, 1216367892, 0, 0, 0, 2432735785, 0, 0, 0, 570504275, 0, 0, 0, + 1141008550, 0, 0, 0, 2282017101, 0, 0, 0, 269066906, 0, 0, 0, 538133812, 0, 0, 0, + 1076267624, 0, 0, 0, 2152535249, 0, 0, 0, 10103202, 0, 0, 0, 20206404, 0, 0, 0, + 40412809, 0, 0, 0, 80825618, 0, 0, 0, 161651236, 0, 0, 0, 323302473, 0, 0, 0, + 646604947, 0, 0, 0, 1293209894, 0, 0, 0, 39920129, 0, 0, 0, 79840258, 0, 0, 0, + 159680516, 0, 0, 0, 319361033, 0, 0, 0, 638722067, 0, 0, 0, 1277444134, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134299649, 0, 0, + 0, 268599298, 0, 0, 0, 537198597, 0, 0, 0, 1074397194, 0, 0, 0, 2148794388, 0, 0, + 0, 2621480, 0, 0, 0, 5242960, 0, 0, 0, 10485920, 0, 0, 0, 20971840, 0, 0, + 0, 41943680, 0, 0, 0, 83887360, 0, 0, 0, 167774721, 0, 0, 0, 335549442, 0, 0, + 0, 671098884, 0, 0, 0, 1342197768, 0, 0, 0, 2684395537, 0, 0, 0, 1073823778, 0, 0, + 0, 2147647556, 0, 0, 0, 327816, 0, 0, 0, 655632, 0, 0, 0, 1311264, 0, 0, + 0, 2622528, 
0, 0, 0, 5245056, 0, 0, 0, 10490112, 0, 0, 0, 20980224, 0, 0, + 0, 41960448, 0, 0, 0, 83920896, 0, 0, 0, 33574912, 0, 0, 0, 67149824, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 490817699, 0, 0, 0, 981635398, 0, 0, 0, 1963270797, 0, 0, 0, 3926541594, 0, + 0, 0, 3558115892, 0, 0, 0, 2821264488, 0, 0, 0, 1347561681, 0, 0, 0, 2695123362, 0, + 0, 0, 1095279429, 0, 0, 0, 2190558858, 0, 0, 0, 86150421, 0, 0, 0, 172300842, 0, + 0, 0, 344601685, 0, 0, 0, 689203370, 0, 0, 0, 1378406740, 0, 0, 0, 3104893450, 0, + 0, 0, 1914819604, 0, 0, 0, 3829639209, 0, 0, 0, 3364311122, 0, 0, 0, 2433654948, 0, + 0, 0, 572342600, 0, 0, 0, 1144685201, 0, 0, 0, 2289370402, 0, 0, 0, 283773509, 0, + 0, 0, 567547018, 0, 0, 0, 1135094036, 0, 0, 0, 2270188072, 0, 0, 0, 245408849, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 1164254896, 0, 0, 0, 2328509792, 0, 0, 0, 362052288, 0, 0, 0, + 724104577, 0, 0, 0, 1448209155, 0, 0, 0, 2896418311, 0, 0, 0, 1497869326, 0, 0, 0, + 2995738652, 0, 0, 0, 1696510008, 0, 0, 0, 3393020016, 0, 0, 0, 2491072737, 0, 0, 0, + 687178178, 0, 0, 0, 1374356356, 0, 0, 0, 2748712712, 0, 0, 0, 1202458129, 0, 0, 0, + 2404916258, 0, 0, 0, 514865221, 0, 0, 0, 1029730442, 0, 0, 0, 2059460885, 0, 0, 0, + 4118921771, 0, 0, 0, 3942876246, 0, 0, 0, 3590785196, 0, 0, 0, 2886603097, 0, 0, 0, + 1478238898, 0, 0, 0, 2956477797, 0, 0, 0, 622171258, 0, 0, 0, 1244342517, 0, 0, 0, + 
2488685035, 0, 0, 0, 682402774, 0, 0, 0, 1364805548, 0, 0, 0, 2729611096, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336678914, 0, 0, + 0, 673357828, 0, 0, 0, 1346715656, 0, 0, 0, 2693431313, 0, 0, 0, 1091895330, 0, 0, + 0, 2183790660, 0, 0, 0, 72614024, 0, 0, 0, 145228049, 0, 0, 0, 290456098, 0, 0, + 0, 580912197, 0, 0, 0, 1161824394, 0, 0, 0, 2323648789, 0, 0, 0, 352330282, 0, 0, + 0, 704660564, 0, 0, 0, 1409321128, 0, 0, 0, 2818642256, 0, 0, 0, 1342317216, 0, 0, + 0, 2684634433, 0, 0, 0, 1074301570, 0, 0, 0, 2148603140, 0, 0, 0, 2238984, 0, 0, + 0, 4477968, 0, 0, 0, 8955936, 0, 0, 0, 17911872, 0, 0, 0, 35823744, 0, 0, + 0, 71647488, 0, 0, 0, 143294977, 0, 0, 0, 84169728, 0, 0, 0, 168339457, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2541187255, 0, 0, 0, 787407215, 0, 0, 0, 1574814430, 0, 0, 0, 3149628860, 0, + 0, 0, 2004290425, 0, 0, 0, 4008580850, 0, 0, 0, 3722194405, 0, 0, 0, 3149421515, 0, + 0, 0, 2003875734, 0, 0, 0, 4007751468, 0, 0, 0, 3720535640, 0, 0, 0, 3146103984, 0, + 0, 0, 1997240673, 0, 0, 0, 3994481347, 0, 0, 0, 3693995398, 0, 0, 0, 791460795, 0, + 0, 0, 1582921591, 0, 0, 0, 3165843182, 0, 0, 0, 2036719068, 0, 0, 0, 4073438136, 0, + 0, 0, 3851908976, 0, 0, 0, 3408850657, 0, 0, 0, 2522734018, 0, 0, 0, 750500741, 0, + 0, 0, 1501001483, 0, 0, 0, 3002002966, 0, 0, 0, 1709038637, 0, 0, 0, 3418077275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 
0, 126298325, 0, 0, 0, 252596651, 0, 0, 0, 505193303, 0, 0, 0, + 1010386606, 0, 0, 0, 2020773212, 0, 0, 0, 4041546425, 0, 0, 0, 3788125555, 0, 0, 0, + 3281283814, 0, 0, 0, 2267600332, 0, 0, 0, 240233369, 0, 0, 0, 480466738, 0, 0, 0, + 960933476, 0, 0, 0, 1921866953, 0, 0, 0, 3843733907, 0, 0, 0, 3392500518, 0, 0, 0, + 2490033741, 0, 0, 0, 685100186, 0, 0, 0, 1370200372, 0, 0, 0, 2740400744, 0, 0, 0, + 1185834193, 0, 0, 0, 2371668387, 0, 0, 0, 448369479, 0, 0, 0, 896738958, 0, 0, 0, + 1793477917, 0, 0, 0, 3586955835, 0, 0, 0, 2887654563, 0, 0, 0, 1480341830, 0, 0, 0, + 2960683661, 0, 0, 0, 1626400026, 0, 0, 0, 3252800053, 0, 0, 0, 2210632810, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339960066, 0, 0, + 0, 679920132, 0, 0, 0, 1359840264, 0, 0, 0, 2719680529, 0, 0, 0, 1144393762, 0, 0, + 0, 2288787525, 0, 0, 0, 282607754, 0, 0, 0, 565215509, 0, 0, 0, 1130431018, 0, 0, + 0, 2260862036, 0, 0, 0, 226756777, 0, 0, 0, 453513555, 0, 0, 0, 907027111, 0, 0, + 0, 1814054222, 0, 0, 0, 3628108445, 0, 0, 0, 2961249595, 0, 0, 0, 1627531895, 0, 0, + 0, 3255063790, 0, 0, 0, 2215160284, 0, 0, 0, 135353273, 0, 0, 0, 270706546, 0, 0, + 0, 541413093, 0, 0, 0, 1082826186, 0, 0, 0, 2165652372, 0, 0, 0, 36337448, 0, 0, + 0, 72674896, 0, 0, 0, 145349793, 0, 0, 0, 84990016, 0, 0, 0, 169980033, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2614536692, 0, 0, 0, 934106089, 0, 0, 0, 1868212179, 0, 0, 0, 3736424359, 0, + 0, 0, 3177881422, 0, 0, 0, 2060795549, 0, 0, 0, 4121591099, 0, 0, 0, 3948214903, 0, + 0, 0, 3601462510, 0, 0, 0, 2907957725, 0, 0, 0, 1520948154, 0, 0, 0, 3041896308, 0, + 0, 0, 1788825320, 0, 0, 0, 3577650640, 0, 0, 0, 2860333984, 0, 0, 0, 3475824309, 0, + 0, 0, 2656681322, 0, 0, 0, 1018395349, 0, 0, 0, 2036790698, 0, 0, 0, 4073581396, 0, + 0, 0, 3852195497, 0, 0, 0, 3409423699, 0, 0, 0, 2523880103, 0, 0, 0, 752792911, 0, + 0, 0, 1505585823, 0, 0, 0, 3011171646, 0, 0, 0, 1727375997, 0, 0, 0, 3454751994, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 3480795388, 0, 0, 0, 2666623480, 0, 0, 0, 1038279664, 0, 0, 0, + 2076559329, 0, 0, 0, 4153118658, 0, 0, 0, 4011270020, 0, 0, 0, 3727572744, 0, 0, 0, + 3160178193, 0, 0, 0, 2025389090, 0, 0, 0, 4050778181, 0, 0, 0, 3806589066, 0, 0, 0, + 3318210837, 0, 0, 0, 2341454378, 0, 0, 0, 387941461, 0, 0, 0, 775882923, 0, 0, 0, + 1551765846, 0, 0, 0, 3103531693, 0, 0, 0, 1912096090, 0, 0, 0, 3824192180, 0, 0, 0, + 3353417064, 0, 0, 0, 2411866832, 0, 0, 0, 528766369, 0, 0, 0, 1057532739, 0, 0, 0, + 2115065479, 0, 0, 0, 4230130959, 0, 0, 0, 926802659, 0, 0, 0, 1853605319, 0, 0, 0, + 3707210639, 0, 0, 0, 3119453983, 0, 0, 0, 1943940671, 0, 0, 0, 3887881342, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1457526048, 0, 0, + 0, 2915052096, 0, 0, 0, 1535136897, 0, 0, 0, 3070273795, 0, 0, 0, 1845580294, 0, 0, + 0, 3691160589, 0, 0, 0, 3087353882, 0, 0, 0, 1879740469, 0, 0, 0, 3759480939, 0, 0, + 0, 3223994582, 0, 0, 0, 2153021868, 0, 0, 0, 11076440, 0, 0, 0, 22152880, 0, 0, + 0, 44305760, 0, 0, 0, 88611520, 0, 0, 0, 177223041, 0, 0, 0, 354446082, 0, 0, + 0, 708892164, 0, 0, 0, 1417784328, 0, 0, 0, 2835568656, 0, 0, 0, 1376170016, 0, 0, + 0, 2752340033, 0, 0, 0, 1209712771, 0, 0, 0, 2419425542, 0, 0, 0, 543883789, 0, 0, + 0, 1087767578, 0, 0, 0, 2175535156, 0, 0, 0, 1438123336, 0, 0, 0, 2876246672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2983242506, 0, 0, 0, 1671517716, 0, 0, 0, 
3343035433, 0, 0, 0, 2391103570, 0, + 0, 0, 487239844, 0, 0, 0, 974479689, 0, 0, 0, 1948959378, 0, 0, 0, 3897918757, 0, + 0, 0, 3500870219, 0, 0, 0, 2706773142, 0, 0, 0, 1118578988, 0, 0, 0, 2237157976, 0, + 0, 0, 179348657, 0, 0, 0, 358697315, 0, 0, 0, 717394631, 0, 0, 0, 3830812293, 0, + 0, 0, 3366657290, 0, 0, 0, 2438347285, 0, 0, 0, 581727274, 0, 0, 0, 1163454549, 0, + 0, 0, 2326909099, 0, 0, 0, 358850902, 0, 0, 0, 717701804, 0, 0, 0, 1435403608, 0, + 0, 0, 2870807216, 0, 0, 0, 1446647137, 0, 0, 0, 2893294274, 0, 0, 0, 1491621253, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, + { + 0, 0, 0, 0, 460238757, 0, 0, 0, 920477515, 0, 0, 0, 1840955030, 0, 0, 0, + 3681910060, 0, 0, 0, 3068852824, 0, 0, 0, 1842738352, 0, 0, 0, 3685476704, 0, 0, 0, + 3075986112, 0, 0, 0, 1857004929, 0, 0, 0, 3714009859, 0, 0, 0, 3133052422, 0, 0, 0, + 1971137548, 0, 0, 0, 3942275096, 0, 0, 0, 3589582897, 0, 0, 0, 2884198498, 0, 0, 0, + 1473429701, 0, 0, 0, 2946859402, 0, 0, 0, 1598751509, 0, 0, 0, 3197503018, 0, 0, 0, + 2100038740, 0, 0, 0, 4200077480, 0, 0, 0, 4105187665, 0, 0, 0, 3915408035, 0, 0, 0, + 3535848774, 0, 0, 0, 2776730253, 0, 0, 0, 1349368510, 0, 0, 0, 2698737021, 0, 0, 0, + 1102506746, 0, 0, 0, 2205013492, 0, 0, 0, 115059689, 0, 0, 0, 230119378, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4124527681, 0, 0, + 0, 3954088066, 0, 0, 0, 3613208836, 0, 0, 0, 2931450376, 0, 0, 0, 1567933457, 0, 0, + 0, 
3135866914, 0, 0, 0, 1976766533, 0, 0, 0, 3953533066, 0, 0, 0, 3612098836, 0, 0, + 0, 2929230376, 0, 0, 0, 1563493457, 0, 0, 0, 3126986914, 0, 0, 0, 1959006533, 0, 0, + 0, 3918013066, 0, 0, 0, 3541058836, 0, 0, 0, 2787150377, 0, 0, 0, 1279333459, 0, 0, + 0, 2558666919, 0, 0, 0, 822366543, 0, 0, 0, 1644733087, 0, 0, 0, 3289466174, 0, 0, + 0, 2283965053, 0, 0, 0, 272962810, 0, 0, 0, 545925621, 0, 0, 0, 1091851242, 0, 0, + 0, 2183702484, 0, 0, 0, 72437672, 0, 0, 0, 4252357392, 0, 0, 0, 4209747488, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2797805596, 0, 0, 0, 1300643896, 0, 0, 0, 2601287792, 0, 0, 0, 907608289, 0, + 0, 0, 1815216578, 0, 0, 0, 3630433157, 0, 0, 0, 2965899019, 0, 0, 0, 1636830742, 0, + 0, 0, 3273661484, 0, 0, 0, 2252355672, 0, 0, 0, 209744048, 0, 0, 0, 419488096, 0, + 0, 0, 838976192, 0, 0, 0, 1677952385, 0, 0, 0, 3355904770, 0, 0, 0, 919418393, 0, + 0, 0, 1838836786, 0, 0, 0, 3677673572, 0, 0, 0, 3060379848, 0, 0, 0, 1825792400, 0, + 0, 0, 3651584800, 0, 0, 0, 3008202304, 0, 0, 0, 1721437312, 0, 0, 0, 3442874624, 0, + 0, 0, 2590781953, 0, 0, 0, 886596611, 0, 0, 0, 1773193223, 0, 0, 0, 3546386446, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490940283, + 0, 0, 0, 981880567, 0, 0, 0, 1963761134, 0, 0, 0, 3927522269, 0, 0, 0, 3560077242, + 0, 0, 0, 2825187189, 0, 0, 0, 1355407083, 0, 0, 0, 2710814166, 0, 0, 0, 1126661037, + 0, 0, 0, 2253322074, 0, 0, 0, 211676852, 0, 0, 0, 423353704, 0, 0, 0, 846707408, + 0, 0, 0, 1693414817, 0, 0, 0, 3386829635, 0, 0, 0, 2478691975, 0, 0, 0, 662416654, + 0, 0, 0, 1324833308, 0, 0, 0, 2649666617, 0, 0, 0, 1004365938, 0, 0, 0, 2008731877, + 0, 0, 0, 4017463754, 0, 0, 0, 3282593007, 0, 0, 0, 2270218718, 0, 0, 0, 245470141, + }, + { + 0, 0, 0, 0, 1980210557, 0, 0, 0, 3960421115, 0, 0, 0, 3625874935, 0, 0, 0, + 2956782575, 0, 0, 0, 1618597854, 0, 0, 0, 3237195709, 0, 0, 0, 2179424123, 0, 0, 0, + 63880951, 0, 0, 0, 127761903, 0, 0, 0, 255523807, 0, 0, 
0, 511047615, 0, 0, 0, + 1022095230, 0, 0, 0, 2044190460, 0, 0, 0, 4088380920, 0, 0, 0, 3881794544, 0, 0, 0, + 3468621792, 0, 0, 0, 2642276289, 0, 0, 0, 989585283, 0, 0, 0, 1979170566, 0, 0, 0, + 3958341132, 0, 0, 0, 3621714968, 0, 0, 0, 2948462640, 0, 0, 0, 1601957985, 0, 0, 0, + 3203915970, 0, 0, 0, 2112864644, 0, 0, 0, 2379751029, 0, 0, 0, 464534763, 0, 0, 0, + 929069527, 0, 0, 0, 1858139055, 0, 0, 0, 3716278111, 0, 0, 0, 3137588926, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3560175108, 0, 0, + 0, 2825382920, 0, 0, 0, 1355798544, 0, 0, 0, 2711597089, 0, 0, 0, 1128226882, 0, 0, + 0, 2256453764, 0, 0, 0, 217940233, 0, 0, 0, 435880467, 0, 0, 0, 871760935, 0, 0, + 0, 1743521871, 0, 0, 0, 3487043743, 0, 0, 0, 2679120191, 0, 0, 0, 1063273086, 0, 0, + 0, 2126546172, 0, 0, 0, 4253092344, 0, 0, 0, 4211217392, 0, 0, 0, 4127467489, 0, 0, + 0, 3959967682, 0, 0, 0, 3624968069, 0, 0, 0, 2954968843, 0, 0, 0, 1614970391, 0, 0, + 0, 3229940782, 0, 0, 0, 2164914268, 0, 0, 0, 34861240, 0, 0, 0, 69722480, 0, 0, + 0, 139444961, 0, 0, 0, 278889922, 0, 0, 0, 4111269249, 0, 0, 0, 3927571202, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3324387232, 0, 0, 0, 2353807168, 0, 0, 0, 412647041, 0, 0, 0, 825294082, 0, + 0, 0, 1650588165, 0, 0, 0, 3301176331, 0, 0, 0, 2307385367, 0, 0, 0, 319803438, 0, + 0, 0, 639606877, 0, 0, 0, 1279213754, 0, 0, 0, 2558427508, 0, 0, 0, 821887721, 0, + 0, 0, 1643775442, 0, 0, 0, 3287550884, 0, 0, 0, 2280134472, 0, 0, 0, 3388354864, 0, + 0, 0, 2481742433, 0, 0, 0, 668517571, 0, 0, 0, 1337035143, 0, 0, 0, 2674070287, 0, + 0, 0, 1053173279, 0, 0, 0, 2106346559, 0, 0, 0, 4212693118, 0, 0, 0, 4130418941, 0, + 0, 0, 3965870586, 0, 0, 0, 3636773876, 0, 0, 0, 2978580456, 0, 0, 0, 1662193616, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42029323, + 0, 0, 0, 84058646, 0, 0, 0, 168117293, 0, 0, 0, 336234586, 0, 0, 0, 672469172, + 0, 0, 0, 1344938344, 0, 0, 0, 2689876689, 0, 0, 0, 
1084786082, 0, 0, 0, 2169572165, + 0, 0, 0, 44177035, 0, 0, 0, 88354071, 0, 0, 0, 176708143, 0, 0, 0, 353416286, + 0, 0, 0, 706832573, 0, 0, 0, 1413665147, 0, 0, 0, 2827330294, 0, 0, 0, 1359693292, + 0, 0, 0, 2719386585, 0, 0, 0, 1143805874, 0, 0, 0, 2287611749, 0, 0, 0, 280256202, + 0, 0, 0, 560512405, 0, 0, 0, 1078995489, 0, 0, 0, 2157990978, 0, 0, 0, 21014661, + }, + { + 0, 0, 0, 0, 893107048, 0, 0, 0, 1786214097, 0, 0, 0, 3572428195, 0, 0, 0, + 2849889095, 0, 0, 0, 1404810895, 0, 0, 0, 2809621790, 0, 0, 0, 1324276285, 0, 0, 0, + 2648552571, 0, 0, 0, 1002137847, 0, 0, 0, 2004275695, 0, 0, 0, 4008551390, 0, 0, 0, + 3722135485, 0, 0, 0, 3149303674, 0, 0, 0, 2003640053, 0, 0, 0, 4007280106, 0, 0, 0, + 3719592917, 0, 0, 0, 3144218538, 0, 0, 0, 1993469781, 0, 0, 0, 3986939563, 0, 0, 0, + 3678911830, 0, 0, 0, 3062856364, 0, 0, 0, 1830745432, 0, 0, 0, 3661490864, 0, 0, 0, + 3028014433, 0, 0, 0, 1761061570, 0, 0, 0, 3839160045, 0, 0, 0, 3383352795, 0, 0, 0, + 2471738294, 0, 0, 0, 648509293, 0, 0, 0, 1297018586, 0, 0, 0, 2594037172, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3625984309, 0, 0, + 0, 2957001323, 0, 0, 0, 1619035351, 0, 0, 0, 3238070702, 0, 0, 0, 2181174108, 0, 0, + 0, 67380920, 0, 0, 0, 134761841, 0, 0, 0, 269523682, 0, 0, 0, 539047365, 0, 0, + 0, 1078094730, 0, 0, 0, 2156189460, 0, 0, 0, 17411624, 0, 0, 0, 34823248, 0, 0, + 0, 69646496, 0, 0, 0, 139292993, 0, 0, 0, 278585986, 0, 0, 0, 557171973, 0, 0, + 0, 1114343946, 0, 0, 0, 2228687892, 0, 0, 0, 162408489, 0, 0, 0, 324816978, 0, 0, + 0, 649633957, 0, 0, 0, 1299267915, 0, 0, 0, 2598535831, 0, 0, 0, 902104367, 0, 0, + 0, 1804208734, 0, 0, 0, 3608417468, 0, 0, 0, 1980237901, 0, 0, 0, 3960475802, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 470870896, 0, 0, 0, 941741792, 0, 0, 0, 1883483584, 0, 0, 0, 3766967168, 0, + 0, 0, 3238967041, 0, 0, 0, 2182966786, 0, 0, 0, 70966277, 0, 0, 0, 141932555, 0, + 0, 0, 283865111, 0, 0, 0, 567730223, 0, 0, 0, 1135460447, 0, 0, 0, 2270920895, 0, + 
0, 0, 246874494, 0, 0, 0, 493748989, 0, 0, 0, 987497979, 0, 0, 0, 1772675207, 0, + 0, 0, 3545350414, 0, 0, 0, 2795733533, 0, 0, 0, 1296499770, 0, 0, 0, 2592999541, 0, + 0, 0, 891031787, 0, 0, 0, 1782063574, 0, 0, 0, 3564127149, 0, 0, 0, 2833287003, 0, + 0, 0, 1371606711, 0, 0, 0, 2743213422, 0, 0, 0, 1191459548, 0, 0, 0, 2382919096, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537135248, + 0, 0, 0, 1074270496, 0, 0, 0, 2148540992, 0, 0, 0, 2114688, 0, 0, 0, 4229377, + 0, 0, 0, 8458754, 0, 0, 0, 16917508, 0, 0, 0, 33835017, 0, 0, 0, 67670034, + 0, 0, 0, 135340068, 0, 0, 0, 270680136, 0, 0, 0, 541360273, 0, 0, 0, 1082720546, + 0, 0, 0, 2165441092, 0, 0, 0, 35914889, 0, 0, 0, 71829779, 0, 0, 0, 143659558, + 0, 0, 0, 287319116, 0, 0, 0, 574638232, 0, 0, 0, 1149276464, 0, 0, 0, 2298552928, + 0, 0, 0, 302138561, 0, 0, 0, 67141906, 0, 0, 0, 134283812, 0, 0, 0, 268567624, + }, + { + 0, 0, 0, 0, 2113541784, 0, 0, 0, 4227083568, 0, 0, 0, 4159199840, 0, 0, 0, + 4023432384, 0, 0, 0, 3751897472, 0, 0, 0, 3208827648, 0, 0, 0, 2122688001, 0, 0, 0, + 4245376003, 0, 0, 0, 4195784710, 0, 0, 0, 4096602125, 0, 0, 0, 3898236955, 0, 0, 0, + 3501506615, 0, 0, 0, 2708045935, 0, 0, 0, 1121124575, 0, 0, 0, 2242249151, 0, 0, 0, + 189531007, 0, 0, 0, 379062015, 0, 0, 0, 758124030, 0, 0, 0, 1516248061, 0, 0, 0, + 3032496123, 0, 0, 0, 1770024950, 0, 0, 0, 3540049900, 0, 0, 0, 2785132504, 0, 0, 0, + 1275297712, 0, 0, 0, 2550595425, 0, 0, 0, 1308092506, 0, 0, 0, 2616185012, 0, 0, 0, + 937402729, 0, 0, 0, 1874805459, 0, 0, 0, 3749610918, 0, 0, 0, 3204254540, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99817600, 0, 0, + 0, 199635201, 0, 0, 0, 399270402, 0, 0, 0, 798540804, 0, 0, 0, 1597081609, 0, 0, + 0, 3194163218, 0, 0, 0, 2093359140, 0, 0, 0, 4186718280, 0, 0, 0, 4078469265, 0, 0, + 0, 3861971235, 0, 0, 0, 3428975175, 0, 0, 0, 2562983055, 0, 0, 0, 830998815, 0, 0, + 0, 1661997631, 0, 0, 0, 3323995262, 0, 0, 0, 2353023229, 0, 0, 0, 
411079163, 0, 0, + 0, 822158327, 0, 0, 0, 1644316655, 0, 0, 0, 3288633310, 0, 0, 0, 2282299325, 0, 0, + 0, 269631354, 0, 0, 0, 539262709, 0, 0, 0, 1078525418, 0, 0, 0, 2157050836, 0, 0, + 0, 19134376, 0, 0, 0, 38268752, 0, 0, 0, 24954400, 0, 0, 0, 49908800, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 27673132, 0, 0, 0, 55346265, 0, 0, 0, 110692530, 0, 0, 0, 221385060, 0, + 0, 0, 442770121, 0, 0, 0, 885540243, 0, 0, 0, 1771080487, 0, 0, 0, 3542160975, 0, + 0, 0, 2789354654, 0, 0, 0, 1283742012, 0, 0, 0, 2567484024, 0, 0, 0, 840000753, 0, + 0, 0, 1680001507, 0, 0, 0, 3360003015, 0, 0, 0, 2425038735, 0, 0, 0, 548408626, 0, + 0, 0, 1096817252, 0, 0, 0, 2193634504, 0, 0, 0, 92301712, 0, 0, 0, 184603425, 0, + 0, 0, 369206850, 0, 0, 0, 738413700, 0, 0, 0, 1476827400, 0, 0, 0, 2953654801, 0, + 0, 0, 1612342306, 0, 0, 0, 3224684613, 0, 0, 0, 2154401931, 0, 0, 0, 13836566, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 307855197, 0, 0, 0, 615710394, 0, 0, 0, 1231420788, 0, 0, 0, + 2462841576, 0, 0, 0, 630715856, 0, 0, 0, 1261431713, 0, 0, 0, 2522863426, 0, 0, 0, + 750759556, 0, 0, 0, 1501519112, 0, 0, 0, 3003038224, 0, 0, 0, 1711109152, 0, 0, 0, + 3422218304, 0, 0, 0, 2549469312, 0, 0, 0, 803971329, 0, 0, 0, 1607942659, 0, 0, 0, + 3215885318, 0, 0, 0, 2136803341, 0, 0, 0, 4273606682, 0, 0, 0, 4252246069, 0, 0, 0, + 4209524842, 0, 0, 0, 4124082389, 0, 0, 0, 3953197482, 0, 0, 0, 3611427668, 0, 0, 0, + 2927888040, 0, 0, 0, 1560808784, 0, 0, 0, 
2823382525, 0, 0, 0, 1351797754, 0, 0, 0, + 2703595509, 0, 0, 0, 1112223723, 0, 0, 0, 2224447447, 0, 0, 0, 153927598, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1409835056, 0, 0, + 0, 2819670112, 0, 0, 0, 1344372928, 0, 0, 0, 2688745857, 0, 0, 0, 1082524418, 0, 0, + 0, 2165048836, 0, 0, 0, 35130376, 0, 0, 0, 70260752, 0, 0, 0, 140521505, 0, 0, + 0, 281043010, 0, 0, 0, 562086021, 0, 0, 0, 1124172042, 0, 0, 0, 2248344084, 0, 0, + 0, 201720873, 0, 0, 0, 403441747, 0, 0, 0, 806883495, 0, 0, 0, 1613766991, 0, 0, + 0, 3227533982, 0, 0, 0, 2160100668, 0, 0, 0, 25234040, 0, 0, 0, 50468080, 0, 0, + 0, 100936160, 0, 0, 0, 201872321, 0, 0, 0, 403744643, 0, 0, 0, 807489287, 0, 0, + 0, 1614978575, 0, 0, 0, 3229957150, 0, 0, 0, 3573684236, 0, 0, 0, 2852401176, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1749068055, 0, 0, 0, 3498136111, 0, 0, 0, 2701304927, 0, 0, 0, 1107642558, 0, + 0, 0, 2215285117, 0, 0, 0, 135602938, 0, 0, 0, 271205877, 0, 0, 0, 542411755, 0, + 0, 0, 1084823510, 0, 0, 0, 2169647020, 0, 0, 0, 44326744, 0, 0, 0, 88653489, 0, + 0, 0, 177306978, 0, 0, 0, 354613956, 0, 0, 0, 709227913, 0, 0, 0, 1019953669, 0, + 0, 0, 2039907338, 0, 0, 0, 4079814677, 0, 0, 0, 3864662058, 0, 0, 0, 3434356820, 0, + 0, 0, 2573746345, 0, 0, 0, 852525394, 0, 0, 0, 1705050788, 0, 0, 0, 3410101576, 0, + 0, 0, 2525235857, 0, 0, 0, 755504418, 0, 0, 0, 1511008837, 0, 0, 0, 3022017675, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 16640, + }, + { + 0, 0, 0, 0, 
1769338744, 0, 0, 0, 3538677488, 0, 0, 0, 2782387681, 0, 0, 0, + 1269808067, 0, 0, 0, 2539616134, 0, 0, 0, 784264973, 0, 0, 0, 1568529946, 0, 0, 0, + 3137059892, 0, 0, 0, 1979152488, 0, 0, 0, 3958304976, 0, 0, 0, 3621642656, 0, 0, 0, + 2948318016, 0, 0, 0, 1601668737, 0, 0, 0, 3203337474, 0, 0, 0, 2111707652, 0, 0, 0, + 4223415304, 0, 0, 0, 4151863312, 0, 0, 0, 4008759328, 0, 0, 0, 3722551361, 0, 0, 0, + 3150135426, 0, 0, 0, 2005303557, 0, 0, 0, 4010607114, 0, 0, 0, 3726246932, 0, 0, 0, + 3157526569, 0, 0, 0, 2020085842, 0, 0, 0, 2577782749, 0, 0, 0, 860598203, 0, 0, 0, + 1721196407, 0, 0, 0, 3442392815, 0, 0, 0, 2589818334, 0, 0, 0, 884669372, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1430273920, 0, 0, + 0, 2860547840, 0, 0, 0, 1426128384, 0, 0, 0, 2852256768, 0, 0, 0, 1409546240, 0, 0, + 0, 2819092480, 0, 0, 0, 1343217664, 0, 0, 0, 2686435329, 0, 0, 0, 1077903362, 0, 0, + 0, 2155806724, 0, 0, 0, 16646152, 0, 0, 0, 33292304, 0, 0, 0, 66584608, 0, 0, + 0, 133169216, 0, 0, 0, 266338433, 0, 0, 0, 532676867, 0, 0, 0, 1065353734, 0, 0, + 0, 2130707468, 0, 0, 0, 4261414936, 0, 0, 0, 4227862576, 0, 0, 0, 4160757856, 0, 0, + 0, 4026548417, 0, 0, 0, 3758129539, 0, 0, 0, 3221291782, 0, 0, 0, 2147616268, 0, 0, + 0, 265240, 0, 0, 0, 530480, 0, 0, 0, 1431310304, 0, 0, 0, 2862620608, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2164392972, 0, 0, 0, 33818648, 0, 0, 0, 67637296, 0, 0, 0, 135274592, 0, + 0, 0, 270549185, 0, 0, 0, 541098371, 0, 0, 0, 1082196742, 0, 0, 0, 2164393484, 0, + 0, 0, 33819672, 0, 0, 0, 67639344, 0, 0, 0, 135278688, 0, 0, 0, 270557377, 0, + 0, 0, 541114755, 0, 0, 0, 1082229511, 0, 0, 0, 2164459022, 0, 0, 0, 2198079504, 0, + 0, 0, 101191712, 0, 0, 0, 202383424, 0, 0, 0, 404766849, 0, 0, 0, 809533698, 0, + 0, 0, 1619067396, 0, 0, 0, 3238134792, 0, 0, 0, 2181302288, 0, 0, 0, 67637280, 0, + 0, 0, 135274560, 0, 0, 0, 270549121, 0, 0, 0, 541098243, 0, 0, 0, 1082196486, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 2045181768, 0, 0, 0, 4090363536, 0, 0, 0, 3885759776, 0, 0, 0, + 3476552256, 0, 0, 0, 2658137216, 0, 0, 0, 1021307136, 0, 0, 0, 2042614272, 0, 0, 0, + 4085228544, 0, 0, 0, 3875489792, 0, 0, 0, 3456012289, 0, 0, 0, 2617057282, 0, 0, 0, + 939147269, 0, 0, 0, 1878294539, 0, 0, 0, 3756589078, 0, 0, 0, 3218210860, 0, 0, 0, + 2141454425, 0, 0, 0, 4282908850, 0, 0, 0, 4270850404, 0, 0, 0, 4246733513, 0, 0, 0, + 4198499730, 0, 0, 0, 4102032165, 0, 0, 0, 3909097035, 0, 0, 0, 3523226774, 0, 0, 0, + 2751486253, 0, 0, 0, 1208005210, 0, 0, 0, 3924270077, 0, 0, 0, 3553572858, 0, 0, 0, + 2812178420, 0, 0, 0, 1329389545, 0, 0, 0, 2658779090, 0, 0, 0, 1022590884, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4294705152, 0, 0, + 0, 4294443008, 0, 0, 0, 4293918720, 0, 0, 0, 4292870144, 0, 0, 0, 4290772992, 0, 0, + 0, 4286578688, 0, 0, 0, 4278190080, 0, 0, 0, 4261412864, 0, 0, 0, 4227858432, 0, 0, + 0, 4160749568, 0, 0, 0, 4026531841, 0, 0, 0, 3758096387, 0, 0, 0, 3221225478, 0, 0, + 0, 2147483660, 0, 0, 0, 24, 0, 0, 0, 48, 0, 0, 0, 96, 0, 0, + 0, 192, 0, 0, 0, 384, 0, 0, 0, 768, 0, 0, 0, 1536, 0, 0, + 0, 3072, 0, 0, 0, 6144, 0, 0, 0, 12288, 0, 0, 0, 24576, 0, 0, + 0, 49152, 0, 0, 0, 98304, 0, 0, 0, 4294901760, 0, 0, 0, 4294836224, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 
0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 698651200, 0, 0, 0, 1397302401, 0, 0, 0, 2794604802, 0, 0, 0, + 1294242308, 0, 0, 0, 2588484616, 0, 0, 0, 882001936, 0, 0, 0, 1764003872, 0, 0, 0, + 3528007744, 0, 0, 0, 2761048193, 0, 0, 0, 1227129090, 0, 0, 0, 2454258180, 0, 0, 0, + 613549064, 0, 0, 0, 1227098128, 0, 0, 0, 2454196256, 0, 0, 0, 613425216, 0, 0, 0, + 1226850432, 0, 0, 0, 2453700864, 0, 0, 0, 612434432, 0, 0, 0, 1224868864, 0, 0, 0, + 2449737728, 0, 0, 0, 604508160, 0, 0, 0, 1209016320, 0, 0, 0, 2418032641, 0, 0, 0, + 541097986, 0, 0, 0, 1082195972, 0, 0, 0, 2829488713, 0, 0, 0, 1364010130, 0, 0, 0, + 2728020260, 0, 0, 0, 1161073224, 0, 0, 0, 2322146448, 0, 0, 0, 349325600, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 
0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 1226833920, 0, 0, 0, 2453667840, 0, 0, 0, 612368384, 0, 0, 0, + 1224736768, 0, 0, 0, 2449473536, 0, 0, 0, 603979776, 0, 0, 0, 1207959552, 0, 0, 0, + 2415919105, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, 0, 0, 2147483657, 0, 0, 0, + 18, 0, 0, 0, 36, 0, 0, 0, 72, 0, 0, 0, 144, 0, 0, 0, + 288, 0, 0, 0, 576, 0, 0, 0, 1152, 0, 0, 0, 2304, 0, 0, 0, + 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, 0, 0, 0, + 73728, 0, 0, 0, 147456, 0, 0, 0, 1227128832, 0, 0, 0, 2454257664, 0, 0, 0, + 613548032, 0, 0, 0, 1227096064, 0, 0, 0, 2454192128, 0, 0, 0, 613416960, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 
0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 
0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 0, 0, + 133120, 0, 0, 0, 266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 
0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, +}; + +// clang-format on +// clang-format off +static const unsigned int h_lfsr113_jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE] = { + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 0, 0, + 133120, 0, 0, 0, 266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 
0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 8390656, 0, 0, 0, 16781312, 0, 0, 0, 33562625, 0, 0, 0, + 67125250, 0, 0, 0, 134250500, 0, 0, 0, 268501000, 0, 0, 0, 537002000, 0, 0, 0, + 1074004000, 0, 0, 0, 2148008001, 0, 0, 0, 1048706, 0, 0, 0, 2097412, 0, 0, 0, + 4194824, 0, 0, 0, 8389648, 0, 0, 0, 16779296, 0, 0, 0, 33558593, 0, 0, 0, + 67117186, 0, 0, 0, 134234372, 0, 0, 0, 268468744, 0, 0, 0, 536937488, 0, 0, 0, + 1073874976, 0, 0, 0, 2147749953, 0, 0, 0, 532610, 0, 0, 0, 1065220, 0, 0, 0, + 2130440, 0, 0, 0, 4260880, 0, 0, 0, 131104, 0, 0, 0, 262208, 0, 0, 0, + 524416, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195328, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 
33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 16640, + }, + { + 0, 0, 0, 0, 134300932, 0, 0, 0, 268601864, 0, 0, 0, 537203728, 0, 0, 0, + 1074407456, 0, 0, 0, 2148814913, 0, 0, 0, 2662530, 0, 0, 0, 5325060, 0, 0, 0, + 10650120, 0, 0, 0, 21300240, 0, 0, 0, 42600481, 0, 0, 0, 85200962, 0, 0, 0, + 170401925, 0, 0, 0, 340803850, 0, 0, 0, 681607700, 0, 0, 0, 1363215400, 0, 0, 0, + 2726430800, 0, 0, 0, 1157894304, 0, 0, 0, 2315788608, 0, 0, 0, 336609920, 0, 0, 0, + 673219840, 0, 0, 0, 1346439680, 0, 0, 0, 2692879361, 0, 0, 0, 1090791426, 0, 0, 0, + 2181582852, 0, 0, 0, 68198408, 0, 0, 0, 2098452, 0, 0, 0, 4196904, 0, 0, 0, + 8393808, 0, 0, 0, 16787616, 0, 0, 0, 33575233, 0, 0, 0, 67150466, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 
0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 826542200, 0, 0, 0, 1653084401, 0, 0, 0, 3306168803, 0, 0, 0, + 2317370310, 0, 0, 0, 339773324, 0, 0, 0, 679546648, 0, 0, 0, 1359093296, 0, 0, 0, + 2718186592, 0, 0, 0, 1141405888, 0, 0, 0, 2282811777, 0, 0, 0, 270656258, 0, 0, 0, + 541312516, 0, 0, 0, 1082625032, 0, 0, 0, 2165250065, 0, 0, 0, 35532835, 0, 0, 0, + 71065670, 0, 0, 0, 142131340, 0, 0, 0, 284262680, 0, 0, 0, 568525360, 0, 0, 0, + 1137050721, 0, 0, 0, 2274101442, 0, 0, 0, 253235589, 0, 0, 0, 506471179, 0, 0, 0, + 1012942358, 0, 0, 0, 2025884716, 0, 0, 0, 3234140193, 0, 0, 0, 2173313091, 0, 0, 0, + 51658887, 0, 0, 0, 103317775, 0, 0, 0, 206635550, 0, 0, 0, 413271100, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8421376, 0, 0, + 0, 16842752, 0, 0, 0, 33685504, 0, 0, 0, 67371008, 0, 0, 0, 134742017, 0, 0, + 0, 269484034, 0, 0, 0, 538968069, 0, 0, 0, 1077936138, 0, 0, 0, 2155872276, 0, 0, + 0, 16777256, 0, 0, 0, 33554512, 0, 
0, 0, 67109024, 0, 0, 0, 134218049, 0, 0, + 0, 268436098, 0, 0, 0, 536872197, 0, 0, 0, 1073744394, 0, 0, 0, 2147488788, 0, 0, + 0, 10280, 0, 0, 0, 20560, 0, 0, 0, 41120, 0, 0, 0, 82240, 0, 0, + 0, 164480, 0, 0, 0, 328960, 0, 0, 0, 657920, 0, 0, 0, 1315840, 0, 0, + 0, 2631680, 0, 0, 0, 5263360, 0, 0, 0, 2105344, 0, 0, 0, 4210688, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 2499687121, 0, 0, 0, 704406946, 0, 0, 0, 1408813893, 0, 0, 0, + 2817627786, 0, 0, 0, 1340288277, 0, 0, 0, 2680576554, 0, 0, 0, 1066185813, 0, 0, 0, + 2132371627, 0, 0, 0, 4264743254, 0, 0, 0, 4234519213, 0, 0, 0, 4174071131, 0, 0, 0, + 4053174967, 0, 0, 0, 3811382638, 0, 0, 0, 3327797980, 0, 0, 0, 2360628665, 0, 0, 0, + 426290034, 0, 0, 0, 852580069, 0, 0, 0, 1705160138, 0, 0, 0, 3410320276, 0, 0, 
0, + 2525673256, 0, 0, 0, 756379216, 0, 0, 0, 1512758433, 0, 0, 0, 3025516867, 0, 0, 0, + 1756066438, 0, 0, 0, 3512132877, 0, 0, 0, 911472843, 0, 0, 0, 1822945686, 0, 0, 0, + 3645891373, 0, 0, 0, 2996815450, 0, 0, 0, 1698663604, 0, 0, 0, 3397327208, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 591396869, 0, 0, + 0, 1182793738, 0, 0, 0, 2365587477, 0, 0, 0, 436207659, 0, 0, 0, 872415319, 0, 0, + 0, 1744830638, 0, 0, 0, 3489661276, 0, 0, 0, 2684355257, 0, 0, 0, 1073743218, 0, 0, + 0, 2147486436, 0, 0, 0, 5576, 0, 0, 0, 11152, 0, 0, 0, 22304, 0, 0, + 0, 44608, 0, 0, 0, 89216, 0, 0, 0, 178432, 0, 0, 0, 356864, 0, 0, + 0, 713728, 0, 0, 0, 1427456, 0, 0, 0, 2854912, 0, 0, 0, 5709824, 0, 0, + 0, 11419648, 0, 0, 0, 22839296, 0, 0, 0, 45678592, 0, 0, 0, 91357184, 0, 0, + 0, 182714369, 0, 0, 0, 365428738, 0, 0, 0, 147849217, 0, 0, 0, 295698434, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 490817699, 0, 0, 0, 981635398, 0, 0, 0, 1963270797, 0, 0, 0, 3926541594, 0, + 0, 0, 3558115892, 0, 0, 0, 2821264488, 0, 0, 0, 1347561681, 0, 0, 0, 2695123362, 0, + 0, 0, 1095279429, 0, 0, 0, 2190558858, 0, 0, 0, 86150421, 0, 0, 0, 172300842, 0, + 0, 0, 344601685, 0, 0, 0, 689203370, 0, 0, 0, 1378406740, 0, 0, 0, 3104893450, 0, + 0, 0, 1914819604, 0, 0, 0, 3829639209, 0, 0, 0, 3364311122, 0, 0, 0, 2433654948, 0, + 0, 0, 572342600, 0, 0, 0, 1144685201, 0, 0, 0, 2289370402, 0, 0, 0, 283773509, 0, + 0, 0, 567547018, 0, 0, 0, 1135094036, 0, 0, 0, 2270188072, 0, 0, 0, 245408849, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 
382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 2271571420, 0, 0, 0, 248175545, 0, 0, 0, 496351090, 0, 0, 0, + 992702181, 0, 0, 0, 1985404363, 0, 0, 0, 3970808727, 0, 0, 0, 3646650159, 0, 0, 0, + 2998333022, 0, 0, 0, 1701698748, 0, 0, 0, 3403397496, 0, 0, 0, 2511827697, 0, 0, 0, + 728688099, 0, 0, 0, 1457376199, 0, 0, 0, 2914752399, 0, 0, 0, 1534537503, 0, 0, 0, + 3069075006, 0, 0, 0, 1843182716, 0, 0, 0, 3686365432, 0, 0, 0, 3077763568, 0, 0, 0, + 1860559841, 0, 0, 0, 3721119683, 0, 0, 0, 3147272070, 0, 0, 0, 1999576845, 0, 0, 0, + 3999153690, 0, 0, 0, 3703340085, 0, 0, 0, 1042126263, 0, 0, 0, 2084252526, 0, 0, 0, + 4168505053, 0, 0, 0, 4042042811, 0, 0, 0, 3789118327, 0, 0, 0, 3283269358, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 661652741, 0, 0, + 0, 1323305483, 0, 0, 0, 2646610967, 0, 0, 0, 998254638, 0, 0, 0, 1996509277, 0, 0, + 0, 3993018554, 0, 0, 0, 3691069813, 0, 0, 0, 3087172330, 0, 0, 0, 1879377365, 0, 0, + 0, 3758754731, 0, 0, 0, 3222542166, 0, 0, 0, 2150117036, 0, 0, 0, 5266776, 0, 0, + 0, 10533552, 0, 0, 0, 21067104, 0, 0, 0, 42134208, 0, 0, 0, 84268416, 0, 0, + 0, 168536833, 0, 0, 0, 337073666, 0, 0, 0, 674147332, 0, 0, 0, 1348294664, 0, 0, + 0, 2696589329, 0, 0, 0, 1098211362, 0, 0, 0, 2196422724, 0, 0, 0, 97878152, 0, 0, + 0, 195756305, 0, 0, 0, 391512610, 0, 0, 0, 165413185, 0, 0, 0, 330826370, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2541187255, 0, 0, 0, 787407215, 0, 0, 0, 1574814430, 0, 0, 0, 3149628860, 0, + 0, 0, 2004290425, 0, 0, 0, 4008580850, 0, 0, 0, 3722194405, 0, 0, 0, 3149421515, 0, + 0, 0, 2003875734, 0, 0, 0, 4007751468, 0, 0, 0, 3720535640, 0, 0, 0, 3146103984, 0, + 0, 0, 1997240673, 0, 0, 0, 3994481347, 0, 0, 0, 3693995398, 0, 0, 0, 791460795, 0, + 0, 0, 1582921591, 0, 0, 0, 3165843182, 0, 0, 0, 2036719068, 0, 0, 0, 4073438136, 0, + 0, 0, 3851908976, 0, 0, 0, 3408850657, 0, 0, 0, 2522734018, 0, 0, 0, 750500741, 
0, + 0, 0, 1501001483, 0, 0, 0, 3002002966, 0, 0, 0, 1709038637, 0, 0, 0, 3418077275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 1739616249, 0, 0, 0, 3479232498, 0, 0, 0, 2663497700, 0, 0, 0, + 1032028104, 0, 0, 0, 2064056209, 0, 0, 0, 4128112418, 0, 0, 0, 3961257541, 0, 0, 0, + 3627547787, 0, 0, 0, 2960128279, 0, 0, 0, 1625289262, 0, 0, 0, 3250578525, 0, 0, 0, + 2206189754, 0, 0, 0, 117412213, 0, 0, 0, 234824426, 0, 0, 0, 469648853, 0, 0, 0, + 939297707, 0, 0, 0, 1878595415, 0, 0, 0, 3757190830, 0, 0, 0, 3219414364, 0, 0, 0, + 2143861433, 0, 0, 0, 4287722866, 0, 0, 0, 4280478436, 0, 0, 0, 4265989576, 0, 0, 0, + 4237011857, 0, 0, 0, 4179056419, 0, 0, 0, 2510209471, 0, 0, 0, 725451647, 0, 0, 0, + 1450903295, 0, 0, 0, 2901806591, 0, 0, 0, 1508645886, 0, 0, 0, 3017291772, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2815455249, 0, 0, + 0, 1335943203, 0, 0, 0, 2671886407, 0, 0, 0, 1048805518, 0, 0, 0, 2097611036, 0, 0, + 0, 4195222072, 0, 0, 0, 4095476849, 0, 0, 0, 3895986402, 0, 0, 0, 3497005508, 0, 0, + 0, 2699043721, 0, 0, 0, 1103120146, 0, 0, 0, 2206240292, 0, 0, 0, 117513288, 0, 0, + 0, 235026577, 0, 0, 0, 470053155, 0, 0, 0, 940106310, 0, 0, 0, 1880212621, 0, 0, + 0, 3760425243, 0, 0, 0, 3225883190, 0, 0, 0, 2156799084, 0, 0, 0, 18630872, 0, 0, + 0, 37261744, 0, 0, 0, 74523488, 0, 0, 0, 149046977, 0, 0, 0, 298093954, 0, 0, + 0, 596187909, 0, 0, 0, 1192375818, 0, 
0, 0, 703863812, 0, 0, 0, 1407727624, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2614536692, 0, 0, 0, 934106089, 0, 0, 0, 1868212179, 0, 0, 0, 3736424359, 0, + 0, 0, 3177881422, 0, 0, 0, 2060795549, 0, 0, 0, 4121591099, 0, 0, 0, 3948214903, 0, + 0, 0, 3601462510, 0, 0, 0, 2907957725, 0, 0, 0, 1520948154, 0, 0, 0, 3041896308, 0, + 0, 0, 1788825320, 0, 0, 0, 3577650640, 0, 0, 0, 2860333984, 0, 0, 0, 3475824309, 0, + 0, 0, 2656681322, 0, 0, 0, 1018395349, 0, 0, 0, 2036790698, 0, 0, 0, 4073581396, 0, + 0, 0, 3852195497, 0, 0, 0, 3409423699, 0, 0, 0, 2523880103, 0, 0, 0, 752792911, 0, + 0, 0, 1505585823, 0, 0, 0, 3011171646, 0, 0, 0, 1727375997, 0, 0, 0, 3454751994, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, + { + 0, 0, 0, 0, 1915396941, 0, 0, 0, 3830793883, 0, 0, 0, 3366620471, 0, 0, 0, + 2438273647, 0, 0, 0, 581579999, 0, 0, 0, 1163159998, 0, 0, 0, 2326319996, 0, 0, 0, + 357672696, 0, 0, 0, 715345393, 0, 0, 0, 1430690786, 0, 0, 0, 2861381572, 0, 0, 0, + 1427795848, 0, 0, 0, 2855591696, 0, 0, 0, 1416216096, 0, 0, 0, 2832432193, 0, 0, 0, + 1369897090, 0, 0, 0, 2739794180, 0, 0, 0, 1184621065, 0, 0, 0, 2369242131, 0, 0, 0, + 443516967, 0, 0, 0, 887033934, 0, 0, 0, 1774067868, 0, 0, 0, 3548135736, 0, 0, 0, + 2801304176, 0, 0, 0, 1307641056, 0, 0, 0, 3922242189, 0, 0, 0, 3549517082, 0, 0, 0, + 2804066868, 0, 0, 0, 1313166441, 0, 0, 0, 2626332883, 0, 0, 0, 957698470, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1432822912, 0, 0, + 0, 2865645824, 0, 0, 0, 1436324352, 0, 0, 0, 2872648704, 0, 0, 0, 1450330112, 0, 0, + 0, 2900660224, 0, 0, 0, 1506353153, 0, 0, 0, 3012706307, 0, 0, 0, 1730445319, 0, 0, + 0, 3460890639, 0, 0, 0, 2626813983, 0, 0, 0, 958660670, 0, 0, 0, 1917321341, 0, 0, + 0, 3834642683, 0, 0, 0, 3374318071, 0, 0, 0, 2453668846, 0, 0, 0, 612370397, 0, 0, + 0, 1224740795, 0, 0, 0, 2449481590, 0, 0, 0, 603995885, 0, 0, 0, 1207991771, 0, 0, + 0, 2415983542, 0, 0, 0, 536999789, 0, 0, 0, 1073999578, 0, 0, 0, 2147999156, 0, 0, + 0, 1031016, 0, 0, 0, 2062032, 0, 0, 0, 1431947552, 0, 0, 0, 2863895104, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2983242506, 0, 0, 0, 1671517716, 0, 0, 0, 3343035433, 0, 0, 0, 2391103570, 0, + 0, 0, 487239844, 0, 0, 0, 974479689, 0, 0, 0, 1948959378, 0, 0, 0, 3897918757, 0, + 0, 0, 3500870219, 0, 0, 0, 2706773142, 0, 0, 0, 1118578988, 0, 0, 0, 2237157976, 0, + 0, 0, 179348657, 0, 0, 0, 358697315, 0, 0, 0, 717394631, 0, 0, 0, 3830812293, 0, + 0, 0, 3366657290, 0, 0, 0, 2438347285, 0, 0, 0, 581727274, 0, 0, 0, 1163454549, 0, + 0, 0, 2326909099, 0, 0, 0, 358850902, 0, 0, 0, 717701804, 0, 0, 0, 1435403608, 0, + 0, 0, 2870807216, 0, 0, 0, 1446647137, 0, 0, 0, 2893294274, 0, 0, 0, 1491621253, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2199499341, + 0, 0, 0, 104031387, 0, 0, 0, 208062775, 0, 0, 0, 416125551, 0, 0, 0, 832251102, + 0, 0, 0, 1664502205, 0, 0, 0, 3329004410, 0, 0, 0, 2363041525, 0, 0, 0, 431115754, + 0, 0, 0, 862231508, 0, 0, 0, 1724463016, 0, 0, 0, 3448926032, 0, 0, 0, 2602884769, + 0, 0, 0, 910802242, 0, 0, 0, 1821604484, 0, 0, 0, 3643208968, 0, 0, 0, 2991450640, + 0, 0, 0, 1687933984, 0, 0, 0, 3375867968, 0, 0, 0, 2456768640, 0, 0, 0, 618569985, + 0, 0, 0, 1237139970, 0, 0, 0, 274937417, 0, 0, 0, 549874835, 0, 0, 0, 1099749670, + }, + { + 0, 0, 0, 0, 4038889453, 0, 0, 0, 3782811611, 0, 0, 0, 3270655926, 0, 0, 0, + 
2246344557, 0, 0, 0, 197721819, 0, 0, 0, 395443639, 0, 0, 0, 790887279, 0, 0, 0, + 1581774559, 0, 0, 0, 3163549119, 0, 0, 0, 2032130942, 0, 0, 0, 4064261884, 0, 0, 0, + 3833556473, 0, 0, 0, 3372145651, 0, 0, 0, 2449324007, 0, 0, 0, 603680719, 0, 0, 0, + 1207361439, 0, 0, 0, 2414722878, 0, 0, 0, 534478461, 0, 0, 0, 1068956923, 0, 0, 0, + 2137913847, 0, 0, 0, 4275827694, 0, 0, 0, 4256688093, 0, 0, 0, 4218408890, 0, 0, 0, + 4141850484, 0, 0, 0, 3988733673, 0, 0, 0, 734196287, 0, 0, 0, 1468392575, 0, 0, 0, + 2936785150, 0, 0, 0, 1578603005, 0, 0, 0, 3157206011, 0, 0, 0, 2019444726, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2740852545, 0, 0, + 0, 1186737794, 0, 0, 0, 2373475589, 0, 0, 0, 451983883, 0, 0, 0, 903967767, 0, 0, + 0, 1807935534, 0, 0, 0, 3615871068, 0, 0, 0, 2936774840, 0, 0, 0, 1578582385, 0, 0, + 0, 3157164770, 0, 0, 0, 2019362244, 0, 0, 0, 4038724489, 0, 0, 0, 3782481683, 0, 0, + 0, 3269996070, 0, 0, 0, 2245024844, 0, 0, 0, 195082393, 0, 0, 0, 390164786, 0, 0, + 0, 780329572, 0, 0, 0, 1560659145, 0, 0, 0, 3121318290, 0, 0, 0, 1947669285, 0, 0, + 0, 3895338570, 0, 0, 0, 3495709844, 0, 0, 0, 2696452393, 0, 0, 0, 1097937490, 0, 0, + 0, 2195874980, 0, 0, 0, 96782664, 0, 0, 0, 2832696784, 0, 0, 0, 1370426272, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2797805596, 0, 0, 0, 1300643896, 0, 0, 0, 2601287792, 0, 0, 0, 907608289, 0, + 0, 0, 1815216578, 0, 0, 0, 3630433157, 0, 0, 0, 2965899019, 0, 0, 0, 1636830742, 0, + 0, 0, 3273661484, 0, 0, 0, 2252355672, 0, 0, 0, 209744048, 0, 0, 0, 419488096, 0, + 0, 0, 838976192, 0, 0, 0, 1677952385, 0, 0, 0, 3355904770, 0, 0, 0, 919418393, 0, + 0, 0, 1838836786, 0, 0, 0, 3677673572, 0, 0, 0, 3060379848, 0, 0, 0, 1825792400, 0, + 0, 0, 3651584800, 0, 0, 0, 3008202304, 0, 0, 0, 1721437312, 0, 0, 0, 3442874624, 0, + 0, 0, 2590781953, 0, 0, 0, 886596611, 0, 0, 0, 1773193223, 0, 0, 0, 3546386446, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
4196822677, + 0, 0, 0, 4098678059, 0, 0, 0, 3902388822, 0, 0, 0, 3509810348, 0, 0, 0, 2724653400, + 0, 0, 0, 1154339505, 0, 0, 0, 2308679010, 0, 0, 0, 322390725, 0, 0, 0, 644781450, + 0, 0, 0, 1289562901, 0, 0, 0, 2579125802, 0, 0, 0, 863284308, 0, 0, 0, 1726568616, + 0, 0, 0, 3453137233, 0, 0, 0, 2611307171, 0, 0, 0, 927647046, 0, 0, 0, 1855294093, + 0, 0, 0, 3710588186, 0, 0, 0, 3126209076, 0, 0, 0, 1957450856, 0, 0, 0, 3914901713, + 0, 0, 0, 3534836131, 0, 0, 0, 1598344658, 0, 0, 0, 3196689317, 0, 0, 0, 2098411338, + }, + { + 0, 0, 0, 0, 3824344628, 0, 0, 0, 3353721960, 0, 0, 0, 2412476624, 0, 0, 0, + 529985953, 0, 0, 0, 1059971907, 0, 0, 0, 2119943815, 0, 0, 0, 4239887631, 0, 0, 0, + 4184807967, 0, 0, 0, 4074648638, 0, 0, 0, 3854329981, 0, 0, 0, 3413692666, 0, 0, 0, + 2532418036, 0, 0, 0, 769868776, 0, 0, 0, 1539737553, 0, 0, 0, 3079475106, 0, 0, 0, + 1863982917, 0, 0, 0, 3727965834, 0, 0, 0, 3160964373, 0, 0, 0, 2026961450, 0, 0, 0, + 4053922901, 0, 0, 0, 3812878506, 0, 0, 0, 3330789716, 0, 0, 0, 2366612137, 0, 0, 0, + 438256979, 0, 0, 0, 876513958, 0, 0, 0, 2341456760, 0, 0, 0, 387946225, 0, 0, 0, + 775892451, 0, 0, 0, 1551784902, 0, 0, 0, 3103569805, 0, 0, 0, 1912172314, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3220345106, 0, 0, + 0, 2145722916, 0, 0, 0, 4291445832, 0, 0, 0, 4287924368, 0, 0, 0, 4280881440, 0, 0, + 0, 4266795584, 0, 0, 0, 4238623872, 0, 0, 0, 4182280448, 0, 0, 0, 4069593601, 0, 0, + 0, 3844219907, 0, 0, 0, 3393472519, 0, 0, 0, 2491977742, 0, 0, 0, 688988188, 0, 0, + 0, 1377976376, 0, 0, 0, 2755952753, 0, 0, 0, 1216938211, 0, 0, 0, 2433876422, 0, 0, + 0, 572785549, 0, 0, 0, 1145571098, 0, 0, 0, 2291142197, 0, 0, 0, 287317098, 0, 0, + 0, 574634197, 0, 0, 0, 1149268394, 0, 0, 0, 2298536789, 0, 0, 0, 302106282, 0, 0, + 0, 604212565, 0, 0, 0, 1208425131, 0, 0, 0, 805086276, 0, 0, 0, 1610172553, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3324387232, 0, 0, 0, 2353807168, 0, 0, 0, 412647041, 0, 0, 0, 825294082, 0, + 0, 
0, 1650588165, 0, 0, 0, 3301176331, 0, 0, 0, 2307385367, 0, 0, 0, 319803438, 0, + 0, 0, 639606877, 0, 0, 0, 1279213754, 0, 0, 0, 2558427508, 0, 0, 0, 821887721, 0, + 0, 0, 1643775442, 0, 0, 0, 3287550884, 0, 0, 0, 2280134472, 0, 0, 0, 3388354864, 0, + 0, 0, 2481742433, 0, 0, 0, 668517571, 0, 0, 0, 1337035143, 0, 0, 0, 2674070287, 0, + 0, 0, 1053173279, 0, 0, 0, 2106346559, 0, 0, 0, 4212693118, 0, 0, 0, 4130418941, 0, + 0, 0, 3965870586, 0, 0, 0, 3636773876, 0, 0, 0, 2978580456, 0, 0, 0, 1662193616, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2034053848, + 0, 0, 0, 4068107696, 0, 0, 0, 3841248097, 0, 0, 0, 3387528899, 0, 0, 0, 2480090502, + 0, 0, 0, 665213709, 0, 0, 0, 1330427418, 0, 0, 0, 2660854837, 0, 0, 0, 1026742378, + 0, 0, 0, 2053484756, 0, 0, 0, 4106969513, 0, 0, 0, 3918971730, 0, 0, 0, 3542976165, + 0, 0, 0, 2790985034, 0, 0, 0, 1287002772, 0, 0, 0, 2574005545, 0, 0, 0, 853043794, + 0, 0, 0, 1706087588, 0, 0, 0, 3412175176, 0, 0, 0, 2529383056, 0, 0, 0, 763798816, + 0, 0, 0, 1527597633, 0, 0, 0, 3475482203, 0, 0, 0, 2655997110, 0, 0, 0, 1017026924, + }, + { + 0, 0, 0, 0, 1099834596, 0, 0, 0, 2199669192, 0, 0, 0, 104371089, 0, 0, 0, + 208742178, 0, 0, 0, 417484356, 0, 0, 0, 834968712, 0, 0, 0, 1669937425, 0, 0, 0, + 3339874850, 0, 0, 0, 2384782404, 0, 0, 0, 474597512, 0, 0, 0, 949195024, 0, 0, 0, + 1898390048, 0, 0, 0, 3796780096, 0, 0, 0, 3298592897, 0, 0, 0, 2302218499, 0, 0, 0, + 309469703, 0, 0, 0, 618939406, 0, 0, 0, 1237878812, 0, 0, 0, 2475757624, 0, 0, 0, + 656547953, 0, 0, 0, 1313095907, 0, 0, 0, 2626191815, 0, 0, 0, 957416334, 0, 0, 0, + 1914832669, 0, 0, 0, 3829665339, 0, 0, 0, 2298886291, 0, 0, 0, 302805287, 0, 0, 0, + 605610574, 0, 0, 0, 1211221148, 0, 0, 0, 2422442297, 0, 0, 0, 549917298, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 348140706, 0, 0, + 0, 696281412, 0, 0, 0, 1392562824, 0, 0, 0, 2785125649, 0, 0, 0, 1275284003, 0, 0, + 0, 2550568007, 0, 0, 0, 806168719, 0, 0, 0, 
1612337439, 0, 0, 0, 3224674878, 0, 0, + 0, 2154382460, 0, 0, 0, 13797624, 0, 0, 0, 27595248, 0, 0, 0, 55190496, 0, 0, + 0, 110380992, 0, 0, 0, 220761985, 0, 0, 0, 441523971, 0, 0, 0, 883047943, 0, 0, + 0, 1766095886, 0, 0, 0, 3532191772, 0, 0, 0, 2769416249, 0, 0, 0, 1243865203, 0, 0, + 0, 2487730406, 0, 0, 0, 680493516, 0, 0, 0, 1360987032, 0, 0, 0, 2721974065, 0, 0, + 0, 1148980834, 0, 0, 0, 2297961669, 0, 0, 0, 87035176, 0, 0, 0, 174070353, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 470870896, 0, 0, 0, 941741792, 0, 0, 0, 1883483584, 0, 0, 0, 3766967168, 0, + 0, 0, 3238967041, 0, 0, 0, 2182966786, 0, 0, 0, 70966277, 0, 0, 0, 141932555, 0, + 0, 0, 283865111, 0, 0, 0, 567730223, 0, 0, 0, 1135460447, 0, 0, 0, 2270920895, 0, + 0, 0, 246874494, 0, 0, 0, 493748989, 0, 0, 0, 987497979, 0, 0, 0, 1772675207, 0, + 0, 0, 3545350414, 0, 0, 0, 2795733533, 0, 0, 0, 1296499770, 0, 0, 0, 2592999541, 0, + 0, 0, 891031787, 0, 0, 0, 1782063574, 0, 0, 0, 3564127149, 0, 0, 0, 2833287003, 0, + 0, 0, 1371606711, 0, 0, 0, 2743213422, 0, 0, 0, 1191459548, 0, 0, 0, 2382919096, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30521863, + 0, 0, 0, 61043727, 0, 0, 0, 122087454, 0, 0, 0, 244174909, 0, 0, 0, 488349818, + 0, 0, 0, 976699637, 0, 0, 0, 1953399275, 0, 0, 0, 3906798551, 0, 0, 0, 3518629806, + 0, 0, 0, 2742292316, 0, 0, 0, 1189617336, 0, 0, 0, 2379234673, 0, 0, 0, 463502051, + 0, 0, 0, 927004102, 0, 0, 0, 1854008205, 0, 0, 0, 3708016410, 0, 0, 0, 3121065525, + 0, 0, 0, 1947163754, 0, 0, 0, 3894327508, 0, 0, 0, 3493687720, 0, 0, 0, 2692408145, + 0, 0, 0, 1089848995, 0, 0, 0, 2151298880, 0, 0, 0, 7630465, 0, 0, 0, 15260931, + }, + { + 0, 0, 0, 0, 3400190380, 0, 0, 0, 2505413465, 0, 0, 0, 715859635, 0, 0, 0, + 1431719270, 0, 0, 0, 2863438540, 0, 0, 0, 1431909784, 0, 0, 0, 2863819568, 0, 0, 0, + 1432671840, 0, 0, 0, 2865343680, 0, 0, 0, 1435720064, 0, 0, 0, 2871440128, 0, 0, 0, + 1447912961, 0, 0, 0, 2895825923, 
0, 0, 0, 1496684550, 0, 0, 0, 2993369100, 0, 0, 0, + 1691770904, 0, 0, 0, 3383541809, 0, 0, 0, 2472116322, 0, 0, 0, 649265349, 0, 0, 0, + 1298530698, 0, 0, 0, 2597061396, 0, 0, 0, 899155496, 0, 0, 0, 1798310993, 0, 0, 0, + 3596621986, 0, 0, 0, 2898276677, 0, 0, 0, 2469047078, 0, 0, 0, 643126861, 0, 0, 0, + 1286253722, 0, 0, 0, 2572507445, 0, 0, 0, 850047595, 0, 0, 0, 1700095190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2692483345, 0, 0, + 0, 1089999394, 0, 0, 0, 2179998788, 0, 0, 0, 65030280, 0, 0, 0, 130060560, 0, 0, + 0, 260121121, 0, 0, 0, 520242243, 0, 0, 0, 1040484486, 0, 0, 0, 2080968972, 0, 0, + 0, 4161937944, 0, 0, 0, 4028908593, 0, 0, 0, 3762849891, 0, 0, 0, 3230732486, 0, 0, + 0, 2166497676, 0, 0, 0, 38028056, 0, 0, 0, 76056112, 0, 0, 0, 152112225, 0, 0, + 0, 304224450, 0, 0, 0, 608448901, 0, 0, 0, 1216897803, 0, 0, 0, 2433795606, 0, 0, + 0, 572623917, 0, 0, 0, 1145247834, 0, 0, 0, 2290495669, 0, 0, 0, 286024042, 0, 0, + 0, 572048085, 0, 0, 0, 1144096170, 0, 0, 0, 673120836, 0, 0, 0, 1346241672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 27673132, 0, 0, 0, 55346265, 0, 0, 0, 110692530, 0, 0, 0, 221385060, 0, + 0, 0, 442770121, 0, 0, 0, 885540243, 0, 0, 0, 1771080487, 0, 0, 0, 3542160975, 0, + 0, 0, 2789354654, 0, 0, 0, 1283742012, 0, 0, 0, 2567484024, 0, 0, 0, 840000753, 0, + 0, 0, 1680001507, 0, 0, 0, 3360003015, 0, 0, 0, 2425038735, 0, 0, 0, 548408626, 0, + 0, 0, 1096817252, 0, 0, 0, 2193634504, 0, 0, 0, 92301712, 0, 0, 0, 184603425, 0, + 0, 0, 369206850, 0, 0, 0, 738413700, 0, 0, 0, 1476827400, 0, 0, 0, 2953654801, 0, + 0, 0, 1612342306, 0, 0, 0, 3224684613, 0, 0, 0, 2154401931, 0, 0, 0, 13836566, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33833481, + 0, 0, 0, 67666962, 0, 0, 0, 135333924, 0, 0, 0, 270667848, 0, 0, 0, 541335697, + 0, 0, 0, 1082671394, 0, 0, 0, 2165342788, 0, 0, 0, 35718281, 0, 0, 0, 71436563, + 0, 0, 0, 142873126, 0, 0, 0, 285746252, 0, 0, 0, 
571492505, 0, 0, 0, 1142985010, + 0, 0, 0, 2285970021, 0, 0, 0, 276972746, 0, 0, 0, 553945492, 0, 0, 0, 1107890985, + 0, 0, 0, 2215781970, 0, 0, 0, 136596644, 0, 0, 0, 273193289, 0, 0, 0, 546386578, + 0, 0, 0, 1092773156, 0, 0, 0, 2151712833, 0, 0, 0, 8458370, 0, 0, 0, 16916740, + }, + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1473249760, 0, 0, + 0, 2946499520, 0, 0, 0, 1598031745, 0, 0, 0, 3196063490, 0, 0, 0, 2097159684, 0, 0, + 0, 4194319368, 0, 0, 0, 4093671441, 0, 0, 0, 3892375586, 0, 0, 0, 3489783876, 0, 0, + 0, 2684600457, 0, 0, 0, 1074233618, 0, 0, 0, 2148467236, 0, 0, 0, 1967176, 0, 0, + 0, 3934352, 0, 0, 0, 7868704, 0, 0, 0, 15737408, 0, 0, 0, 31474816, 0, 0, + 0, 62949632, 0, 0, 0, 125899264, 0, 0, 0, 251798529, 0, 0, 0, 503597059, 0, 0, + 0, 1007194118, 0, 0, 0, 2014388236, 0, 0, 0, 4028776473, 0, 0, 0, 3762585651, 0, 0, + 0, 3230204006, 0, 0, 0, 2165440716, 0, 0, 0, 1442054264, 0, 0, 0, 2884108528, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1749068055, 0, 0, 0, 3498136111, 0, 0, 0, 2701304927, 0, 0, 0, 1107642558, 0, + 0, 0, 2215285117, 0, 0, 0, 135602938, 0, 0, 0, 271205877, 0, 0, 0, 542411755, 0, + 0, 0, 1084823510, 0, 0, 0, 2169647020, 0, 0, 0, 44326744, 0, 0, 0, 88653489, 0, + 0, 0, 177306978, 0, 0, 0, 354613956, 0, 0, 0, 709227913, 0, 0, 0, 1019953669, 0, + 0, 0, 
2039907338, 0, 0, 0, 4079814677, 0, 0, 0, 3864662058, 0, 0, 0, 3434356820, 0, + 0, 0, 2573746345, 0, 0, 0, 852525394, 0, 0, 0, 1705050788, 0, 0, 0, 3410101576, 0, + 0, 0, 2525235857, 0, 0, 0, 755504418, 0, 0, 0, 1511008837, 0, 0, 0, 3022017675, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67141650, + 0, 0, 0, 134283300, 0, 0, 0, 268566600, 0, 0, 0, 537133200, 0, 0, 0, 1074266400, + 0, 0, 0, 2148532800, 0, 0, 0, 2098304, 0, 0, 0, 4196609, 0, 0, 0, 8393218, + 0, 0, 0, 16786436, 0, 0, 0, 33572873, 0, 0, 0, 67145746, 0, 0, 0, 134291492, + 0, 0, 0, 268582984, 0, 0, 0, 537165968, 0, 0, 0, 1074331936, 0, 0, 0, 2148663872, + 0, 0, 0, 2360448, 0, 0, 0, 4720897, 0, 0, 0, 9441794, 0, 0, 0, 18883588, + 0, 0, 0, 37767176, 0, 0, 0, 8392706, 0, 0, 0, 16785412, 0, 0, 0, 33570825, + }, + { + 0, 0, 0, 0, 1874778401, 0, 0, 0, 3749556802, 0, 0, 0, 3204146308, 0, 0, 0, + 2113325320, 0, 0, 0, 4226650640, 0, 0, 0, 4158333984, 0, 0, 0, 4021700672, 0, 0, 0, + 3748434048, 0, 0, 0, 3201900800, 0, 0, 0, 2108834304, 0, 0, 0, 4217668608, 0, 0, 0, + 4140369920, 0, 0, 0, 3985772545, 0, 0, 0, 3676577794, 0, 0, 0, 3058188292, 0, 0, 0, + 1821409288, 0, 0, 0, 3642818577, 0, 0, 0, 2990669858, 0, 0, 0, 1686372420, 0, 0, 0, + 3372744841, 0, 0, 0, 2450522386, 0, 0, 0, 606077476, 0, 0, 0, 1212154952, 0, 0, 0, + 2424309905, 0, 0, 0, 553652514, 0, 0, 0, 767490916, 0, 0, 0, 1534981833, 0, 0, 0, + 3069963666, 0, 0, 0, 1844960036, 0, 0, 0, 3689920072, 0, 0, 0, 3084872848, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 0, 4261413888, 0, 0, 0, 4227860480, 0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 
0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2164392972, 0, 0, 0, 33818648, 0, 0, 0, 67637296, 0, 0, 0, 135274592, 0, + 0, 0, 270549185, 0, 0, 0, 541098371, 0, 0, 0, 1082196742, 0, 0, 0, 2164393484, 0, + 0, 0, 33819672, 0, 0, 0, 67639344, 0, 0, 0, 135278688, 0, 0, 0, 270557377, 0, + 0, 0, 541114755, 0, 0, 0, 1082229511, 0, 0, 0, 2164459022, 0, 0, 0, 2198079504, 0, + 0, 0, 101191712, 0, 0, 0, 202383424, 0, 0, 0, 404766849, 0, 0, 0, 809533698, 0, + 0, 0, 1619067396, 0, 0, 0, 3238134792, 0, 0, 0, 2181302288, 0, 0, 0, 67637280, 0, + 0, 0, 135274560, 0, 0, 0, 270549121, 0, 0, 0, 541098243, 0, 0, 0, 1082196486, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2304, + 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, + 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, 0, 0, 0, 589824, + 0, 0, 0, 1179648, 0, 0, 0, 2359296, 0, 0, 0, 4718593, 0, 0, 0, 9437186, + 0, 0, 0, 18874372, 0, 0, 0, 37748744, 0, 0, 0, 75497488, 0, 0, 0, 150994976, + 0, 0, 0, 301989953, 0, 0, 0, 603979906, 0, 0, 0, 1207959812, 0, 0, 0, 2415919624, + 0, 0, 0, 536871952, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + }, + { + 0, 0, 0, 0, 882001920, 0, 0, 0, 1764003840, 0, 0, 0, 3528007680, 0, 0, 0, + 2761048065, 0, 0, 0, 1227128834, 0, 0, 0, 2454257668, 0, 0, 0, 613548040, 0, 0, 0, + 1227096080, 0, 0, 0, 2454192160, 0, 0, 0, 613417024, 0, 0, 0, 1226834048, 0, 0, 0, + 2453668096, 0, 0, 0, 612368896, 0, 0, 0, 1224737792, 0, 0, 0, 2449475584, 0, 0, 0, + 603983872, 0, 0, 0, 1207967744, 0, 0, 0, 2415935489, 0, 0, 0, 536903682, 0, 0, 0, + 1073807364, 0, 0, 0, 2147614729, 0, 0, 0, 262162, 0, 0, 0, 524324, 0, 0, 0, + 1048648, 0, 0, 0, 2097296, 0, 0, 0, 886196512, 0, 0, 0, 1772393024, 0, 0, 0, + 3544786048, 0, 0, 0, 2794604800, 0, 0, 0, 1294242304, 0, 0, 0, 2588484608, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8390658, + 0, 0, 0, 16781316, 0, 0, 0, 33562633, 0, 0, 0, 67125266, 0, 0, 0, 134250532, + 0, 0, 0, 268501064, 0, 0, 0, 537002128, 0, 0, 0, 1074004256, 0, 0, 0, 2148008512, + 0, 0, 0, 1049728, 0, 0, 0, 2099456, 0, 0, 0, 4198913, 0, 0, 0, 8397826, + 0, 0, 0, 16795652, 0, 0, 0, 33591305, 0, 0, 0, 67182610, 0, 0, 0, 134365220, + 0, 0, 0, 268730440, 0, 0, 0, 537460880, 0, 0, 0, 1074921760, 0, 0, 0, 2149843520, + 0, 0, 0, 4719745, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195329, + }, + { + 0, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, + 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, + 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, + 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, + 33554433, 0, 0, 0, 67108866, 0, 0, 0, 134217732, 0, 0, 0, 
268435464, 0, 0, 0, + 536870928, 0, 0, 0, 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, + 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 4160, 0, 0, 0, 8320, 0, 0, 0, 16640, 0, 0, 0, + 33280, 
0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, + 532480, 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259840, 0, 0, 0, + 8519680, 0, 0, 0, 17039360, 0, 0, 0, 34078721, 0, 0, 0, 68157442, 0, 0, 0, + 136314884, 0, 0, 0, 272629768, 0, 0, 0, 545259536, 0, 0, 0, 1090519072, 0, 0, 0, + 2181038144, 0, 0, 0, 67108992, 0, 0, 0, 134217984, 0, 0, 0, 268435968, 0, 0, 0, + 536871936, 0, 0, 0, 1073743872, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 
0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 3162112, 0, 0, 0, 6324224, 0, 0, 0, 12648448, 0, 0, 0, + 25296896, 0, 0, 0, 50593793, 0, 0, 0, 101187587, 0, 0, 0, 202375174, 0, 0, 0, + 404750348, 0, 0, 0, 809500696, 0, 0, 0, 1619001392, 0, 0, 0, 3238002785, 0, 0, 0, + 2181038274, 0, 0, 0, 67109252, 0, 0, 0, 134218504, 0, 0, 0, 268437008, 0, 0, 0, + 536874016, 0, 0, 0, 1073748032, 0, 0, 0, 2147496065, 0, 0, 0, 24834, 0, 0, 0, + 49668, 0, 0, 0, 99336, 0, 0, 0, 198672, 0, 0, 0, 397344, 0, 0, 0, + 794688, 0, 0, 0, 1589376, 0, 0, 0, 49408, 0, 0, 0, 98816, 0, 0, 0, + 197632, 0, 0, 0, 395264, 0, 0, 0, 790528, 0, 0, 0, 1581056, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8704, 0, 0, + 0, 17408, 0, 0, 0, 34816, 0, 0, 0, 69632, 0, 0, 0, 139264, 0, 0, + 0, 278528, 0, 0, 0, 557056, 0, 0, 0, 1114112, 0, 0, 0, 2228224, 0, 0, + 0, 4456448, 0, 0, 0, 8912896, 0, 0, 0, 17825792, 0, 0, 0, 35651584, 0, 0, + 0, 71303168, 0, 0, 0, 142606337, 0, 0, 0, 285212674, 0, 0, 0, 570425349, 0, 0, + 0, 1140850698, 0, 0, 0, 2281701397, 0, 0, 0, 268435498, 0, 0, 0, 536870997, 0, 0, + 0, 1073741994, 0, 0, 0, 2147483988, 0, 0, 0, 680, 0, 0, 0, 1360, 0, 0, + 0, 2720, 0, 0, 0, 5440, 0, 0, 0, 2176, 0, 0, 0, 4352, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 
0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 2554888269, 0, 0, 0, 814809242, 0, 0, 0, 1629618484, 0, 0, 0, + 3259236968, 0, 0, 0, 2223506641, 0, 0, 0, 152045986, 0, 0, 0, 304091973, 0, 0, 0, + 608183946, 0, 0, 0, 1216367892, 0, 0, 0, 2432735785, 0, 0, 0, 570504275, 0, 0, 0, + 1141008550, 0, 0, 0, 2282017101, 0, 0, 0, 269066906, 0, 0, 0, 538133812, 0, 0, 0, + 1076267624, 0, 0, 0, 2152535249, 0, 0, 0, 10103202, 0, 0, 0, 20206404, 0, 0, 0, + 40412809, 0, 0, 0, 80825618, 0, 0, 0, 161651236, 0, 0, 0, 323302473, 0, 0, 0, + 646604947, 0, 0, 0, 1293209894, 0, 0, 0, 39920129, 0, 0, 0, 79840258, 0, 0, 0, + 159680516, 0, 0, 0, 319361033, 0, 0, 0, 638722067, 0, 0, 0, 1277444134, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134299649, 0, 0, + 0, 268599298, 0, 0, 0, 537198597, 0, 0, 0, 1074397194, 0, 0, 0, 2148794388, 0, 0, + 0, 2621480, 0, 0, 0, 5242960, 0, 0, 0, 10485920, 0, 0, 0, 20971840, 0, 0, + 0, 41943680, 0, 0, 0, 83887360, 0, 0, 0, 167774721, 0, 0, 0, 335549442, 0, 0, + 0, 671098884, 0, 0, 0, 1342197768, 0, 0, 0, 2684395537, 0, 0, 0, 1073823778, 0, 0, + 0, 2147647556, 0, 0, 0, 327816, 0, 0, 0, 655632, 0, 0, 0, 1311264, 0, 0, + 0, 2622528, 0, 0, 0, 5245056, 0, 0, 0, 10490112, 0, 0, 0, 20980224, 0, 0, + 0, 41960448, 0, 0, 0, 83920896, 0, 0, 0, 33574912, 0, 0, 0, 67149824, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 490817699, 0, 0, 0, 981635398, 0, 0, 0, 1963270797, 0, 0, 0, 3926541594, 0, + 0, 0, 3558115892, 0, 0, 0, 2821264488, 0, 0, 0, 1347561681, 0, 0, 0, 2695123362, 0, + 0, 0, 1095279429, 0, 0, 0, 2190558858, 0, 0, 0, 86150421, 0, 0, 0, 172300842, 0, + 0, 0, 344601685, 0, 0, 0, 689203370, 0, 0, 0, 1378406740, 0, 0, 0, 3104893450, 0, + 0, 0, 1914819604, 0, 0, 0, 3829639209, 0, 0, 0, 3364311122, 0, 0, 0, 2433654948, 0, + 0, 0, 572342600, 0, 0, 0, 1144685201, 0, 0, 0, 2289370402, 0, 0, 0, 283773509, 0, + 0, 0, 567547018, 0, 0, 0, 1135094036, 0, 0, 0, 2270188072, 0, 0, 0, 245408849, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 1164254896, 0, 0, 0, 2328509792, 0, 0, 0, 362052288, 0, 0, 0, + 724104577, 0, 0, 0, 1448209155, 0, 0, 0, 2896418311, 0, 0, 0, 1497869326, 0, 0, 0, + 2995738652, 0, 0, 0, 1696510008, 0, 0, 0, 3393020016, 0, 0, 0, 2491072737, 0, 0, 0, + 687178178, 0, 0, 0, 1374356356, 0, 0, 0, 2748712712, 0, 0, 0, 1202458129, 0, 0, 0, + 2404916258, 0, 0, 0, 514865221, 0, 0, 0, 1029730442, 0, 0, 0, 2059460885, 0, 0, 0, + 4118921771, 0, 0, 0, 3942876246, 0, 0, 0, 3590785196, 0, 0, 0, 2886603097, 0, 0, 0, + 1478238898, 0, 0, 0, 2956477797, 0, 0, 0, 622171258, 0, 0, 0, 1244342517, 0, 0, 0, + 2488685035, 0, 0, 0, 682402774, 0, 0, 0, 1364805548, 0, 0, 0, 2729611096, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336678914, 0, 0, + 0, 673357828, 0, 
0, 0, 1346715656, 0, 0, 0, 2693431313, 0, 0, 0, 1091895330, 0, 0, + 0, 2183790660, 0, 0, 0, 72614024, 0, 0, 0, 145228049, 0, 0, 0, 290456098, 0, 0, + 0, 580912197, 0, 0, 0, 1161824394, 0, 0, 0, 2323648789, 0, 0, 0, 352330282, 0, 0, + 0, 704660564, 0, 0, 0, 1409321128, 0, 0, 0, 2818642256, 0, 0, 0, 1342317216, 0, 0, + 0, 2684634433, 0, 0, 0, 1074301570, 0, 0, 0, 2148603140, 0, 0, 0, 2238984, 0, 0, + 0, 4477968, 0, 0, 0, 8955936, 0, 0, 0, 17911872, 0, 0, 0, 35823744, 0, 0, + 0, 71647488, 0, 0, 0, 143294977, 0, 0, 0, 84169728, 0, 0, 0, 168339457, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2541187255, 0, 0, 0, 787407215, 0, 0, 0, 1574814430, 0, 0, 0, 3149628860, 0, + 0, 0, 2004290425, 0, 0, 0, 4008580850, 0, 0, 0, 3722194405, 0, 0, 0, 3149421515, 0, + 0, 0, 2003875734, 0, 0, 0, 4007751468, 0, 0, 0, 3720535640, 0, 0, 0, 3146103984, 0, + 0, 0, 1997240673, 0, 0, 0, 3994481347, 0, 0, 0, 3693995398, 0, 0, 0, 791460795, 0, + 0, 0, 1582921591, 0, 0, 0, 3165843182, 0, 0, 0, 2036719068, 0, 0, 0, 4073438136, 0, + 0, 0, 3851908976, 0, 0, 0, 3408850657, 0, 0, 0, 2522734018, 0, 0, 0, 750500741, 0, + 0, 0, 1501001483, 0, 0, 0, 3002002966, 0, 0, 0, 1709038637, 0, 0, 0, 3418077275, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 0, 126298325, 0, 0, 0, 252596651, 0, 0, 0, 505193303, 0, 0, 0, + 1010386606, 0, 0, 0, 2020773212, 0, 0, 0, 4041546425, 0, 0, 0, 3788125555, 0, 0, 0, + 
3281283814, 0, 0, 0, 2267600332, 0, 0, 0, 240233369, 0, 0, 0, 480466738, 0, 0, 0, + 960933476, 0, 0, 0, 1921866953, 0, 0, 0, 3843733907, 0, 0, 0, 3392500518, 0, 0, 0, + 2490033741, 0, 0, 0, 685100186, 0, 0, 0, 1370200372, 0, 0, 0, 2740400744, 0, 0, 0, + 1185834193, 0, 0, 0, 2371668387, 0, 0, 0, 448369479, 0, 0, 0, 896738958, 0, 0, 0, + 1793477917, 0, 0, 0, 3586955835, 0, 0, 0, 2887654563, 0, 0, 0, 1480341830, 0, 0, 0, + 2960683661, 0, 0, 0, 1626400026, 0, 0, 0, 3252800053, 0, 0, 0, 2210632810, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339960066, 0, 0, + 0, 679920132, 0, 0, 0, 1359840264, 0, 0, 0, 2719680529, 0, 0, 0, 1144393762, 0, 0, + 0, 2288787525, 0, 0, 0, 282607754, 0, 0, 0, 565215509, 0, 0, 0, 1130431018, 0, 0, + 0, 2260862036, 0, 0, 0, 226756777, 0, 0, 0, 453513555, 0, 0, 0, 907027111, 0, 0, + 0, 1814054222, 0, 0, 0, 3628108445, 0, 0, 0, 2961249595, 0, 0, 0, 1627531895, 0, 0, + 0, 3255063790, 0, 0, 0, 2215160284, 0, 0, 0, 135353273, 0, 0, 0, 270706546, 0, 0, + 0, 541413093, 0, 0, 0, 1082826186, 0, 0, 0, 2165652372, 0, 0, 0, 36337448, 0, 0, + 0, 72674896, 0, 0, 0, 145349793, 0, 0, 0, 84990016, 0, 0, 0, 169980033, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2614536692, 0, 0, 0, 934106089, 0, 0, 0, 1868212179, 0, 0, 0, 3736424359, 0, + 0, 0, 3177881422, 0, 0, 0, 2060795549, 0, 0, 0, 4121591099, 0, 0, 0, 3948214903, 0, + 0, 0, 3601462510, 0, 0, 0, 2907957725, 0, 0, 0, 1520948154, 0, 0, 0, 3041896308, 0, + 0, 0, 1788825320, 0, 0, 0, 3577650640, 0, 0, 0, 2860333984, 0, 0, 0, 3475824309, 0, + 0, 0, 2656681322, 0, 0, 0, 1018395349, 0, 0, 0, 2036790698, 0, 0, 0, 4073581396, 0, + 0, 0, 3852195497, 0, 0, 0, 3409423699, 0, 0, 0, 2523880103, 0, 0, 0, 752792911, 0, + 0, 0, 1505585823, 0, 0, 0, 3011171646, 0, 0, 0, 1727375997, 0, 0, 0, 3454751994, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 
0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 3480795388, 0, 0, 0, 2666623480, 0, 0, 0, 1038279664, 0, 0, 0, + 2076559329, 0, 0, 0, 4153118658, 0, 0, 0, 4011270020, 0, 0, 0, 3727572744, 0, 0, 0, + 3160178193, 0, 0, 0, 2025389090, 0, 0, 0, 4050778181, 0, 0, 0, 3806589066, 0, 0, 0, + 3318210837, 0, 0, 0, 2341454378, 0, 0, 0, 387941461, 0, 0, 0, 775882923, 0, 0, 0, + 1551765846, 0, 0, 0, 3103531693, 0, 0, 0, 1912096090, 0, 0, 0, 3824192180, 0, 0, 0, + 3353417064, 0, 0, 0, 2411866832, 0, 0, 0, 528766369, 0, 0, 0, 1057532739, 0, 0, 0, + 2115065479, 0, 0, 0, 4230130959, 0, 0, 0, 926802659, 0, 0, 0, 1853605319, 0, 0, 0, + 3707210639, 0, 0, 0, 3119453983, 0, 0, 0, 1943940671, 0, 0, 0, 3887881342, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1457526048, 0, 0, + 0, 2915052096, 0, 0, 0, 1535136897, 0, 0, 0, 3070273795, 0, 0, 0, 1845580294, 0, 0, + 0, 3691160589, 0, 0, 0, 3087353882, 0, 0, 0, 1879740469, 0, 0, 0, 3759480939, 0, 0, + 0, 3223994582, 0, 0, 0, 2153021868, 0, 0, 0, 11076440, 0, 0, 0, 22152880, 0, 0, + 0, 44305760, 0, 0, 0, 88611520, 0, 0, 0, 177223041, 0, 0, 0, 354446082, 0, 0, + 0, 708892164, 0, 0, 0, 1417784328, 0, 0, 0, 2835568656, 0, 0, 0, 1376170016, 0, 0, + 0, 2752340033, 0, 0, 0, 1209712771, 0, 0, 0, 2419425542, 0, 0, 0, 543883789, 0, 0, + 0, 1087767578, 0, 0, 0, 2175535156, 0, 0, 0, 1438123336, 0, 0, 0, 2876246672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2983242506, 0, 0, 0, 1671517716, 0, 0, 0, 3343035433, 0, 0, 0, 2391103570, 0, + 0, 0, 487239844, 0, 0, 0, 974479689, 0, 0, 0, 1948959378, 0, 0, 0, 3897918757, 0, + 0, 0, 3500870219, 0, 0, 0, 
2706773142, 0, 0, 0, 1118578988, 0, 0, 0, 2237157976, 0, + 0, 0, 179348657, 0, 0, 0, 358697315, 0, 0, 0, 717394631, 0, 0, 0, 3830812293, 0, + 0, 0, 3366657290, 0, 0, 0, 2438347285, 0, 0, 0, 581727274, 0, 0, 0, 1163454549, 0, + 0, 0, 2326909099, 0, 0, 0, 358850902, 0, 0, 0, 717701804, 0, 0, 0, 1435403608, 0, + 0, 0, 2870807216, 0, 0, 0, 1446647137, 0, 0, 0, 2893294274, 0, 0, 0, 1491621253, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, + { + 0, 0, 0, 0, 460238757, 0, 0, 0, 920477515, 0, 0, 0, 1840955030, 0, 0, 0, + 3681910060, 0, 0, 0, 3068852824, 0, 0, 0, 1842738352, 0, 0, 0, 3685476704, 0, 0, 0, + 3075986112, 0, 0, 0, 1857004929, 0, 0, 0, 3714009859, 0, 0, 0, 3133052422, 0, 0, 0, + 1971137548, 0, 0, 0, 3942275096, 0, 0, 0, 3589582897, 0, 0, 0, 2884198498, 0, 0, 0, + 1473429701, 0, 0, 0, 2946859402, 0, 0, 0, 1598751509, 0, 0, 0, 3197503018, 0, 0, 0, + 2100038740, 0, 0, 0, 4200077480, 0, 0, 0, 4105187665, 0, 0, 0, 3915408035, 0, 0, 0, + 3535848774, 0, 0, 0, 2776730253, 0, 0, 0, 1349368510, 0, 0, 0, 2698737021, 0, 0, 0, + 1102506746, 0, 0, 0, 2205013492, 0, 0, 0, 115059689, 0, 0, 0, 230119378, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4124527681, 0, 0, + 0, 3954088066, 0, 0, 0, 3613208836, 0, 0, 0, 2931450376, 0, 0, 0, 1567933457, 0, 0, + 0, 3135866914, 0, 0, 0, 1976766533, 0, 0, 0, 3953533066, 0, 0, 0, 3612098836, 0, 0, + 0, 2929230376, 0, 0, 0, 1563493457, 0, 0, 0, 3126986914, 0, 0, 0, 
1959006533, 0, 0, + 0, 3918013066, 0, 0, 0, 3541058836, 0, 0, 0, 2787150377, 0, 0, 0, 1279333459, 0, 0, + 0, 2558666919, 0, 0, 0, 822366543, 0, 0, 0, 1644733087, 0, 0, 0, 3289466174, 0, 0, + 0, 2283965053, 0, 0, 0, 272962810, 0, 0, 0, 545925621, 0, 0, 0, 1091851242, 0, 0, + 0, 2183702484, 0, 0, 0, 72437672, 0, 0, 0, 4252357392, 0, 0, 0, 4209747488, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2797805596, 0, 0, 0, 1300643896, 0, 0, 0, 2601287792, 0, 0, 0, 907608289, 0, + 0, 0, 1815216578, 0, 0, 0, 3630433157, 0, 0, 0, 2965899019, 0, 0, 0, 1636830742, 0, + 0, 0, 3273661484, 0, 0, 0, 2252355672, 0, 0, 0, 209744048, 0, 0, 0, 419488096, 0, + 0, 0, 838976192, 0, 0, 0, 1677952385, 0, 0, 0, 3355904770, 0, 0, 0, 919418393, 0, + 0, 0, 1838836786, 0, 0, 0, 3677673572, 0, 0, 0, 3060379848, 0, 0, 0, 1825792400, 0, + 0, 0, 3651584800, 0, 0, 0, 3008202304, 0, 0, 0, 1721437312, 0, 0, 0, 3442874624, 0, + 0, 0, 2590781953, 0, 0, 0, 886596611, 0, 0, 0, 1773193223, 0, 0, 0, 3546386446, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490940283, + 0, 0, 0, 981880567, 0, 0, 0, 1963761134, 0, 0, 0, 3927522269, 0, 0, 0, 3560077242, + 0, 0, 0, 2825187189, 0, 0, 0, 1355407083, 0, 0, 0, 2710814166, 0, 0, 0, 1126661037, + 0, 0, 0, 2253322074, 0, 0, 0, 211676852, 0, 0, 0, 423353704, 0, 0, 0, 846707408, + 0, 0, 0, 1693414817, 0, 0, 0, 3386829635, 0, 0, 0, 2478691975, 0, 0, 0, 662416654, + 0, 0, 0, 1324833308, 0, 0, 0, 2649666617, 0, 0, 0, 1004365938, 0, 0, 0, 2008731877, + 0, 0, 0, 4017463754, 0, 0, 0, 3282593007, 0, 0, 0, 2270218718, 0, 0, 0, 245470141, + }, + { + 0, 0, 0, 0, 1980210557, 0, 0, 0, 3960421115, 0, 0, 0, 3625874935, 0, 0, 0, + 2956782575, 0, 0, 0, 1618597854, 0, 0, 0, 3237195709, 0, 0, 0, 2179424123, 0, 0, 0, + 63880951, 0, 0, 0, 127761903, 0, 0, 0, 255523807, 0, 0, 0, 511047615, 0, 0, 0, + 1022095230, 0, 0, 0, 2044190460, 0, 0, 0, 4088380920, 0, 0, 0, 3881794544, 0, 0, 0, + 3468621792, 0, 0, 0, 2642276289, 0, 
0, 0, 989585283, 0, 0, 0, 1979170566, 0, 0, 0, + 3958341132, 0, 0, 0, 3621714968, 0, 0, 0, 2948462640, 0, 0, 0, 1601957985, 0, 0, 0, + 3203915970, 0, 0, 0, 2112864644, 0, 0, 0, 2379751029, 0, 0, 0, 464534763, 0, 0, 0, + 929069527, 0, 0, 0, 1858139055, 0, 0, 0, 3716278111, 0, 0, 0, 3137588926, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3560175108, 0, 0, + 0, 2825382920, 0, 0, 0, 1355798544, 0, 0, 0, 2711597089, 0, 0, 0, 1128226882, 0, 0, + 0, 2256453764, 0, 0, 0, 217940233, 0, 0, 0, 435880467, 0, 0, 0, 871760935, 0, 0, + 0, 1743521871, 0, 0, 0, 3487043743, 0, 0, 0, 2679120191, 0, 0, 0, 1063273086, 0, 0, + 0, 2126546172, 0, 0, 0, 4253092344, 0, 0, 0, 4211217392, 0, 0, 0, 4127467489, 0, 0, + 0, 3959967682, 0, 0, 0, 3624968069, 0, 0, 0, 2954968843, 0, 0, 0, 1614970391, 0, 0, + 0, 3229940782, 0, 0, 0, 2164914268, 0, 0, 0, 34861240, 0, 0, 0, 69722480, 0, 0, + 0, 139444961, 0, 0, 0, 278889922, 0, 0, 0, 4111269249, 0, 0, 0, 3927571202, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3324387232, 0, 0, 0, 2353807168, 0, 0, 0, 412647041, 0, 0, 0, 825294082, 0, + 0, 0, 1650588165, 0, 0, 0, 3301176331, 0, 0, 0, 2307385367, 0, 0, 0, 319803438, 0, + 0, 0, 639606877, 0, 0, 0, 1279213754, 0, 0, 0, 2558427508, 0, 0, 0, 821887721, 0, + 0, 0, 1643775442, 0, 0, 0, 3287550884, 0, 0, 0, 2280134472, 0, 0, 0, 3388354864, 0, + 0, 0, 2481742433, 0, 0, 0, 668517571, 0, 0, 0, 1337035143, 0, 0, 0, 2674070287, 0, + 0, 0, 1053173279, 0, 0, 0, 2106346559, 0, 0, 0, 4212693118, 0, 0, 0, 4130418941, 0, + 0, 0, 3965870586, 0, 0, 0, 3636773876, 0, 0, 0, 2978580456, 0, 0, 0, 1662193616, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42029323, + 0, 0, 0, 84058646, 0, 0, 0, 168117293, 0, 0, 0, 336234586, 0, 0, 0, 672469172, + 0, 0, 0, 1344938344, 0, 0, 0, 2689876689, 0, 0, 0, 1084786082, 0, 0, 0, 2169572165, + 0, 0, 0, 44177035, 0, 0, 0, 88354071, 0, 0, 0, 176708143, 0, 0, 0, 353416286, + 0, 0, 0, 706832573, 0, 0, 0, 1413665147, 
0, 0, 0, 2827330294, 0, 0, 0, 1359693292, + 0, 0, 0, 2719386585, 0, 0, 0, 1143805874, 0, 0, 0, 2287611749, 0, 0, 0, 280256202, + 0, 0, 0, 560512405, 0, 0, 0, 1078995489, 0, 0, 0, 2157990978, 0, 0, 0, 21014661, + }, + { + 0, 0, 0, 0, 893107048, 0, 0, 0, 1786214097, 0, 0, 0, 3572428195, 0, 0, 0, + 2849889095, 0, 0, 0, 1404810895, 0, 0, 0, 2809621790, 0, 0, 0, 1324276285, 0, 0, 0, + 2648552571, 0, 0, 0, 1002137847, 0, 0, 0, 2004275695, 0, 0, 0, 4008551390, 0, 0, 0, + 3722135485, 0, 0, 0, 3149303674, 0, 0, 0, 2003640053, 0, 0, 0, 4007280106, 0, 0, 0, + 3719592917, 0, 0, 0, 3144218538, 0, 0, 0, 1993469781, 0, 0, 0, 3986939563, 0, 0, 0, + 3678911830, 0, 0, 0, 3062856364, 0, 0, 0, 1830745432, 0, 0, 0, 3661490864, 0, 0, 0, + 3028014433, 0, 0, 0, 1761061570, 0, 0, 0, 3839160045, 0, 0, 0, 3383352795, 0, 0, 0, + 2471738294, 0, 0, 0, 648509293, 0, 0, 0, 1297018586, 0, 0, 0, 2594037172, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3625984309, 0, 0, + 0, 2957001323, 0, 0, 0, 1619035351, 0, 0, 0, 3238070702, 0, 0, 0, 2181174108, 0, 0, + 0, 67380920, 0, 0, 0, 134761841, 0, 0, 0, 269523682, 0, 0, 0, 539047365, 0, 0, + 0, 1078094730, 0, 0, 0, 2156189460, 0, 0, 0, 17411624, 0, 0, 0, 34823248, 0, 0, + 0, 69646496, 0, 0, 0, 139292993, 0, 0, 0, 278585986, 0, 0, 0, 557171973, 0, 0, + 0, 1114343946, 0, 0, 0, 2228687892, 0, 0, 0, 162408489, 0, 0, 0, 324816978, 0, 0, + 0, 649633957, 0, 0, 0, 1299267915, 0, 0, 0, 2598535831, 0, 0, 0, 902104367, 0, 0, + 0, 1804208734, 0, 0, 0, 3608417468, 0, 0, 0, 1980237901, 0, 0, 0, 3960475802, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 470870896, 0, 0, 0, 941741792, 0, 0, 0, 1883483584, 0, 0, 0, 3766967168, 0, + 0, 0, 3238967041, 0, 0, 0, 2182966786, 0, 0, 0, 70966277, 0, 0, 0, 141932555, 0, + 0, 0, 283865111, 0, 0, 0, 567730223, 0, 0, 0, 1135460447, 0, 0, 0, 2270920895, 0, + 0, 0, 246874494, 0, 0, 0, 493748989, 0, 0, 0, 987497979, 0, 0, 0, 1772675207, 0, + 0, 0, 3545350414, 0, 0, 0, 2795733533, 0, 0, 0, 1296499770, 0, 0, 0, 
2592999541, 0, + 0, 0, 891031787, 0, 0, 0, 1782063574, 0, 0, 0, 3564127149, 0, 0, 0, 2833287003, 0, + 0, 0, 1371606711, 0, 0, 0, 2743213422, 0, 0, 0, 1191459548, 0, 0, 0, 2382919096, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537135248, + 0, 0, 0, 1074270496, 0, 0, 0, 2148540992, 0, 0, 0, 2114688, 0, 0, 0, 4229377, + 0, 0, 0, 8458754, 0, 0, 0, 16917508, 0, 0, 0, 33835017, 0, 0, 0, 67670034, + 0, 0, 0, 135340068, 0, 0, 0, 270680136, 0, 0, 0, 541360273, 0, 0, 0, 1082720546, + 0, 0, 0, 2165441092, 0, 0, 0, 35914889, 0, 0, 0, 71829779, 0, 0, 0, 143659558, + 0, 0, 0, 287319116, 0, 0, 0, 574638232, 0, 0, 0, 1149276464, 0, 0, 0, 2298552928, + 0, 0, 0, 302138561, 0, 0, 0, 67141906, 0, 0, 0, 134283812, 0, 0, 0, 268567624, + }, + { + 0, 0, 0, 0, 2113541784, 0, 0, 0, 4227083568, 0, 0, 0, 4159199840, 0, 0, 0, + 4023432384, 0, 0, 0, 3751897472, 0, 0, 0, 3208827648, 0, 0, 0, 2122688001, 0, 0, 0, + 4245376003, 0, 0, 0, 4195784710, 0, 0, 0, 4096602125, 0, 0, 0, 3898236955, 0, 0, 0, + 3501506615, 0, 0, 0, 2708045935, 0, 0, 0, 1121124575, 0, 0, 0, 2242249151, 0, 0, 0, + 189531007, 0, 0, 0, 379062015, 0, 0, 0, 758124030, 0, 0, 0, 1516248061, 0, 0, 0, + 3032496123, 0, 0, 0, 1770024950, 0, 0, 0, 3540049900, 0, 0, 0, 2785132504, 0, 0, 0, + 1275297712, 0, 0, 0, 2550595425, 0, 0, 0, 1308092506, 0, 0, 0, 2616185012, 0, 0, 0, + 937402729, 0, 0, 0, 1874805459, 0, 0, 0, 3749610918, 0, 0, 0, 3204254540, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99817600, 0, 0, + 0, 199635201, 0, 0, 0, 399270402, 0, 0, 0, 798540804, 0, 0, 0, 1597081609, 0, 0, + 0, 3194163218, 0, 0, 0, 2093359140, 0, 0, 0, 4186718280, 0, 0, 0, 4078469265, 0, 0, + 0, 3861971235, 0, 0, 0, 3428975175, 0, 0, 0, 2562983055, 0, 0, 0, 830998815, 0, 0, + 0, 1661997631, 0, 0, 0, 3323995262, 0, 0, 0, 2353023229, 0, 0, 0, 411079163, 0, 0, + 0, 822158327, 0, 0, 0, 1644316655, 0, 0, 0, 3288633310, 0, 0, 0, 2282299325, 0, 0, + 0, 269631354, 0, 0, 0, 539262709, 0, 0, 0, 
1078525418, 0, 0, 0, 2157050836, 0, 0, + 0, 19134376, 0, 0, 0, 38268752, 0, 0, 0, 24954400, 0, 0, 0, 49908800, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 27673132, 0, 0, 0, 55346265, 0, 0, 0, 110692530, 0, 0, 0, 221385060, 0, + 0, 0, 442770121, 0, 0, 0, 885540243, 0, 0, 0, 1771080487, 0, 0, 0, 3542160975, 0, + 0, 0, 2789354654, 0, 0, 0, 1283742012, 0, 0, 0, 2567484024, 0, 0, 0, 840000753, 0, + 0, 0, 1680001507, 0, 0, 0, 3360003015, 0, 0, 0, 2425038735, 0, 0, 0, 548408626, 0, + 0, 0, 1096817252, 0, 0, 0, 2193634504, 0, 0, 0, 92301712, 0, 0, 0, 184603425, 0, + 0, 0, 369206850, 0, 0, 0, 738413700, 0, 0, 0, 1476827400, 0, 0, 0, 2953654801, 0, + 0, 0, 1612342306, 0, 0, 0, 3224684613, 0, 0, 0, 2154401931, 0, 0, 0, 13836566, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 307855197, 0, 0, 0, 615710394, 0, 0, 0, 1231420788, 0, 0, 0, + 2462841576, 0, 0, 0, 630715856, 0, 0, 0, 1261431713, 0, 0, 0, 2522863426, 0, 0, 0, + 750759556, 0, 0, 0, 1501519112, 0, 0, 0, 3003038224, 0, 0, 0, 1711109152, 0, 0, 0, + 3422218304, 0, 0, 0, 2549469312, 0, 0, 0, 803971329, 0, 0, 0, 1607942659, 0, 0, 0, + 3215885318, 0, 0, 0, 2136803341, 0, 0, 0, 4273606682, 0, 0, 0, 4252246069, 0, 0, 0, + 4209524842, 0, 0, 0, 4124082389, 0, 0, 0, 3953197482, 0, 0, 0, 3611427668, 0, 0, 0, + 2927888040, 0, 0, 0, 1560808784, 0, 0, 0, 2823382525, 0, 0, 0, 1351797754, 0, 0, 0, + 2703595509, 0, 0, 0, 1112223723, 0, 0, 0, 2224447447, 0, 0, 0, 153927598, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1409835056, 0, 0, + 0, 2819670112, 0, 0, 0, 1344372928, 0, 0, 0, 2688745857, 0, 0, 0, 1082524418, 0, 0, + 0, 2165048836, 0, 0, 0, 35130376, 0, 0, 0, 70260752, 0, 0, 0, 140521505, 0, 0, + 0, 281043010, 0, 0, 0, 562086021, 0, 0, 0, 1124172042, 0, 0, 0, 2248344084, 0, 0, + 0, 201720873, 0, 0, 0, 403441747, 0, 0, 0, 806883495, 0, 0, 0, 1613766991, 0, 0, + 0, 3227533982, 0, 0, 0, 2160100668, 0, 0, 0, 25234040, 0, 0, 0, 50468080, 0, 0, + 0, 100936160, 0, 0, 0, 201872321, 0, 0, 0, 403744643, 0, 0, 0, 807489287, 0, 0, + 0, 1614978575, 0, 0, 0, 3229957150, 0, 0, 0, 3573684236, 0, 0, 0, 2852401176, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1749068055, 0, 0, 0, 3498136111, 0, 0, 0, 2701304927, 0, 0, 0, 1107642558, 0, + 0, 0, 2215285117, 0, 0, 0, 135602938, 0, 0, 0, 271205877, 0, 0, 0, 542411755, 0, + 0, 0, 1084823510, 0, 0, 0, 2169647020, 0, 0, 0, 44326744, 0, 0, 0, 88653489, 0, + 0, 0, 177306978, 0, 0, 0, 354613956, 0, 0, 0, 709227913, 0, 0, 0, 1019953669, 0, + 0, 0, 2039907338, 0, 0, 0, 4079814677, 0, 0, 0, 3864662058, 0, 0, 0, 3434356820, 0, + 0, 0, 2573746345, 0, 0, 0, 852525394, 0, 0, 0, 1705050788, 0, 0, 0, 3410101576, 0, + 0, 0, 2525235857, 0, 0, 0, 755504418, 0, 0, 0, 1511008837, 0, 0, 0, 3022017675, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 16640, + }, + { + 0, 0, 0, 0, 1769338744, 0, 0, 0, 3538677488, 0, 0, 0, 2782387681, 0, 0, 0, + 1269808067, 0, 0, 0, 2539616134, 0, 0, 0, 784264973, 0, 0, 0, 1568529946, 0, 0, 0, + 
3137059892, 0, 0, 0, 1979152488, 0, 0, 0, 3958304976, 0, 0, 0, 3621642656, 0, 0, 0, + 2948318016, 0, 0, 0, 1601668737, 0, 0, 0, 3203337474, 0, 0, 0, 2111707652, 0, 0, 0, + 4223415304, 0, 0, 0, 4151863312, 0, 0, 0, 4008759328, 0, 0, 0, 3722551361, 0, 0, 0, + 3150135426, 0, 0, 0, 2005303557, 0, 0, 0, 4010607114, 0, 0, 0, 3726246932, 0, 0, 0, + 3157526569, 0, 0, 0, 2020085842, 0, 0, 0, 2577782749, 0, 0, 0, 860598203, 0, 0, 0, + 1721196407, 0, 0, 0, 3442392815, 0, 0, 0, 2589818334, 0, 0, 0, 884669372, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1430273920, 0, 0, + 0, 2860547840, 0, 0, 0, 1426128384, 0, 0, 0, 2852256768, 0, 0, 0, 1409546240, 0, 0, + 0, 2819092480, 0, 0, 0, 1343217664, 0, 0, 0, 2686435329, 0, 0, 0, 1077903362, 0, 0, + 0, 2155806724, 0, 0, 0, 16646152, 0, 0, 0, 33292304, 0, 0, 0, 66584608, 0, 0, + 0, 133169216, 0, 0, 0, 266338433, 0, 0, 0, 532676867, 0, 0, 0, 1065353734, 0, 0, + 0, 2130707468, 0, 0, 0, 4261414936, 0, 0, 0, 4227862576, 0, 0, 0, 4160757856, 0, 0, + 0, 4026548417, 0, 0, 0, 3758129539, 0, 0, 0, 3221291782, 0, 0, 0, 2147616268, 0, 0, + 0, 265240, 0, 0, 0, 530480, 0, 0, 0, 1431310304, 0, 0, 0, 2862620608, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2164392972, 0, 0, 0, 33818648, 0, 0, 0, 67637296, 0, 0, 0, 135274592, 0, + 0, 0, 270549185, 0, 0, 0, 541098371, 0, 0, 0, 1082196742, 0, 0, 0, 2164393484, 0, + 0, 0, 33819672, 0, 0, 0, 67639344, 0, 0, 0, 135278688, 0, 0, 0, 270557377, 0, + 0, 0, 541114755, 0, 0, 0, 1082229511, 0, 0, 0, 2164459022, 0, 0, 0, 2198079504, 0, + 0, 0, 101191712, 0, 0, 0, 202383424, 0, 0, 0, 404766849, 0, 0, 0, 809533698, 0, + 0, 0, 1619067396, 0, 0, 0, 3238134792, 0, 0, 0, 2181302288, 0, 0, 0, 67637280, 0, + 0, 0, 135274560, 0, 0, 0, 270549121, 0, 0, 0, 541098243, 0, 0, 0, 1082196486, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 
11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 2045181768, 0, 0, 0, 4090363536, 0, 0, 0, 3885759776, 0, 0, 0, + 3476552256, 0, 0, 0, 2658137216, 0, 0, 0, 1021307136, 0, 0, 0, 2042614272, 0, 0, 0, + 4085228544, 0, 0, 0, 3875489792, 0, 0, 0, 3456012289, 0, 0, 0, 2617057282, 0, 0, 0, + 939147269, 0, 0, 0, 1878294539, 0, 0, 0, 3756589078, 0, 0, 0, 3218210860, 0, 0, 0, + 2141454425, 0, 0, 0, 4282908850, 0, 0, 0, 4270850404, 0, 0, 0, 4246733513, 0, 0, 0, + 4198499730, 0, 0, 0, 4102032165, 0, 0, 0, 3909097035, 0, 0, 0, 3523226774, 0, 0, 0, + 2751486253, 0, 0, 0, 1208005210, 0, 0, 0, 3924270077, 0, 0, 0, 3553572858, 0, 0, 0, + 2812178420, 0, 0, 0, 1329389545, 0, 0, 0, 2658779090, 0, 0, 0, 1022590884, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4294705152, 0, 0, + 0, 4294443008, 0, 0, 0, 4293918720, 0, 0, 0, 4292870144, 0, 0, 0, 4290772992, 0, 0, + 0, 4286578688, 0, 0, 0, 4278190080, 0, 0, 0, 4261412864, 0, 0, 0, 4227858432, 0, 0, + 0, 4160749568, 0, 0, 0, 4026531841, 0, 0, 0, 3758096387, 0, 0, 0, 3221225478, 0, 0, + 0, 2147483660, 0, 0, 0, 24, 0, 0, 0, 48, 0, 0, 0, 96, 0, 0, + 0, 192, 0, 0, 0, 384, 0, 0, 0, 768, 0, 0, 0, 1536, 0, 0, + 0, 3072, 0, 0, 0, 6144, 0, 0, 0, 12288, 0, 0, 0, 24576, 0, 0, + 0, 49152, 0, 0, 0, 98304, 0, 0, 0, 4294901760, 0, 0, 0, 4294836224, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, + 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, 0, 0, 262152, 0, + 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, + 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67108864, 0, + 0, 0, 134217728, 0, 0, 0, 268435457, 0, 0, 0, 
536870914, 0, 0, 0, 1073741828, 0, + 0, 0, 2147483656, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 64, 0, + 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 698651200, 0, 0, 0, 1397302401, 0, 0, 0, 2794604802, 0, 0, 0, + 1294242308, 0, 0, 0, 2588484616, 0, 0, 0, 882001936, 0, 0, 0, 1764003872, 0, 0, 0, + 3528007744, 0, 0, 0, 2761048193, 0, 0, 0, 1227129090, 0, 0, 0, 2454258180, 0, 0, 0, + 613549064, 0, 0, 0, 1227098128, 0, 0, 0, 2454196256, 0, 0, 0, 613425216, 0, 0, 0, + 1226850432, 0, 0, 0, 2453700864, 0, 0, 0, 612434432, 0, 0, 0, 1224868864, 0, 0, 0, + 2449737728, 0, 0, 0, 604508160, 0, 0, 0, 1209016320, 0, 0, 0, 2418032641, 0, 0, 0, + 541097986, 0, 0, 0, 1082195972, 0, 0, 0, 2829488713, 0, 0, 0, 1364010130, 0, 0, 0, + 2728020260, 0, 0, 0, 1161073224, 0, 0, 0, 2322146448, 0, 0, 0, 349325600, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 131092, 0, 0, 0, 262184, 0, 0, 0, 524368, 0, 0, 0, 1048736, 0, + 0, 0, 2097472, 0, 0, 0, 4194944, 0, 0, 0, 8389888, 0, 0, 0, 16779776, 0, + 0, 0, 33559552, 0, 0, 0, 67119104, 0, 0, 0, 134238208, 0, 0, 0, 268476416, 0, + 0, 0, 536952832, 0, 0, 0, 1073905665, 0, 0, 0, 2147811330, 0, 0, 0, 524304, 0, + 0, 0, 1048608, 0, 0, 0, 2097216, 0, 0, 0, 4194432, 0, 0, 0, 8388864, 0, + 0, 0, 16777728, 0, 0, 0, 33555456, 0, 0, 0, 67110912, 0, 0, 0, 134221824, 0, + 0, 0, 268443649, 0, 0, 0, 536887298, 0, 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 1226833920, 0, 0, 0, 2453667840, 0, 0, 0, 612368384, 0, 0, 0, + 1224736768, 0, 0, 0, 2449473536, 0, 0, 0, 603979776, 0, 0, 0, 1207959552, 0, 0, 0, + 2415919105, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, 0, 0, 2147483657, 0, 0, 0, + 18, 0, 0, 0, 36, 0, 0, 0, 72, 0, 0, 0, 144, 0, 0, 0, + 288, 0, 0, 0, 576, 0, 0, 0, 1152, 0, 0, 0, 2304, 0, 0, 0, + 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, 0, 0, 0, + 73728, 0, 0, 0, 147456, 0, 0, 0, 1227128832, 0, 0, 0, 2454257664, 0, 0, 0, + 613548032, 0, 0, 0, 1227096064, 0, 0, 0, 2454192128, 0, 0, 0, 613416960, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 
0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 335546385, 0, 0, 0, 671092770, 0, 0, 0, 1342185541, 0, 0, 0, 2684371082, 0, + 0, 0, 1073774869, 0, 0, 0, 2147549738, 0, 0, 0, 132180, 0, 0, 0, 264360, 0, + 0, 0, 528720, 0, 0, 0, 1057440, 0, 0, 0, 2114880, 0, 0, 0, 4229761, 0, + 0, 0, 8459522, 0, 0, 0, 16919044, 0, 0, 0, 33838088, 0, 0, 0, 269000704, 0, + 0, 0, 538001408, 0, 0, 0, 1076002817, 0, 0, 0, 2152005634, 0, 0, 0, 9043972, 0, + 0, 0, 18087944, 0, 0, 0, 36175888, 0, 0, 0, 72351776, 0, 0, 0, 144703552, 0, + 0, 0, 289407105, 0, 0, 0, 578814210, 0, 0, 0, 1157628420, 0, 0, 0, 2315256840, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 0, 0, + 133120, 0, 0, 0, 
266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 539164932, 0, 0, 0, 1078329864, 0, 0, 0, 2156659728, 0, 0, 0, 18352160, 0, + 0, 0, 36704320, 0, 0, 0, 73408640, 0, 0, 0, 146817280, 0, 0, 0, 293634560, 0, + 0, 0, 587269120, 0, 0, 0, 1174538240, 0, 0, 0, 2349076480, 0, 0, 0, 403185665, 0, + 0, 0, 806371331, 0, 0, 0, 1612742663, 0, 0, 0, 3225485326, 0, 0, 0, 2694906136, 0, + 0, 0, 1094844976, 0, 0, 0, 2189689952, 0, 0, 0, 84412608, 0, 0, 0, 168825216, 0, + 0, 0, 337650433, 0, 0, 0, 675300866, 0, 0, 0, 1350601732, 0, 0, 0, 2701203464, 0, + 0, 0, 1107439632, 0, 0, 0, 2214879264, 0, 0, 0, 134791233, 0, 0, 0, 269582466, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, +}; + +// clang-format on +// clang-format off +static const 
__device__ unsigned int d_lfsr113_sequence_jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE] = { + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 0, 4261413888, 0, 0, 0, 4227860480, 0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 
0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 1874778401, 0, 0, 0, 3749556802, 0, 0, 0, 3204146308, 0, 0, 0, + 2113325320, 0, 0, 0, 4226650640, 0, 0, 0, 4158333984, 0, 0, 0, 4021700672, 0, 0, 0, + 3748434048, 0, 0, 0, 3201900800, 0, 0, 0, 2108834304, 0, 0, 0, 4217668608, 0, 0, 0, + 4140369920, 0, 0, 0, 3985772545, 0, 0, 0, 3676577794, 0, 0, 0, 3058188292, 0, 0, 0, + 1821409288, 0, 0, 0, 3642818577, 0, 0, 0, 2990669858, 0, 0, 0, 1686372420, 0, 0, 0, + 3372744841, 0, 0, 0, 2450522386, 0, 0, 0, 606077476, 0, 0, 0, 1212154952, 0, 0, 0, + 2424309905, 0, 0, 0, 553652514, 0, 0, 0, 767490916, 0, 0, 0, 1534981833, 0, 0, 0, + 3069963666, 0, 0, 0, 1844960036, 0, 0, 0, 3689920072, 0, 0, 0, 3084872848, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 
0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 882001920, 0, 0, 0, 1764003840, 0, 0, 0, 3528007680, 0, 0, 0, + 2761048065, 0, 0, 0, 1227128834, 0, 0, 0, 2454257668, 0, 0, 0, 613548040, 0, 0, 0, + 1227096080, 0, 0, 0, 2454192160, 0, 0, 0, 613417024, 0, 0, 0, 1226834048, 0, 0, 0, + 2453668096, 0, 0, 0, 612368896, 0, 0, 0, 1224737792, 0, 0, 0, 2449475584, 0, 0, 0, + 603983872, 0, 0, 0, 1207967744, 0, 0, 0, 2415935489, 0, 0, 0, 536903682, 0, 0, 0, + 1073807364, 0, 0, 0, 2147614729, 0, 0, 0, 262162, 0, 0, 0, 524324, 0, 0, 0, + 1048648, 0, 0, 0, 2097296, 0, 0, 0, 886196512, 0, 0, 0, 1772393024, 0, 0, 0, + 3544786048, 0, 0, 0, 2794604800, 0, 0, 0, 1294242304, 0, 0, 0, 2588484608, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 
0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, + 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, + 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, + 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, + 33554433, 0, 0, 0, 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, + 536870928, 0, 0, 0, 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, + 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 
0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 4160, 0, 0, 0, 8320, 0, 0, 0, 16640, 0, 0, 0, + 33280, 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, + 532480, 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259840, 0, 0, 0, + 8519680, 0, 0, 0, 17039360, 0, 0, 0, 34078721, 0, 0, 0, 68157442, 
0, 0, 0, + 136314884, 0, 0, 0, 272629768, 0, 0, 0, 545259536, 0, 0, 0, 1090519072, 0, 0, 0, + 2181038144, 0, 0, 0, 67108992, 0, 0, 0, 134217984, 0, 0, 0, 268435968, 0, 0, 0, + 536871936, 0, 0, 0, 1073743872, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8704, 0, 0, + 0, 17408, 0, 0, 0, 34816, 0, 0, 0, 69632, 0, 0, 0, 139264, 0, 0, + 0, 278528, 0, 0, 0, 557056, 0, 0, 0, 1114112, 0, 0, 0, 2228224, 0, 0, + 0, 4456448, 0, 0, 0, 8912896, 0, 0, 0, 17825792, 0, 0, 0, 35651584, 0, 0, + 0, 71303168, 0, 0, 0, 142606337, 0, 0, 0, 285212674, 0, 0, 0, 570425349, 0, 0, + 0, 1140850698, 0, 0, 0, 2281701397, 0, 0, 0, 268435498, 0, 0, 0, 536870997, 0, 0, + 0, 1073741994, 0, 0, 0, 2147483988, 0, 0, 0, 680, 0, 0, 0, 1360, 0, 0, + 0, 2720, 0, 0, 0, 5440, 0, 0, 0, 2176, 0, 0, 0, 4352, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1417720836, 0, 0, 0, 2835441672, 0, 0, 0, 1375916048, 0, 0, 0, 2751832097, 0, + 0, 0, 1208696898, 0, 0, 0, 2417393796, 0, 0, 0, 539820296, 0, 0, 0, 1079640592, 0, + 0, 0, 2159281184, 0, 0, 0, 23595072, 0, 0, 0, 47190144, 0, 0, 0, 94380288, 0, + 0, 0, 188760576, 0, 0, 0, 377521152, 0, 0, 0, 755042304, 0, 0, 0, 243442693, 0, + 0, 0, 486885387, 0, 0, 0, 973770774, 0, 0, 0, 1947541549, 0, 0, 0, 3895083098, 0, + 0, 0, 3495198900, 0, 0, 0, 2695430504, 0, 0, 0, 1095893712, 0, 0, 0, 2191787424, 0, + 0, 0, 88607552, 0, 0, 0, 177215104, 0, 0, 0, 354430209, 0, 0, 0, 708860418, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 
293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 0, 3162112, 0, 0, 0, 6324224, 0, 0, 0, 12648448, 0, 0, 0, + 25296896, 0, 0, 0, 50593793, 0, 0, 0, 101187587, 0, 0, 0, 202375174, 0, 0, 0, + 404750348, 0, 0, 0, 809500696, 0, 0, 0, 1619001392, 0, 0, 0, 3238002785, 0, 0, 0, + 2181038274, 0, 0, 0, 67109252, 0, 0, 0, 134218504, 0, 0, 0, 268437008, 0, 0, 0, + 536874016, 0, 0, 0, 1073748032, 0, 0, 0, 2147496065, 0, 0, 0, 24834, 0, 0, 0, + 49668, 0, 0, 0, 99336, 0, 0, 0, 198672, 0, 0, 0, 397344, 0, 0, 0, + 794688, 0, 0, 0, 1589376, 0, 0, 0, 49408, 0, 0, 0, 98816, 0, 0, 0, + 197632, 0, 0, 0, 395264, 0, 0, 0, 790528, 0, 0, 0, 1581056, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134299649, 0, 0, + 0, 268599298, 0, 0, 0, 537198597, 0, 0, 0, 1074397194, 0, 0, 0, 2148794388, 0, 0, + 0, 2621480, 0, 0, 0, 5242960, 0, 0, 0, 10485920, 0, 0, 0, 20971840, 0, 0, + 0, 41943680, 0, 0, 0, 83887360, 0, 0, 0, 167774721, 0, 0, 0, 335549442, 0, 0, + 0, 671098884, 0, 0, 0, 1342197768, 0, 0, 0, 2684395537, 0, 0, 0, 1073823778, 0, 0, + 0, 2147647556, 0, 0, 0, 327816, 0, 0, 0, 655632, 0, 0, 0, 1311264, 0, 0, + 0, 2622528, 0, 0, 0, 5245056, 0, 0, 0, 10490112, 0, 0, 0, 20980224, 0, 0, + 0, 41960448, 0, 0, 0, 83920896, 0, 0, 0, 33574912, 0, 0, 0, 67149824, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 844445257, 0, 0, 0, 1688890514, 0, 0, 0, 3377781029, 0, 0, 0, 2460594762, 0, + 0, 0, 626222228, 0, 0, 0, 1252444457, 0, 0, 0, 2504888914, 0, 0, 0, 714810532, 0, + 0, 0, 1429621065, 0, 0, 0, 2859242131, 0, 0, 0, 1423516967, 0, 0, 0, 2847033934, 0, + 0, 0, 1399100572, 0, 0, 0, 2798201144, 0, 0, 0, 1301434992, 0, 0, 0, 2842792617, 0, + 0, 0, 1390617939, 0, 0, 0, 2781235878, 0, 0, 0, 1267504461, 0, 0, 0, 2535008923, 0, + 0, 0, 775050550, 0, 0, 0, 1550101100, 0, 0, 0, 3100202201, 0, 0, 0, 1905437106, 0, + 0, 0, 3810874212, 0, 0, 0, 3326781129, 0, 0, 0, 2358594962, 0, 0, 0, 
422222628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 2554888269, 0, 0, 0, 814809242, 0, 0, 0, 1629618484, 0, 0, 0, + 3259236968, 0, 0, 0, 2223506641, 0, 0, 0, 152045986, 0, 0, 0, 304091973, 0, 0, 0, + 608183946, 0, 0, 0, 1216367892, 0, 0, 0, 2432735785, 0, 0, 0, 570504275, 0, 0, 0, + 1141008550, 0, 0, 0, 2282017101, 0, 0, 0, 269066906, 0, 0, 0, 538133812, 0, 0, 0, + 1076267624, 0, 0, 0, 2152535249, 0, 0, 0, 10103202, 0, 0, 0, 20206404, 0, 0, 0, + 40412809, 0, 0, 0, 80825618, 0, 0, 0, 161651236, 0, 0, 0, 323302473, 0, 0, 0, + 646604947, 0, 0, 0, 1293209894, 0, 0, 0, 39920129, 0, 0, 0, 79840258, 0, 0, 0, + 159680516, 0, 0, 0, 319361033, 0, 0, 0, 638722067, 0, 0, 0, 1277444134, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336678914, 0, 0, + 0, 673357828, 0, 0, 0, 1346715656, 0, 0, 0, 2693431313, 0, 0, 0, 1091895330, 0, 0, + 0, 2183790660, 0, 0, 0, 72614024, 0, 0, 0, 145228049, 0, 0, 0, 290456098, 0, 0, + 0, 580912197, 0, 0, 0, 1161824394, 0, 0, 0, 2323648789, 0, 0, 0, 352330282, 0, 0, + 0, 704660564, 0, 0, 0, 1409321128, 0, 0, 0, 2818642256, 0, 0, 0, 1342317216, 0, 0, + 0, 2684634433, 0, 0, 0, 1074301570, 0, 0, 0, 2148603140, 0, 0, 0, 2238984, 0, 0, + 0, 4477968, 0, 0, 0, 8955936, 0, 0, 0, 17911872, 0, 0, 0, 35823744, 0, 0, + 0, 71647488, 0, 0, 0, 143294977, 0, 0, 0, 84169728, 0, 0, 0, 168339457, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3001529316, 0, 0, 
0, 1708091336, 0, 0, 0, 3416182673, 0, 0, 0, 2537398050, 0, + 0, 0, 779828804, 0, 0, 0, 1559657609, 0, 0, 0, 3119315218, 0, 0, 0, 1943663140, 0, + 0, 0, 3887326281, 0, 0, 0, 3479685267, 0, 0, 0, 2664403238, 0, 0, 0, 1033839181, 0, + 0, 0, 2067678363, 0, 0, 0, 4135356726, 0, 0, 0, 3975746156, 0, 0, 0, 1796577085, 0, + 0, 0, 3593154171, 0, 0, 0, 2891341046, 0, 0, 0, 1487714796, 0, 0, 0, 2975429593, 0, + 0, 0, 1655891891, 0, 0, 0, 3311783783, 0, 0, 0, 2328600271, 0, 0, 0, 362233247, 0, + 0, 0, 724466494, 0, 0, 0, 1448932988, 0, 0, 0, 2897865977, 0, 0, 0, 1500764658, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, + { + 0, 0, 0, 0, 1164254896, 0, 0, 0, 2328509792, 0, 0, 0, 362052288, 0, 0, 0, + 724104577, 0, 0, 0, 1448209155, 0, 0, 0, 2896418311, 0, 0, 0, 1497869326, 0, 0, 0, + 2995738652, 0, 0, 0, 1696510008, 0, 0, 0, 3393020016, 0, 0, 0, 2491072737, 0, 0, 0, + 687178178, 0, 0, 0, 1374356356, 0, 0, 0, 2748712712, 0, 0, 0, 1202458129, 0, 0, 0, + 2404916258, 0, 0, 0, 514865221, 0, 0, 0, 1029730442, 0, 0, 0, 2059460885, 0, 0, 0, + 4118921771, 0, 0, 0, 3942876246, 0, 0, 0, 3590785196, 0, 0, 0, 2886603097, 0, 0, 0, + 1478238898, 0, 0, 0, 2956477797, 0, 0, 0, 622171258, 0, 0, 0, 1244342517, 0, 0, 0, + 2488685035, 0, 0, 0, 682402774, 0, 0, 0, 1364805548, 0, 0, 0, 2729611096, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339960066, 0, 0, + 0, 679920132, 0, 0, 0, 1359840264, 0, 0, 0, 2719680529, 0, 0, 
0, 1144393762, 0, 0, + 0, 2288787525, 0, 0, 0, 282607754, 0, 0, 0, 565215509, 0, 0, 0, 1130431018, 0, 0, + 0, 2260862036, 0, 0, 0, 226756777, 0, 0, 0, 453513555, 0, 0, 0, 907027111, 0, 0, + 0, 1814054222, 0, 0, 0, 3628108445, 0, 0, 0, 2961249595, 0, 0, 0, 1627531895, 0, 0, + 0, 3255063790, 0, 0, 0, 2215160284, 0, 0, 0, 135353273, 0, 0, 0, 270706546, 0, 0, + 0, 541413093, 0, 0, 0, 1082826186, 0, 0, 0, 2165652372, 0, 0, 0, 36337448, 0, 0, + 0, 72674896, 0, 0, 0, 145349793, 0, 0, 0, 84990016, 0, 0, 0, 169980033, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2938358989, 0, 0, 0, 1581750682, 0, 0, 0, 3163501365, 0, 0, 0, 2032035435, 0, + 0, 0, 4064070870, 0, 0, 0, 3833174445, 0, 0, 0, 3371381594, 0, 0, 0, 2447795893, 0, + 0, 0, 600624491, 0, 0, 0, 1201248983, 0, 0, 0, 2402497966, 0, 0, 0, 510028637, 0, + 0, 0, 1020057274, 0, 0, 0, 2040114548, 0, 0, 0, 4080229097, 0, 0, 0, 1229289758, 0, + 0, 0, 2458579516, 0, 0, 0, 622191737, 0, 0, 0, 1244383475, 0, 0, 0, 2488766950, 0, + 0, 0, 682566604, 0, 0, 0, 1365133209, 0, 0, 0, 2730266419, 0, 0, 0, 1165565542, 0, + 0, 0, 2331131084, 0, 0, 0, 367294873, 0, 0, 0, 734589747, 0, 0, 0, 1469179494, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490940283, + 0, 0, 0, 981880567, 0, 0, 0, 1963761134, 0, 0, 0, 3927522269, 0, 0, 0, 3560077242, + 0, 0, 0, 2825187189, 0, 0, 0, 1355407083, 0, 0, 0, 2710814166, 0, 0, 0, 1126661037, + 0, 0, 0, 2253322074, 0, 0, 0, 211676852, 0, 0, 0, 423353704, 0, 0, 0, 846707408, + 0, 0, 0, 1693414817, 0, 0, 0, 3386829635, 0, 0, 0, 2478691975, 0, 0, 0, 662416654, + 0, 0, 0, 1324833308, 0, 0, 0, 2649666617, 0, 0, 0, 1004365938, 0, 0, 0, 2008731877, + 0, 0, 0, 4017463754, 0, 0, 0, 3282593007, 0, 0, 0, 2270218718, 0, 0, 0, 245470141, + }, + { + 0, 0, 0, 0, 126298325, 0, 0, 0, 252596651, 0, 0, 0, 505193303, 0, 0, 0, + 1010386606, 0, 0, 0, 2020773212, 0, 0, 0, 4041546425, 0, 0, 0, 3788125555, 0, 0, 0, + 3281283814, 0, 0, 0, 2267600332, 0, 0, 0, 
240233369, 0, 0, 0, 480466738, 0, 0, 0, + 960933476, 0, 0, 0, 1921866953, 0, 0, 0, 3843733907, 0, 0, 0, 3392500518, 0, 0, 0, + 2490033741, 0, 0, 0, 685100186, 0, 0, 0, 1370200372, 0, 0, 0, 2740400744, 0, 0, 0, + 1185834193, 0, 0, 0, 2371668387, 0, 0, 0, 448369479, 0, 0, 0, 896738958, 0, 0, 0, + 1793477917, 0, 0, 0, 3586955835, 0, 0, 0, 2887654563, 0, 0, 0, 1480341830, 0, 0, 0, + 2960683661, 0, 0, 0, 1626400026, 0, 0, 0, 3252800053, 0, 0, 0, 2210632810, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1457526048, 0, 0, + 0, 2915052096, 0, 0, 0, 1535136897, 0, 0, 0, 3070273795, 0, 0, 0, 1845580294, 0, 0, + 0, 3691160589, 0, 0, 0, 3087353882, 0, 0, 0, 1879740469, 0, 0, 0, 3759480939, 0, 0, + 0, 3223994582, 0, 0, 0, 2153021868, 0, 0, 0, 11076440, 0, 0, 0, 22152880, 0, 0, + 0, 44305760, 0, 0, 0, 88611520, 0, 0, 0, 177223041, 0, 0, 0, 354446082, 0, 0, + 0, 708892164, 0, 0, 0, 1417784328, 0, 0, 0, 2835568656, 0, 0, 0, 1376170016, 0, 0, + 0, 2752340033, 0, 0, 0, 1209712771, 0, 0, 0, 2419425542, 0, 0, 0, 543883789, 0, 0, + 0, 1087767578, 0, 0, 0, 2175535156, 0, 0, 0, 1438123336, 0, 0, 0, 2876246672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1801783692, 0, 0, 0, 3603567385, 0, 0, 0, 2912167474, 0, 0, 0, 1529367653, 0, + 0, 0, 3058735306, 0, 0, 0, 1822503316, 0, 0, 0, 3645006633, 0, 0, 0, 2995045970, 0, + 0, 0, 1695124645, 0, 0, 0, 3390249290, 0, 0, 0, 2485531285, 0, 0, 0, 676095274, 0, + 0, 0, 1352190548, 0, 0, 0, 2704381097, 0, 0, 0, 1113794898, 0, 0, 0, 4020460328, 0, + 0, 0, 3745953360, 0, 0, 0, 3196939425, 0, 0, 0, 2098911554, 0, 0, 0, 4197823108, 0, + 0, 0, 4100678921, 0, 0, 0, 3906390547, 0, 0, 0, 3517813798, 0, 0, 0, 2740660300, 0, + 0, 0, 1186353304, 0, 0, 0, 2372706609, 0, 0, 0, 450445923, 0, 0, 0, 900891846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42029323, + 0, 0, 0, 84058646, 0, 0, 0, 168117293, 0, 0, 0, 336234586, 0, 0, 0, 672469172, + 0, 0, 0, 1344938344, 0, 0, 0, 2689876689, 0, 
0, 0, 1084786082, 0, 0, 0, 2169572165, + 0, 0, 0, 44177035, 0, 0, 0, 88354071, 0, 0, 0, 176708143, 0, 0, 0, 353416286, + 0, 0, 0, 706832573, 0, 0, 0, 1413665147, 0, 0, 0, 2827330294, 0, 0, 0, 1359693292, + 0, 0, 0, 2719386585, 0, 0, 0, 1143805874, 0, 0, 0, 2287611749, 0, 0, 0, 280256202, + 0, 0, 0, 560512405, 0, 0, 0, 1078995489, 0, 0, 0, 2157990978, 0, 0, 0, 21014661, + }, + { + 0, 0, 0, 0, 3480795388, 0, 0, 0, 2666623480, 0, 0, 0, 1038279664, 0, 0, 0, + 2076559329, 0, 0, 0, 4153118658, 0, 0, 0, 4011270020, 0, 0, 0, 3727572744, 0, 0, 0, + 3160178193, 0, 0, 0, 2025389090, 0, 0, 0, 4050778181, 0, 0, 0, 3806589066, 0, 0, 0, + 3318210837, 0, 0, 0, 2341454378, 0, 0, 0, 387941461, 0, 0, 0, 775882923, 0, 0, 0, + 1551765846, 0, 0, 0, 3103531693, 0, 0, 0, 1912096090, 0, 0, 0, 3824192180, 0, 0, 0, + 3353417064, 0, 0, 0, 2411866832, 0, 0, 0, 528766369, 0, 0, 0, 1057532739, 0, 0, 0, + 2115065479, 0, 0, 0, 4230130959, 0, 0, 0, 926802659, 0, 0, 0, 1853605319, 0, 0, 0, + 3707210639, 0, 0, 0, 3119453983, 0, 0, 0, 1943940671, 0, 0, 0, 3887881342, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4124527681, 0, 0, + 0, 3954088066, 0, 0, 0, 3613208836, 0, 0, 0, 2931450376, 0, 0, 0, 1567933457, 0, 0, + 0, 3135866914, 0, 0, 0, 1976766533, 0, 0, 0, 3953533066, 0, 0, 0, 3612098836, 0, 0, + 0, 2929230376, 0, 0, 0, 1563493457, 0, 0, 0, 3126986914, 0, 0, 0, 1959006533, 0, 0, + 0, 3918013066, 0, 0, 0, 3541058836, 0, 0, 0, 2787150377, 0, 0, 0, 1279333459, 0, 0, + 0, 2558666919, 0, 0, 0, 822366543, 0, 0, 0, 1644733087, 0, 0, 0, 3289466174, 0, 0, + 0, 2283965053, 0, 0, 0, 272962810, 0, 0, 0, 545925621, 0, 0, 0, 1091851242, 0, 0, + 0, 2183702484, 0, 0, 0, 72437672, 0, 0, 0, 4252357392, 0, 0, 0, 4209747488, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2656366736, 0, 0, 0, 1017766176, 0, 0, 0, 2035532352, 0, 0, 0, 4071064704, 0, + 0, 0, 3847162112, 0, 0, 0, 3399356928, 0, 0, 0, 2503746561, 0, 0, 0, 712525826, 0, + 0, 0, 1425051652, 0, 0, 0, 2850103304, 0, 0, 0, 1405239313, 0, 
0, 0, 2810478627, 0, + 0, 0, 1325989958, 0, 0, 0, 2651979917, 0, 0, 0, 1008992539, 0, 0, 0, 3860656807, 0, + 0, 0, 3426346319, 0, 0, 0, 2557725342, 0, 0, 0, 820483388, 0, 0, 0, 1640966776, 0, + 0, 0, 3281933552, 0, 0, 0, 2268899809, 0, 0, 0, 242832322, 0, 0, 0, 485664644, 0, + 0, 0, 971329289, 0, 0, 0, 1942658578, 0, 0, 0, 3885317156, 0, 0, 0, 3475667016, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537135248, + 0, 0, 0, 1074270496, 0, 0, 0, 2148540992, 0, 0, 0, 2114688, 0, 0, 0, 4229377, + 0, 0, 0, 8458754, 0, 0, 0, 16917508, 0, 0, 0, 33835017, 0, 0, 0, 67670034, + 0, 0, 0, 135340068, 0, 0, 0, 270680136, 0, 0, 0, 541360273, 0, 0, 0, 1082720546, + 0, 0, 0, 2165441092, 0, 0, 0, 35914889, 0, 0, 0, 71829779, 0, 0, 0, 143659558, + 0, 0, 0, 287319116, 0, 0, 0, 574638232, 0, 0, 0, 1149276464, 0, 0, 0, 2298552928, + 0, 0, 0, 302138561, 0, 0, 0, 67141906, 0, 0, 0, 134283812, 0, 0, 0, 268567624, + }, + { + 0, 0, 0, 0, 460238757, 0, 0, 0, 920477515, 0, 0, 0, 1840955030, 0, 0, 0, + 3681910060, 0, 0, 0, 3068852824, 0, 0, 0, 1842738352, 0, 0, 0, 3685476704, 0, 0, 0, + 3075986112, 0, 0, 0, 1857004929, 0, 0, 0, 3714009859, 0, 0, 0, 3133052422, 0, 0, 0, + 1971137548, 0, 0, 0, 3942275096, 0, 0, 0, 3589582897, 0, 0, 0, 2884198498, 0, 0, 0, + 1473429701, 0, 0, 0, 2946859402, 0, 0, 0, 1598751509, 0, 0, 0, 3197503018, 0, 0, 0, + 2100038740, 0, 0, 0, 4200077480, 0, 0, 0, 4105187665, 0, 0, 0, 3915408035, 0, 0, 0, + 3535848774, 0, 0, 0, 2776730253, 0, 0, 0, 1349368510, 0, 0, 0, 2698737021, 0, 0, 0, + 1102506746, 0, 0, 0, 2205013492, 0, 0, 0, 115059689, 0, 0, 0, 230119378, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3560175108, 0, 0, + 0, 2825382920, 0, 0, 0, 1355798544, 0, 0, 0, 2711597089, 0, 0, 0, 1128226882, 0, 0, + 0, 2256453764, 0, 0, 0, 217940233, 0, 0, 0, 435880467, 0, 0, 0, 871760935, 0, 0, + 0, 1743521871, 0, 0, 0, 3487043743, 0, 0, 0, 2679120191, 0, 0, 0, 1063273086, 0, 0, + 0, 2126546172, 0, 0, 0, 4253092344, 0, 0, 0, 
4211217392, 0, 0, 0, 4127467489, 0, 0, + 0, 3959967682, 0, 0, 0, 3624968069, 0, 0, 0, 2954968843, 0, 0, 0, 1614970391, 0, 0, + 0, 3229940782, 0, 0, 0, 2164914268, 0, 0, 0, 34861240, 0, 0, 0, 69722480, 0, 0, + 0, 139444961, 0, 0, 0, 278889922, 0, 0, 0, 4111269249, 0, 0, 0, 3927571202, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2424557694, 0, 0, 0, 554148093, 0, 0, 0, 1108296186, 0, 0, 0, 2216592373, 0, + 0, 0, 138217450, 0, 0, 0, 276434901, 0, 0, 0, 552869802, 0, 0, 0, 1105739604, 0, + 0, 0, 2211479208, 0, 0, 0, 127991121, 0, 0, 0, 255982243, 0, 0, 0, 511964486, 0, + 0, 0, 1023928972, 0, 0, 0, 2047857944, 0, 0, 0, 4095715888, 0, 0, 0, 2025620510, 0, + 0, 0, 4051241021, 0, 0, 0, 3807514746, 0, 0, 0, 3320062196, 0, 0, 0, 2345157096, 0, + 0, 0, 395346896, 0, 0, 0, 790693792, 0, 0, 0, 1581387585, 0, 0, 0, 3162775171, 0, + 0, 0, 2030583047, 0, 0, 0, 4061166095, 0, 0, 0, 3827364895, 0, 0, 0, 3359762495, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 1980210557, 0, 0, 0, 3960421115, 0, 0, 0, 3625874935, 0, 0, 0, + 2956782575, 0, 0, 0, 1618597854, 0, 0, 0, 3237195709, 0, 0, 0, 2179424123, 0, 0, 0, + 63880951, 0, 0, 0, 127761903, 0, 0, 0, 255523807, 0, 0, 0, 511047615, 0, 0, 0, + 1022095230, 0, 0, 0, 2044190460, 0, 0, 0, 4088380920, 0, 0, 0, 3881794544, 0, 0, 0, + 3468621792, 0, 0, 0, 2642276289, 0, 0, 0, 989585283, 0, 0, 0, 1979170566, 0, 0, 0, + 3958341132, 0, 0, 0, 3621714968, 0, 0, 0, 2948462640, 0, 0, 0, 1601957985, 0, 0, 0, + 3203915970, 
0, 0, 0, 2112864644, 0, 0, 0, 2379751029, 0, 0, 0, 464534763, 0, 0, 0, + 929069527, 0, 0, 0, 1858139055, 0, 0, 0, 3716278111, 0, 0, 0, 3137588926, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3625984309, 0, 0, + 0, 2957001323, 0, 0, 0, 1619035351, 0, 0, 0, 3238070702, 0, 0, 0, 2181174108, 0, 0, + 0, 67380920, 0, 0, 0, 134761841, 0, 0, 0, 269523682, 0, 0, 0, 539047365, 0, 0, + 0, 1078094730, 0, 0, 0, 2156189460, 0, 0, 0, 17411624, 0, 0, 0, 34823248, 0, 0, + 0, 69646496, 0, 0, 0, 139292993, 0, 0, 0, 278585986, 0, 0, 0, 557171973, 0, 0, + 0, 1114343946, 0, 0, 0, 2228687892, 0, 0, 0, 162408489, 0, 0, 0, 324816978, 0, 0, + 0, 649633957, 0, 0, 0, 1299267915, 0, 0, 0, 2598535831, 0, 0, 0, 902104367, 0, 0, + 0, 1804208734, 0, 0, 0, 3608417468, 0, 0, 0, 1980237901, 0, 0, 0, 3960475802, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70539992, 0, 0, 0, 141079985, 0, 0, 0, 282159971, 0, 0, 0, 564319943, 0, + 0, 0, 1128639887, 0, 0, 0, 2257279774, 0, 0, 0, 219592253, 0, 0, 0, 439184507, 0, + 0, 0, 878369014, 0, 0, 0, 1756738029, 0, 0, 0, 3513476059, 0, 0, 0, 2731984823, 0, + 0, 0, 1169002351, 0, 0, 0, 2338004702, 0, 0, 0, 381042109, 0, 0, 0, 693641634, 0, + 0, 0, 1387283269, 0, 0, 0, 2774566539, 0, 0, 0, 1254165782, 0, 0, 0, 2508331565, 0, + 0, 0, 721695834, 0, 0, 0, 1443391669, 0, 0, 0, 2886783339, 0, 0, 0, 1478599382, 0, + 0, 0, 2957198765, 0, 0, 0, 1619430235, 0, 0, 0, 3238860470, 0, 0, 0, 2182753644, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 
16640, + }, + { + 0, 0, 0, 0, 893107048, 0, 0, 0, 1786214097, 0, 0, 0, 3572428195, 0, 0, 0, + 2849889095, 0, 0, 0, 1404810895, 0, 0, 0, 2809621790, 0, 0, 0, 1324276285, 0, 0, 0, + 2648552571, 0, 0, 0, 1002137847, 0, 0, 0, 2004275695, 0, 0, 0, 4008551390, 0, 0, 0, + 3722135485, 0, 0, 0, 3149303674, 0, 0, 0, 2003640053, 0, 0, 0, 4007280106, 0, 0, 0, + 3719592917, 0, 0, 0, 3144218538, 0, 0, 0, 1993469781, 0, 0, 0, 3986939563, 0, 0, 0, + 3678911830, 0, 0, 0, 3062856364, 0, 0, 0, 1830745432, 0, 0, 0, 3661490864, 0, 0, 0, + 3028014433, 0, 0, 0, 1761061570, 0, 0, 0, 3839160045, 0, 0, 0, 3383352795, 0, 0, 0, + 2471738294, 0, 0, 0, 648509293, 0, 0, 0, 1297018586, 0, 0, 0, 2594037172, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99817600, 0, 0, + 0, 199635201, 0, 0, 0, 399270402, 0, 0, 0, 798540804, 0, 0, 0, 1597081609, 0, 0, + 0, 3194163218, 0, 0, 0, 2093359140, 0, 0, 0, 4186718280, 0, 0, 0, 4078469265, 0, 0, + 0, 3861971235, 0, 0, 0, 3428975175, 0, 0, 0, 2562983055, 0, 0, 0, 830998815, 0, 0, + 0, 1661997631, 0, 0, 0, 3323995262, 0, 0, 0, 2353023229, 0, 0, 0, 411079163, 0, 0, + 0, 822158327, 0, 0, 0, 1644316655, 0, 0, 0, 3288633310, 0, 0, 0, 2282299325, 0, 0, + 0, 269631354, 0, 0, 0, 539262709, 0, 0, 0, 1078525418, 0, 0, 0, 2157050836, 0, 0, + 0, 19134376, 0, 0, 0, 38268752, 0, 0, 0, 24954400, 0, 0, 0, 49908800, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1770227281, 0, 0, 0, 3540454563, 0, 0, 0, 2785941830, 0, 0, 0, 1276916364, 0, + 0, 0, 2553832729, 0, 0, 0, 812698162, 0, 0, 0, 1625396325, 0, 0, 0, 3250792650, 0, + 0, 0, 2206618004, 0, 0, 0, 118268713, 0, 0, 0, 236537426, 0, 0, 0, 473074852, 0, + 0, 0, 946149705, 0, 0, 0, 1892299411, 0, 0, 0, 3784598822, 0, 0, 0, 2863352860, 0, + 0, 0, 1431738424, 0, 0, 0, 2863476848, 0, 0, 0, 1431986401, 0, 0, 0, 2863972803, 0, + 0, 0, 1432978310, 0, 0, 0, 2865956620, 0, 0, 0, 1436945945, 0, 0, 0, 2873891890, 0, + 0, 0, 1452816485, 0, 0, 0, 2905632970, 0, 0, 0, 1516298644, 0, 0, 0, 3032597288, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 2113541784, 0, 0, 0, 4227083568, 0, 0, 0, 4159199840, 0, 0, 0, + 4023432384, 0, 0, 0, 3751897472, 0, 0, 0, 3208827648, 0, 0, 0, 2122688001, 0, 0, 0, + 4245376003, 0, 0, 0, 4195784710, 0, 0, 0, 4096602125, 0, 0, 0, 3898236955, 0, 0, 0, + 3501506615, 0, 0, 0, 2708045935, 0, 0, 0, 1121124575, 0, 0, 0, 2242249151, 0, 0, 0, + 189531007, 0, 0, 0, 379062015, 0, 0, 0, 758124030, 0, 0, 0, 1516248061, 0, 0, 0, + 3032496123, 0, 0, 0, 1770024950, 0, 0, 0, 3540049900, 0, 0, 0, 2785132504, 0, 0, 0, + 1275297712, 0, 0, 0, 2550595425, 0, 0, 0, 1308092506, 0, 0, 0, 2616185012, 0, 0, 0, + 937402729, 0, 0, 0, 1874805459, 0, 0, 0, 3749610918, 0, 0, 0, 3204254540, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1409835056, 0, 0, + 0, 2819670112, 0, 0, 0, 1344372928, 0, 0, 0, 2688745857, 0, 0, 0, 1082524418, 0, 0, + 0, 2165048836, 0, 0, 0, 35130376, 0, 0, 0, 70260752, 0, 0, 0, 140521505, 0, 0, + 0, 281043010, 0, 0, 0, 562086021, 0, 0, 0, 1124172042, 0, 0, 0, 2248344084, 0, 0, + 0, 201720873, 0, 0, 0, 403441747, 0, 0, 0, 806883495, 0, 0, 0, 1613766991, 0, 0, + 0, 3227533982, 0, 0, 0, 2160100668, 0, 0, 0, 25234040, 0, 0, 0, 50468080, 0, 0, + 0, 100936160, 0, 0, 0, 201872321, 0, 0, 0, 403744643, 0, 0, 0, 807489287, 0, 0, + 0, 1614978575, 0, 0, 0, 3229957150, 0, 0, 0, 3573684236, 0, 0, 0, 2852401176, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 302254233, 0, 0, 0, 604508466, 0, 0, 0, 1209016932, 0, 0, 0, 2418033865, 
0, + 0, 0, 541100435, 0, 0, 0, 1082200870, 0, 0, 0, 2164401740, 0, 0, 0, 33836184, 0, + 0, 0, 67672369, 0, 0, 0, 135344738, 0, 0, 0, 270689477, 0, 0, 0, 541378955, 0, + 0, 0, 1082757911, 0, 0, 0, 2165515822, 0, 0, 0, 36064348, 0, 0, 0, 373854240, 0, + 0, 0, 747708480, 0, 0, 0, 1495416961, 0, 0, 0, 2990833922, 0, 0, 0, 1686700548, 0, + 0, 0, 3373401096, 0, 0, 0, 2451834897, 0, 0, 0, 608702498, 0, 0, 0, 1217404996, 0, + 0, 0, 2434809993, 0, 0, 0, 574652691, 0, 0, 0, 1149305382, 0, 0, 0, 2298610764, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 307855197, 0, 0, 0, 615710394, 0, 0, 0, 1231420788, 0, 0, 0, + 2462841576, 0, 0, 0, 630715856, 0, 0, 0, 1261431713, 0, 0, 0, 2522863426, 0, 0, 0, + 750759556, 0, 0, 0, 1501519112, 0, 0, 0, 3003038224, 0, 0, 0, 1711109152, 0, 0, 0, + 3422218304, 0, 0, 0, 2549469312, 0, 0, 0, 803971329, 0, 0, 0, 1607942659, 0, 0, 0, + 3215885318, 0, 0, 0, 2136803341, 0, 0, 0, 4273606682, 0, 0, 0, 4252246069, 0, 0, 0, + 4209524842, 0, 0, 0, 4124082389, 0, 0, 0, 3953197482, 0, 0, 0, 3611427668, 0, 0, 0, + 2927888040, 0, 0, 0, 1560808784, 0, 0, 0, 2823382525, 0, 0, 0, 1351797754, 0, 0, 0, + 2703595509, 0, 0, 0, 1112223723, 0, 0, 0, 2224447447, 0, 0, 0, 153927598, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1430273920, 0, 0, + 0, 2860547840, 0, 0, 0, 1426128384, 0, 0, 0, 2852256768, 0, 0, 0, 1409546240, 0, 0, + 0, 2819092480, 0, 0, 0, 1343217664, 0, 0, 0, 2686435329, 0, 
0, 0, 1077903362, 0, 0, + 0, 2155806724, 0, 0, 0, 16646152, 0, 0, 0, 33292304, 0, 0, 0, 66584608, 0, 0, + 0, 133169216, 0, 0, 0, 266338433, 0, 0, 0, 532676867, 0, 0, 0, 1065353734, 0, 0, + 0, 2130707468, 0, 0, 0, 4261414936, 0, 0, 0, 4227862576, 0, 0, 0, 4160757856, 0, 0, + 0, 4026548417, 0, 0, 0, 3758129539, 0, 0, 0, 3221291782, 0, 0, 0, 2147616268, 0, 0, + 0, 265240, 0, 0, 0, 530480, 0, 0, 0, 1431310304, 0, 0, 0, 2862620608, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 1769338744, 0, 0, 0, 3538677488, 0, 0, 0, 2782387681, 0, 0, 0, + 1269808067, 0, 0, 0, 2539616134, 0, 0, 0, 784264973, 0, 0, 0, 1568529946, 0, 0, 0, + 3137059892, 0, 0, 0, 1979152488, 0, 0, 0, 3958304976, 0, 0, 0, 3621642656, 0, 0, 0, + 2948318016, 0, 0, 0, 1601668737, 0, 0, 0, 3203337474, 0, 0, 0, 2111707652, 0, 0, 0, + 
4223415304, 0, 0, 0, 4151863312, 0, 0, 0, 4008759328, 0, 0, 0, 3722551361, 0, 0, 0, + 3150135426, 0, 0, 0, 2005303557, 0, 0, 0, 4010607114, 0, 0, 0, 3726246932, 0, 0, 0, + 3157526569, 0, 0, 0, 2020085842, 0, 0, 0, 2577782749, 0, 0, 0, 860598203, 0, 0, 0, + 1721196407, 0, 0, 0, 3442392815, 0, 0, 0, 2589818334, 0, 0, 0, 884669372, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4294705152, 0, 0, + 0, 4294443008, 0, 0, 0, 4293918720, 0, 0, 0, 4292870144, 0, 0, 0, 4290772992, 0, 0, + 0, 4286578688, 0, 0, 0, 4278190080, 0, 0, 0, 4261412864, 0, 0, 0, 4227858432, 0, 0, + 0, 4160749568, 0, 0, 0, 4026531841, 0, 0, 0, 3758096387, 0, 0, 0, 3221225478, 0, 0, + 0, 2147483660, 0, 0, 0, 24, 0, 0, 0, 48, 0, 0, 0, 96, 0, 0, + 0, 192, 0, 0, 0, 384, 0, 0, 0, 768, 0, 0, 0, 1536, 0, 0, + 0, 3072, 0, 0, 0, 6144, 0, 0, 0, 12288, 0, 0, 0, 24576, 0, 0, + 0, 49152, 0, 0, 0, 98304, 0, 0, 0, 4294901760, 0, 0, 0, 4294836224, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 
0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 2045181768, 0, 0, 0, 4090363536, 0, 0, 0, 3885759776, 0, 0, 0, + 3476552256, 0, 0, 0, 2658137216, 0, 0, 0, 1021307136, 0, 0, 0, 2042614272, 0, 0, 0, + 4085228544, 0, 0, 0, 3875489792, 0, 0, 0, 3456012289, 0, 0, 0, 2617057282, 0, 0, 0, + 939147269, 0, 0, 0, 1878294539, 0, 0, 0, 3756589078, 0, 0, 0, 3218210860, 0, 0, 0, + 2141454425, 0, 0, 0, 4282908850, 0, 0, 0, 4270850404, 0, 0, 0, 4246733513, 0, 0, 0, + 4198499730, 0, 0, 0, 4102032165, 0, 0, 0, 3909097035, 0, 0, 0, 3523226774, 0, 0, 0, + 2751486253, 0, 0, 0, 1208005210, 0, 0, 0, 3924270077, 0, 0, 0, 3553572858, 0, 0, 0, + 2812178420, 0, 0, 0, 1329389545, 0, 0, 0, 2658779090, 0, 0, 0, 1022590884, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, + { + 0, 0, 0, 0, 698651200, 0, 0, 0, 1397302401, 0, 0, 0, 2794604802, 0, 0, 0, + 1294242308, 0, 0, 0, 2588484616, 0, 0, 0, 882001936, 0, 0, 0, 1764003872, 0, 0, 0, + 3528007744, 0, 0, 0, 2761048193, 0, 0, 0, 1227129090, 0, 0, 0, 2454258180, 0, 0, 0, + 613549064, 0, 0, 0, 1227098128, 0, 0, 0, 2454196256, 0, 0, 0, 613425216, 0, 0, 0, + 1226850432, 0, 0, 0, 2453700864, 0, 0, 0, 612434432, 0, 0, 0, 1224868864, 0, 0, 0, + 2449737728, 0, 0, 0, 604508160, 0, 0, 0, 1209016320, 0, 0, 0, 2418032641, 0, 0, 0, + 541097986, 0, 0, 0, 1082195972, 0, 0, 0, 2829488713, 0, 0, 0, 1364010130, 0, 0, 0, + 2728020260, 0, 0, 0, 1161073224, 0, 0, 0, 2322146448, 0, 0, 0, 349325600, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 
0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2199499341, + 0, 0, 0, 104031387, 0, 0, 0, 208062775, 0, 0, 0, 416125551, 0, 0, 0, 832251102, + 0, 0, 0, 1664502205, 0, 0, 0, 3329004410, 0, 0, 0, 2363041525, 0, 0, 0, 431115754, + 0, 0, 0, 862231508, 0, 0, 0, 1724463016, 0, 0, 0, 3448926032, 0, 0, 0, 2602884769, + 0, 0, 0, 910802242, 0, 0, 0, 1821604484, 0, 0, 0, 3643208968, 0, 0, 0, 2991450640, + 0, 0, 0, 1687933984, 0, 0, 0, 3375867968, 0, 0, 0, 2456768640, 0, 0, 0, 618569985, + 0, 0, 0, 1237139970, 0, 0, 0, 274937417, 0, 0, 0, 549874835, 0, 0, 0, 1099749670, + }, + { + 0, 0, 0, 0, 1226833920, 0, 0, 0, 2453667840, 0, 0, 0, 612368384, 0, 0, 0, + 1224736768, 0, 0, 0, 2449473536, 0, 0, 0, 603979776, 0, 0, 0, 1207959552, 0, 0, 0, + 2415919105, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, 0, 0, 2147483657, 0, 0, 0, + 18, 0, 0, 0, 36, 0, 0, 0, 72, 0, 0, 0, 144, 0, 0, 0, + 288, 0, 0, 0, 576, 0, 0, 0, 1152, 0, 0, 0, 2304, 0, 0, 0, + 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, 0, 0, 0, + 73728, 0, 0, 0, 147456, 0, 0, 0, 1227128832, 0, 0, 0, 2454257664, 0, 0, 0, + 613548032, 0, 0, 0, 1227096064, 0, 0, 0, 2454192128, 0, 0, 0, 613416960, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 
0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1417720836, 0, 0, 0, 2835441672, 0, 0, 0, 1375916048, 0, 0, 0, 2751832097, 0, + 0, 0, 1208696898, 0, 0, 0, 2417393796, 0, 0, 0, 539820296, 0, 0, 0, 1079640592, 0, + 0, 0, 2159281184, 0, 0, 0, 23595072, 0, 0, 0, 47190144, 0, 0, 0, 94380288, 0, + 0, 0, 188760576, 0, 0, 0, 377521152, 0, 0, 0, 755042304, 0, 0, 0, 243442693, 0, + 0, 0, 486885387, 0, 0, 0, 973770774, 0, 0, 0, 1947541549, 0, 0, 0, 3895083098, 0, + 0, 0, 3495198900, 0, 0, 0, 2695430504, 0, 0, 0, 1095893712, 0, 0, 0, 2191787424, 0, + 0, 0, 88607552, 0, 0, 0, 177215104, 0, 0, 0, 354430209, 0, 0, 0, 708860418, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4196822677, + 0, 0, 0, 4098678059, 0, 0, 0, 3902388822, 0, 0, 0, 3509810348, 0, 0, 0, 2724653400, + 0, 0, 0, 1154339505, 0, 0, 0, 2308679010, 0, 0, 0, 322390725, 0, 0, 0, 644781450, + 0, 0, 0, 1289562901, 0, 0, 0, 2579125802, 0, 0, 0, 863284308, 0, 0, 0, 1726568616, + 0, 0, 0, 3453137233, 0, 0, 0, 2611307171, 0, 0, 0, 927647046, 0, 0, 0, 1855294093, + 0, 0, 0, 3710588186, 0, 0, 0, 3126209076, 0, 0, 0, 1957450856, 0, 0, 0, 3914901713, + 0, 0, 0, 3534836131, 0, 0, 0, 1598344658, 0, 0, 0, 3196689317, 0, 0, 0, 2098411338, + }, + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 0, 0, + 133120, 0, 0, 0, 266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
8421376, 0, 0, + 0, 16842752, 0, 0, 0, 33685504, 0, 0, 0, 67371008, 0, 0, 0, 134742017, 0, 0, + 0, 269484034, 0, 0, 0, 538968069, 0, 0, 0, 1077936138, 0, 0, 0, 2155872276, 0, 0, + 0, 16777256, 0, 0, 0, 33554512, 0, 0, 0, 67109024, 0, 0, 0, 134218049, 0, 0, + 0, 268436098, 0, 0, 0, 536872197, 0, 0, 0, 1073744394, 0, 0, 0, 2147488788, 0, 0, + 0, 10280, 0, 0, 0, 20560, 0, 0, 0, 41120, 0, 0, 0, 82240, 0, 0, + 0, 164480, 0, 0, 0, 328960, 0, 0, 0, 657920, 0, 0, 0, 1315840, 0, 0, + 0, 2631680, 0, 0, 0, 5263360, 0, 0, 0, 2105344, 0, 0, 0, 4210688, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 844445257, 0, 0, 0, 1688890514, 0, 0, 0, 3377781029, 0, 0, 0, 2460594762, 0, + 0, 0, 626222228, 0, 0, 0, 1252444457, 0, 0, 0, 2504888914, 0, 0, 0, 714810532, 0, + 0, 0, 1429621065, 0, 0, 0, 2859242131, 0, 0, 0, 1423516967, 0, 0, 0, 2847033934, 0, + 0, 0, 1399100572, 0, 0, 0, 2798201144, 0, 0, 0, 1301434992, 0, 0, 0, 2842792617, 0, + 0, 0, 1390617939, 0, 0, 0, 2781235878, 0, 0, 0, 1267504461, 0, 0, 0, 2535008923, 0, + 0, 0, 775050550, 0, 0, 0, 1550101100, 0, 0, 0, 3100202201, 0, 0, 0, 1905437106, 0, + 0, 0, 3810874212, 0, 0, 0, 3326781129, 0, 0, 0, 2358594962, 0, 0, 0, 422222628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2034053848, + 0, 0, 0, 4068107696, 0, 0, 0, 3841248097, 0, 0, 0, 3387528899, 0, 0, 0, 2480090502, + 0, 0, 0, 665213709, 0, 0, 0, 1330427418, 0, 0, 0, 2660854837, 0, 0, 0, 1026742378, + 0, 0, 0, 2053484756, 0, 0, 0, 4106969513, 0, 0, 0, 3918971730, 0, 0, 0, 3542976165, + 0, 0, 0, 2790985034, 0, 0, 0, 1287002772, 0, 0, 0, 2574005545, 0, 0, 0, 853043794, + 0, 0, 0, 1706087588, 0, 0, 0, 3412175176, 0, 0, 0, 2529383056, 0, 0, 0, 763798816, + 0, 0, 0, 1527597633, 0, 0, 0, 3475482203, 0, 0, 0, 2655997110, 0, 0, 0, 1017026924, + }, + { + 0, 0, 0, 0, 8390656, 0, 0, 0, 16781312, 0, 0, 0, 33562625, 0, 0, 0, + 67125250, 0, 0, 0, 134250500, 0, 0, 0, 268501000, 0, 0, 0, 537002000, 0, 0, 0, + 1074004000, 0, 
0, 0, 2148008001, 0, 0, 0, 1048706, 0, 0, 0, 2097412, 0, 0, 0, + 4194824, 0, 0, 0, 8389648, 0, 0, 0, 16779296, 0, 0, 0, 33558593, 0, 0, 0, + 67117186, 0, 0, 0, 134234372, 0, 0, 0, 268468744, 0, 0, 0, 536937488, 0, 0, 0, + 1073874976, 0, 0, 0, 2147749953, 0, 0, 0, 532610, 0, 0, 0, 1065220, 0, 0, 0, + 2130440, 0, 0, 0, 4260880, 0, 0, 0, 131104, 0, 0, 0, 262208, 0, 0, 0, + 524416, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195328, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 591396869, 0, 0, + 0, 1182793738, 0, 0, 0, 2365587477, 0, 0, 0, 436207659, 0, 0, 0, 872415319, 0, 0, + 0, 1744830638, 0, 0, 0, 3489661276, 0, 0, 0, 2684355257, 0, 0, 0, 1073743218, 0, 0, + 0, 2147486436, 0, 0, 0, 5576, 0, 0, 0, 11152, 0, 0, 0, 22304, 0, 0, + 0, 44608, 0, 0, 0, 89216, 0, 0, 0, 178432, 0, 0, 0, 356864, 0, 0, + 0, 713728, 0, 0, 0, 1427456, 0, 0, 0, 2854912, 0, 0, 0, 5709824, 0, 0, + 0, 11419648, 0, 0, 0, 22839296, 0, 0, 0, 45678592, 0, 0, 0, 91357184, 0, 0, + 0, 182714369, 0, 0, 0, 365428738, 0, 0, 0, 147849217, 0, 0, 0, 295698434, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3001529316, 0, 0, 0, 1708091336, 0, 0, 0, 3416182673, 0, 0, 0, 2537398050, 0, + 0, 0, 779828804, 0, 0, 0, 1559657609, 0, 0, 0, 3119315218, 0, 0, 0, 1943663140, 0, + 0, 0, 3887326281, 0, 0, 0, 3479685267, 0, 0, 0, 2664403238, 0, 0, 0, 1033839181, 0, + 0, 0, 2067678363, 0, 0, 0, 4135356726, 0, 0, 0, 3975746156, 0, 0, 0, 1796577085, 0, + 0, 0, 3593154171, 0, 0, 0, 2891341046, 0, 0, 0, 1487714796, 0, 0, 0, 2975429593, 0, + 0, 0, 1655891891, 0, 0, 0, 3311783783, 0, 0, 0, 2328600271, 0, 0, 0, 362233247, 0, + 0, 0, 724466494, 0, 0, 0, 1448932988, 0, 0, 0, 2897865977, 0, 0, 0, 1500764658, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30521863, + 0, 0, 0, 61043727, 0, 0, 0, 122087454, 0, 0, 0, 244174909, 0, 0, 0, 488349818, + 0, 0, 0, 976699637, 0, 0, 0, 1953399275, 0, 0, 0, 3906798551, 0, 0, 0, 3518629806, + 0, 0, 0, 2742292316, 0, 0, 0, 
1189617336, 0, 0, 0, 2379234673, 0, 0, 0, 463502051, + 0, 0, 0, 927004102, 0, 0, 0, 1854008205, 0, 0, 0, 3708016410, 0, 0, 0, 3121065525, + 0, 0, 0, 1947163754, 0, 0, 0, 3894327508, 0, 0, 0, 3493687720, 0, 0, 0, 2692408145, + 0, 0, 0, 1089848995, 0, 0, 0, 2151298880, 0, 0, 0, 7630465, 0, 0, 0, 15260931, + }, + { + 0, 0, 0, 0, 134300932, 0, 0, 0, 268601864, 0, 0, 0, 537203728, 0, 0, 0, + 1074407456, 0, 0, 0, 2148814913, 0, 0, 0, 2662530, 0, 0, 0, 5325060, 0, 0, 0, + 10650120, 0, 0, 0, 21300240, 0, 0, 0, 42600481, 0, 0, 0, 85200962, 0, 0, 0, + 170401925, 0, 0, 0, 340803850, 0, 0, 0, 681607700, 0, 0, 0, 1363215400, 0, 0, 0, + 2726430800, 0, 0, 0, 1157894304, 0, 0, 0, 2315788608, 0, 0, 0, 336609920, 0, 0, 0, + 673219840, 0, 0, 0, 1346439680, 0, 0, 0, 2692879361, 0, 0, 0, 1090791426, 0, 0, 0, + 2181582852, 0, 0, 0, 68198408, 0, 0, 0, 2098452, 0, 0, 0, 4196904, 0, 0, 0, + 8393808, 0, 0, 0, 16787616, 0, 0, 0, 33575233, 0, 0, 0, 67150466, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 661652741, 0, 0, + 0, 1323305483, 0, 0, 0, 2646610967, 0, 0, 0, 998254638, 0, 0, 0, 1996509277, 0, 0, + 0, 3993018554, 0, 0, 0, 3691069813, 0, 0, 0, 3087172330, 0, 0, 0, 1879377365, 0, 0, + 0, 3758754731, 0, 0, 0, 3222542166, 0, 0, 0, 2150117036, 0, 0, 0, 5266776, 0, 0, + 0, 10533552, 0, 0, 0, 21067104, 0, 0, 0, 42134208, 0, 0, 0, 84268416, 0, 0, + 0, 168536833, 0, 0, 0, 337073666, 0, 0, 0, 674147332, 0, 0, 0, 1348294664, 0, 0, + 0, 2696589329, 0, 0, 0, 1098211362, 0, 0, 0, 2196422724, 0, 0, 0, 97878152, 0, 0, + 0, 195756305, 0, 0, 0, 391512610, 0, 0, 0, 165413185, 0, 0, 0, 330826370, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2938358989, 0, 0, 0, 1581750682, 0, 0, 0, 3163501365, 0, 0, 0, 2032035435, 0, + 0, 0, 4064070870, 0, 0, 0, 3833174445, 0, 0, 0, 3371381594, 0, 0, 0, 2447795893, 0, + 0, 0, 600624491, 0, 0, 0, 1201248983, 0, 0, 0, 2402497966, 0, 0, 0, 510028637, 0, + 0, 0, 1020057274, 0, 0, 0, 2040114548, 0, 0, 0, 4080229097, 0, 0, 0, 1229289758, 0, + 0, 0, 
2458579516, 0, 0, 0, 622191737, 0, 0, 0, 1244383475, 0, 0, 0, 2488766950, 0, + 0, 0, 682566604, 0, 0, 0, 1365133209, 0, 0, 0, 2730266419, 0, 0, 0, 1165565542, 0, + 0, 0, 2331131084, 0, 0, 0, 367294873, 0, 0, 0, 734589747, 0, 0, 0, 1469179494, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33833481, + 0, 0, 0, 67666962, 0, 0, 0, 135333924, 0, 0, 0, 270667848, 0, 0, 0, 541335697, + 0, 0, 0, 1082671394, 0, 0, 0, 2165342788, 0, 0, 0, 35718281, 0, 0, 0, 71436563, + 0, 0, 0, 142873126, 0, 0, 0, 285746252, 0, 0, 0, 571492505, 0, 0, 0, 1142985010, + 0, 0, 0, 2285970021, 0, 0, 0, 276972746, 0, 0, 0, 553945492, 0, 0, 0, 1107890985, + 0, 0, 0, 2215781970, 0, 0, 0, 136596644, 0, 0, 0, 273193289, 0, 0, 0, 546386578, + 0, 0, 0, 1092773156, 0, 0, 0, 2151712833, 0, 0, 0, 8458370, 0, 0, 0, 16916740, + }, + { + 0, 0, 0, 0, 826542200, 0, 0, 0, 1653084401, 0, 0, 0, 3306168803, 0, 0, 0, + 2317370310, 0, 0, 0, 339773324, 0, 0, 0, 679546648, 0, 0, 0, 1359093296, 0, 0, 0, + 2718186592, 0, 0, 0, 1141405888, 0, 0, 0, 2282811777, 0, 0, 0, 270656258, 0, 0, 0, + 541312516, 0, 0, 0, 1082625032, 0, 0, 0, 2165250065, 0, 0, 0, 35532835, 0, 0, 0, + 71065670, 0, 0, 0, 142131340, 0, 0, 0, 284262680, 0, 0, 0, 568525360, 0, 0, 0, + 1137050721, 0, 0, 0, 2274101442, 0, 0, 0, 253235589, 0, 0, 0, 506471179, 0, 0, 0, + 1012942358, 0, 0, 0, 2025884716, 0, 0, 0, 3234140193, 0, 0, 0, 2173313091, 0, 0, 0, + 51658887, 0, 0, 0, 103317775, 0, 0, 0, 206635550, 0, 0, 0, 413271100, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2815455249, 0, 0, + 0, 1335943203, 0, 0, 0, 2671886407, 0, 0, 0, 1048805518, 0, 0, 0, 2097611036, 0, 0, + 0, 4195222072, 0, 0, 0, 4095476849, 0, 0, 0, 3895986402, 0, 0, 0, 3497005508, 0, 0, + 0, 2699043721, 0, 0, 0, 1103120146, 0, 0, 0, 2206240292, 0, 0, 0, 117513288, 0, 0, + 0, 235026577, 0, 0, 0, 470053155, 0, 0, 0, 940106310, 0, 0, 0, 1880212621, 0, 0, + 0, 3760425243, 0, 0, 0, 3225883190, 0, 0, 0, 2156799084, 0, 0, 0, 18630872, 0, 0, 
+ 0, 37261744, 0, 0, 0, 74523488, 0, 0, 0, 149046977, 0, 0, 0, 298093954, 0, 0, + 0, 596187909, 0, 0, 0, 1192375818, 0, 0, 0, 703863812, 0, 0, 0, 1407727624, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1801783692, 0, 0, 0, 3603567385, 0, 0, 0, 2912167474, 0, 0, 0, 1529367653, 0, + 0, 0, 3058735306, 0, 0, 0, 1822503316, 0, 0, 0, 3645006633, 0, 0, 0, 2995045970, 0, + 0, 0, 1695124645, 0, 0, 0, 3390249290, 0, 0, 0, 2485531285, 0, 0, 0, 676095274, 0, + 0, 0, 1352190548, 0, 0, 0, 2704381097, 0, 0, 0, 1113794898, 0, 0, 0, 4020460328, 0, + 0, 0, 3745953360, 0, 0, 0, 3196939425, 0, 0, 0, 2098911554, 0, 0, 0, 4197823108, 0, + 0, 0, 4100678921, 0, 0, 0, 3906390547, 0, 0, 0, 3517813798, 0, 0, 0, 2740660300, 0, + 0, 0, 1186353304, 0, 0, 0, 2372706609, 0, 0, 0, 450445923, 0, 0, 0, 900891846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67141650, + 0, 0, 0, 134283300, 0, 0, 0, 268566600, 0, 0, 0, 537133200, 0, 0, 0, 1074266400, + 0, 0, 0, 2148532800, 0, 0, 0, 2098304, 0, 0, 0, 4196609, 0, 0, 0, 8393218, + 0, 0, 0, 16786436, 0, 0, 0, 33572873, 0, 0, 0, 67145746, 0, 0, 0, 134291492, + 0, 0, 0, 268582984, 0, 0, 0, 537165968, 0, 0, 0, 1074331936, 0, 0, 0, 2148663872, + 0, 0, 0, 2360448, 0, 0, 0, 4720897, 0, 0, 0, 9441794, 0, 0, 0, 18883588, + 0, 0, 0, 37767176, 0, 0, 0, 8392706, 0, 0, 0, 16785412, 0, 0, 0, 33570825, + }, + { + 0, 0, 0, 0, 2499687121, 0, 0, 0, 704406946, 0, 0, 0, 1408813893, 0, 0, 0, + 2817627786, 0, 0, 0, 1340288277, 0, 0, 0, 2680576554, 0, 0, 0, 1066185813, 0, 0, 0, + 2132371627, 0, 0, 0, 4264743254, 0, 0, 0, 4234519213, 0, 0, 0, 4174071131, 0, 0, 0, + 4053174967, 0, 0, 0, 3811382638, 0, 0, 0, 3327797980, 0, 0, 0, 2360628665, 0, 0, 0, + 426290034, 0, 0, 0, 852580069, 0, 0, 0, 1705160138, 0, 0, 0, 3410320276, 0, 0, 0, + 2525673256, 0, 0, 0, 756379216, 0, 0, 0, 1512758433, 0, 0, 0, 3025516867, 0, 0, 0, + 1756066438, 0, 0, 0, 3512132877, 0, 0, 0, 911472843, 0, 0, 0, 1822945686, 0, 0, 0, + 
3645891373, 0, 0, 0, 2996815450, 0, 0, 0, 1698663604, 0, 0, 0, 3397327208, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1432822912, 0, 0, + 0, 2865645824, 0, 0, 0, 1436324352, 0, 0, 0, 2872648704, 0, 0, 0, 1450330112, 0, 0, + 0, 2900660224, 0, 0, 0, 1506353153, 0, 0, 0, 3012706307, 0, 0, 0, 1730445319, 0, 0, + 0, 3460890639, 0, 0, 0, 2626813983, 0, 0, 0, 958660670, 0, 0, 0, 1917321341, 0, 0, + 0, 3834642683, 0, 0, 0, 3374318071, 0, 0, 0, 2453668846, 0, 0, 0, 612370397, 0, 0, + 0, 1224740795, 0, 0, 0, 2449481590, 0, 0, 0, 603995885, 0, 0, 0, 1207991771, 0, 0, + 0, 2415983542, 0, 0, 0, 536999789, 0, 0, 0, 1073999578, 0, 0, 0, 2147999156, 0, 0, + 0, 1031016, 0, 0, 0, 2062032, 0, 0, 0, 1431947552, 0, 0, 0, 2863895104, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2656366736, 0, 0, 0, 1017766176, 0, 0, 0, 2035532352, 0, 0, 0, 4071064704, 0, + 0, 0, 3847162112, 0, 0, 0, 3399356928, 0, 0, 0, 2503746561, 0, 0, 0, 712525826, 0, + 0, 0, 1425051652, 0, 0, 0, 2850103304, 0, 0, 0, 1405239313, 0, 0, 0, 2810478627, 0, + 0, 0, 1325989958, 0, 0, 0, 2651979917, 0, 0, 0, 1008992539, 0, 0, 0, 3860656807, 0, + 0, 0, 3426346319, 0, 0, 0, 2557725342, 0, 0, 0, 820483388, 0, 0, 0, 1640966776, 0, + 0, 0, 3281933552, 0, 0, 0, 2268899809, 0, 0, 0, 242832322, 0, 0, 0, 485664644, 0, + 0, 0, 971329289, 0, 0, 0, 1942658578, 0, 0, 0, 3885317156, 0, 0, 0, 3475667016, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2304, + 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, + 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, 0, 0, 0, 589824, + 0, 0, 0, 1179648, 0, 0, 0, 2359296, 0, 0, 0, 4718593, 0, 0, 0, 9437186, + 0, 0, 0, 18874372, 0, 0, 0, 37748744, 0, 0, 0, 75497488, 0, 0, 0, 150994976, + 0, 0, 0, 301989953, 0, 0, 0, 603979906, 0, 0, 0, 1207959812, 0, 0, 0, 2415919624, + 0, 0, 0, 536871952, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + }, + { + 0, 0, 0, 0, 2271571420, 0, 0, 0, 248175545, 0, 0, 0, 
496351090, 0, 0, 0, + 992702181, 0, 0, 0, 1985404363, 0, 0, 0, 3970808727, 0, 0, 0, 3646650159, 0, 0, 0, + 2998333022, 0, 0, 0, 1701698748, 0, 0, 0, 3403397496, 0, 0, 0, 2511827697, 0, 0, 0, + 728688099, 0, 0, 0, 1457376199, 0, 0, 0, 2914752399, 0, 0, 0, 1534537503, 0, 0, 0, + 3069075006, 0, 0, 0, 1843182716, 0, 0, 0, 3686365432, 0, 0, 0, 3077763568, 0, 0, 0, + 1860559841, 0, 0, 0, 3721119683, 0, 0, 0, 3147272070, 0, 0, 0, 1999576845, 0, 0, 0, + 3999153690, 0, 0, 0, 3703340085, 0, 0, 0, 1042126263, 0, 0, 0, 2084252526, 0, 0, 0, + 4168505053, 0, 0, 0, 4042042811, 0, 0, 0, 3789118327, 0, 0, 0, 3283269358, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2740852545, 0, 0, + 0, 1186737794, 0, 0, 0, 2373475589, 0, 0, 0, 451983883, 0, 0, 0, 903967767, 0, 0, + 0, 1807935534, 0, 0, 0, 3615871068, 0, 0, 0, 2936774840, 0, 0, 0, 1578582385, 0, 0, + 0, 3157164770, 0, 0, 0, 2019362244, 0, 0, 0, 4038724489, 0, 0, 0, 3782481683, 0, 0, + 0, 3269996070, 0, 0, 0, 2245024844, 0, 0, 0, 195082393, 0, 0, 0, 390164786, 0, 0, + 0, 780329572, 0, 0, 0, 1560659145, 0, 0, 0, 3121318290, 0, 0, 0, 1947669285, 0, 0, + 0, 3895338570, 0, 0, 0, 3495709844, 0, 0, 0, 2696452393, 0, 0, 0, 1097937490, 0, 0, + 0, 2195874980, 0, 0, 0, 96782664, 0, 0, 0, 2832696784, 0, 0, 0, 1370426272, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2424557694, 0, 0, 0, 554148093, 0, 0, 0, 1108296186, 0, 0, 0, 2216592373, 0, + 0, 0, 138217450, 0, 0, 0, 276434901, 0, 0, 0, 552869802, 0, 0, 0, 1105739604, 0, + 0, 0, 2211479208, 0, 0, 0, 127991121, 0, 0, 0, 255982243, 0, 0, 0, 511964486, 0, + 0, 0, 1023928972, 0, 0, 0, 2047857944, 0, 0, 0, 4095715888, 0, 0, 0, 2025620510, 0, + 0, 0, 4051241021, 0, 0, 0, 3807514746, 0, 0, 0, 3320062196, 0, 0, 0, 2345157096, 0, + 0, 0, 395346896, 0, 0, 0, 790693792, 0, 0, 0, 1581387585, 0, 0, 0, 3162775171, 0, + 0, 0, 2030583047, 0, 0, 0, 4061166095, 0, 0, 0, 3827364895, 0, 0, 0, 3359762495, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 8390658, + 0, 0, 0, 16781316, 0, 0, 0, 33562633, 0, 0, 0, 67125266, 0, 0, 0, 134250532, + 0, 0, 0, 268501064, 0, 0, 0, 537002128, 0, 0, 0, 1074004256, 0, 0, 0, 2148008512, + 0, 0, 0, 1049728, 0, 0, 0, 2099456, 0, 0, 0, 4198913, 0, 0, 0, 8397826, + 0, 0, 0, 16795652, 0, 0, 0, 33591305, 0, 0, 0, 67182610, 0, 0, 0, 134365220, + 0, 0, 0, 268730440, 0, 0, 0, 537460880, 0, 0, 0, 1074921760, 0, 0, 0, 2149843520, + 0, 0, 0, 4719745, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195329, + }, + { + 0, 0, 0, 0, 1739616249, 0, 0, 0, 3479232498, 0, 0, 0, 2663497700, 0, 0, 0, + 1032028104, 0, 0, 0, 2064056209, 0, 0, 0, 4128112418, 0, 0, 0, 3961257541, 0, 0, 0, + 3627547787, 0, 0, 0, 2960128279, 0, 0, 0, 1625289262, 0, 0, 0, 3250578525, 0, 0, 0, + 2206189754, 0, 0, 0, 117412213, 0, 0, 0, 234824426, 0, 0, 0, 469648853, 0, 0, 0, + 939297707, 0, 0, 0, 1878595415, 0, 0, 0, 3757190830, 0, 0, 0, 3219414364, 0, 0, 0, + 2143861433, 0, 0, 0, 4287722866, 0, 0, 0, 4280478436, 0, 0, 0, 4265989576, 0, 0, 0, + 4237011857, 0, 0, 0, 4179056419, 0, 0, 0, 2510209471, 0, 0, 0, 725451647, 0, 0, 0, + 1450903295, 0, 0, 0, 2901806591, 0, 0, 0, 1508645886, 0, 0, 0, 3017291772, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3220345106, 0, 0, + 0, 2145722916, 0, 0, 0, 4291445832, 0, 0, 0, 4287924368, 0, 0, 0, 4280881440, 0, 0, + 0, 4266795584, 0, 0, 0, 4238623872, 0, 0, 0, 4182280448, 0, 0, 0, 4069593601, 0, 0, + 0, 3844219907, 0, 0, 0, 3393472519, 0, 0, 0, 2491977742, 0, 0, 0, 688988188, 0, 0, + 0, 1377976376, 0, 0, 0, 2755952753, 0, 0, 0, 1216938211, 0, 0, 0, 2433876422, 0, 0, + 0, 572785549, 0, 0, 0, 1145571098, 0, 0, 0, 2291142197, 0, 0, 0, 287317098, 0, 0, + 0, 574634197, 0, 0, 0, 1149268394, 0, 0, 0, 2298536789, 0, 0, 0, 302106282, 0, 0, + 0, 604212565, 0, 0, 0, 1208425131, 0, 0, 0, 805086276, 0, 0, 0, 1610172553, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70539992, 0, 0, 0, 141079985, 0, 0, 0, 282159971, 0, 0, 0, 564319943, 0, + 0, 0, 1128639887, 0, 0, 0, 
2257279774, 0, 0, 0, 219592253, 0, 0, 0, 439184507, 0, + 0, 0, 878369014, 0, 0, 0, 1756738029, 0, 0, 0, 3513476059, 0, 0, 0, 2731984823, 0, + 0, 0, 1169002351, 0, 0, 0, 2338004702, 0, 0, 0, 381042109, 0, 0, 0, 693641634, 0, + 0, 0, 1387283269, 0, 0, 0, 2774566539, 0, 0, 0, 1254165782, 0, 0, 0, 2508331565, 0, + 0, 0, 721695834, 0, 0, 0, 1443391669, 0, 0, 0, 2886783339, 0, 0, 0, 1478599382, 0, + 0, 0, 2957198765, 0, 0, 0, 1619430235, 0, 0, 0, 3238860470, 0, 0, 0, 2182753644, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 1915396941, 0, 0, 0, 3830793883, 0, 0, 0, 3366620471, 0, 0, 0, + 2438273647, 0, 0, 0, 581579999, 0, 0, 0, 1163159998, 0, 0, 0, 2326319996, 0, 0, 0, + 357672696, 0, 0, 0, 715345393, 0, 0, 0, 1430690786, 0, 0, 0, 2861381572, 0, 0, 0, + 1427795848, 0, 0, 0, 2855591696, 0, 0, 0, 1416216096, 0, 0, 0, 2832432193, 0, 0, 0, + 1369897090, 0, 0, 0, 2739794180, 0, 0, 0, 1184621065, 0, 0, 0, 2369242131, 0, 0, 0, + 443516967, 0, 0, 0, 887033934, 0, 0, 0, 1774067868, 0, 0, 0, 3548135736, 0, 0, 0, + 2801304176, 0, 0, 0, 1307641056, 0, 0, 0, 3922242189, 0, 0, 0, 3549517082, 0, 0, 0, + 2804066868, 0, 0, 0, 1313166441, 0, 0, 0, 2626332883, 0, 0, 0, 957698470, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 348140706, 0, 0, + 0, 696281412, 0, 0, 0, 1392562824, 0, 0, 0, 2785125649, 0, 0, 0, 1275284003, 0, 0, + 0, 2550568007, 0, 0, 0, 806168719, 0, 0, 0, 1612337439, 0, 0, 0, 3224674878, 0, 0, + 0, 2154382460, 0, 0, 0, 
13797624, 0, 0, 0, 27595248, 0, 0, 0, 55190496, 0, 0, + 0, 110380992, 0, 0, 0, 220761985, 0, 0, 0, 441523971, 0, 0, 0, 883047943, 0, 0, + 0, 1766095886, 0, 0, 0, 3532191772, 0, 0, 0, 2769416249, 0, 0, 0, 1243865203, 0, 0, + 0, 2487730406, 0, 0, 0, 680493516, 0, 0, 0, 1360987032, 0, 0, 0, 2721974065, 0, 0, + 0, 1148980834, 0, 0, 0, 2297961669, 0, 0, 0, 87035176, 0, 0, 0, 174070353, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1770227281, 0, 0, 0, 3540454563, 0, 0, 0, 2785941830, 0, 0, 0, 1276916364, 0, + 0, 0, 2553832729, 0, 0, 0, 812698162, 0, 0, 0, 1625396325, 0, 0, 0, 3250792650, 0, + 0, 0, 2206618004, 0, 0, 0, 118268713, 0, 0, 0, 236537426, 0, 0, 0, 473074852, 0, + 0, 0, 946149705, 0, 0, 0, 1892299411, 0, 0, 0, 3784598822, 0, 0, 0, 2863352860, 0, + 0, 0, 1431738424, 0, 0, 0, 2863476848, 0, 0, 0, 1431986401, 0, 0, 0, 2863972803, 0, + 0, 0, 1432978310, 0, 0, 0, 2865956620, 0, 0, 0, 1436945945, 0, 0, 0, 2873891890, 0, + 0, 0, 1452816485, 0, 0, 0, 2905632970, 0, 0, 0, 1516298644, 0, 0, 0, 3032597288, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 4038889453, 0, 0, 0, 3782811611, 0, 0, 0, 3270655926, 0, 0, 0, + 2246344557, 0, 0, 0, 197721819, 0, 0, 0, 395443639, 0, 0, 0, 790887279, 0, 0, 0, + 1581774559, 0, 0, 0, 3163549119, 0, 0, 0, 2032130942, 0, 0, 0, 4064261884, 0, 0, 0, + 3833556473, 0, 0, 0, 3372145651, 0, 0, 0, 2449324007, 0, 0, 0, 603680719, 0, 0, 0, + 1207361439, 0, 0, 
0, 2414722878, 0, 0, 0, 534478461, 0, 0, 0, 1068956923, 0, 0, 0, + 2137913847, 0, 0, 0, 4275827694, 0, 0, 0, 4256688093, 0, 0, 0, 4218408890, 0, 0, 0, + 4141850484, 0, 0, 0, 3988733673, 0, 0, 0, 734196287, 0, 0, 0, 1468392575, 0, 0, 0, + 2936785150, 0, 0, 0, 1578603005, 0, 0, 0, 3157206011, 0, 0, 0, 2019444726, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2692483345, 0, 0, + 0, 1089999394, 0, 0, 0, 2179998788, 0, 0, 0, 65030280, 0, 0, 0, 130060560, 0, 0, + 0, 260121121, 0, 0, 0, 520242243, 0, 0, 0, 1040484486, 0, 0, 0, 2080968972, 0, 0, + 0, 4161937944, 0, 0, 0, 4028908593, 0, 0, 0, 3762849891, 0, 0, 0, 3230732486, 0, 0, + 0, 2166497676, 0, 0, 0, 38028056, 0, 0, 0, 76056112, 0, 0, 0, 152112225, 0, 0, + 0, 304224450, 0, 0, 0, 608448901, 0, 0, 0, 1216897803, 0, 0, 0, 2433795606, 0, 0, + 0, 572623917, 0, 0, 0, 1145247834, 0, 0, 0, 2290495669, 0, 0, 0, 286024042, 0, 0, + 0, 572048085, 0, 0, 0, 1144096170, 0, 0, 0, 673120836, 0, 0, 0, 1346241672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 302254233, 0, 0, 0, 604508466, 0, 0, 0, 1209016932, 0, 0, 0, 2418033865, 0, + 0, 0, 541100435, 0, 0, 0, 1082200870, 0, 0, 0, 2164401740, 0, 0, 0, 33836184, 0, + 0, 0, 67672369, 0, 0, 0, 135344738, 0, 0, 0, 270689477, 0, 0, 0, 541378955, 0, + 0, 0, 1082757911, 0, 0, 0, 2165515822, 0, 0, 0, 36064348, 0, 0, 0, 373854240, 0, + 0, 0, 747708480, 0, 0, 0, 1495416961, 0, 0, 0, 2990833922, 0, 0, 0, 1686700548, 0, + 0, 0, 3373401096, 0, 0, 0, 2451834897, 0, 0, 0, 608702498, 0, 0, 0, 1217404996, 0, + 0, 0, 2434809993, 0, 0, 0, 574652691, 0, 0, 0, 1149305382, 0, 0, 0, 2298610764, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 
3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 3824344628, 0, 0, 0, 3353721960, 0, 0, 0, 2412476624, 0, 0, 0, + 529985953, 0, 0, 0, 1059971907, 0, 0, 0, 2119943815, 0, 0, 0, 4239887631, 0, 0, 0, + 4184807967, 0, 0, 0, 4074648638, 0, 0, 0, 3854329981, 0, 0, 0, 3413692666, 0, 0, 0, + 2532418036, 0, 0, 0, 769868776, 0, 0, 0, 1539737553, 0, 0, 0, 3079475106, 0, 0, 0, + 1863982917, 0, 0, 0, 3727965834, 0, 0, 0, 3160964373, 0, 0, 0, 2026961450, 0, 0, 0, + 4053922901, 0, 0, 0, 3812878506, 0, 0, 0, 3330789716, 0, 0, 0, 2366612137, 0, 0, 0, + 438256979, 0, 0, 0, 876513958, 0, 0, 0, 2341456760, 0, 0, 0, 387946225, 0, 0, 0, + 775892451, 0, 0, 0, 1551784902, 0, 0, 0, 3103569805, 0, 0, 0, 1912172314, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1473249760, 0, 0, + 0, 2946499520, 0, 0, 0, 1598031745, 0, 0, 0, 3196063490, 0, 0, 0, 2097159684, 0, 0, + 0, 4194319368, 0, 0, 0, 4093671441, 0, 0, 0, 3892375586, 0, 0, 0, 3489783876, 0, 0, + 0, 2684600457, 0, 0, 0, 1074233618, 0, 0, 0, 2148467236, 0, 0, 0, 1967176, 0, 0, + 0, 3934352, 0, 0, 0, 7868704, 0, 0, 0, 15737408, 0, 0, 0, 31474816, 0, 0, + 0, 62949632, 0, 0, 0, 125899264, 0, 0, 0, 251798529, 0, 0, 0, 503597059, 0, 0, + 0, 1007194118, 0, 0, 0, 2014388236, 0, 0, 0, 4028776473, 0, 0, 0, 3762585651, 0, 0, + 0, 3230204006, 0, 0, 0, 2165440716, 0, 0, 0, 1442054264, 0, 0, 0, 2884108528, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 
0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 1099834596, 0, 0, 0, 2199669192, 0, 0, 0, 104371089, 0, 0, 0, + 208742178, 0, 0, 0, 417484356, 0, 0, 0, 834968712, 0, 0, 0, 1669937425, 0, 0, 0, + 3339874850, 0, 0, 0, 2384782404, 0, 0, 0, 474597512, 0, 0, 0, 949195024, 0, 0, 0, + 1898390048, 0, 0, 0, 3796780096, 0, 0, 0, 3298592897, 0, 0, 0, 2302218499, 0, 0, 0, + 309469703, 0, 0, 0, 618939406, 0, 0, 0, 1237878812, 0, 0, 0, 2475757624, 0, 0, 0, + 656547953, 0, 0, 0, 1313095907, 0, 0, 0, 2626191815, 0, 0, 0, 957416334, 0, 0, 0, + 1914832669, 0, 0, 0, 3829665339, 0, 0, 0, 2298886291, 0, 0, 0, 302805287, 0, 0, 0, + 605610574, 0, 0, 0, 1211221148, 0, 0, 0, 2422442297, 0, 0, 0, 549917298, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 0, 4261413888, 0, 0, 0, 4227860480, 0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 
0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 0, 3400190380, 0, 0, 0, 2505413465, 0, 0, 0, 715859635, 0, 0, 0, + 1431719270, 0, 0, 0, 2863438540, 0, 0, 0, 1431909784, 0, 0, 0, 2863819568, 0, 0, 0, + 1432671840, 0, 0, 0, 2865343680, 0, 0, 0, 1435720064, 0, 0, 0, 2871440128, 0, 0, 0, + 1447912961, 0, 0, 0, 2895825923, 0, 0, 0, 1496684550, 0, 0, 0, 2993369100, 0, 0, 0, + 1691770904, 0, 0, 0, 3383541809, 0, 0, 0, 2472116322, 0, 0, 0, 649265349, 0, 0, 0, + 1298530698, 0, 0, 0, 2597061396, 0, 0, 0, 899155496, 0, 0, 0, 1798310993, 0, 0, 0, + 3596621986, 0, 0, 0, 2898276677, 0, 0, 0, 2469047078, 0, 0, 0, 643126861, 0, 0, 0, + 1286253722, 0, 0, 0, 2572507445, 0, 0, 0, 850047595, 0, 0, 0, 1700095190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 
0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 
0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 
819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, +}; + +// clang-format on +// clang-format off +static const unsigned int h_lfsr113_sequence_jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE] = { + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 0, 4261413888, 0, 0, 0, 4227860480, 0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 
0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 1874778401, 0, 0, 0, 3749556802, 0, 0, 0, 3204146308, 0, 0, 0, + 2113325320, 0, 0, 0, 4226650640, 0, 0, 0, 4158333984, 0, 0, 0, 4021700672, 0, 0, 0, + 3748434048, 0, 0, 0, 3201900800, 0, 0, 0, 2108834304, 0, 0, 0, 4217668608, 0, 0, 0, + 4140369920, 0, 0, 0, 3985772545, 0, 0, 0, 3676577794, 0, 0, 0, 3058188292, 0, 0, 0, + 1821409288, 0, 0, 0, 3642818577, 0, 0, 0, 2990669858, 0, 0, 0, 1686372420, 0, 0, 0, + 3372744841, 0, 0, 0, 2450522386, 0, 0, 0, 606077476, 0, 0, 0, 1212154952, 0, 0, 0, + 2424309905, 0, 0, 0, 553652514, 0, 0, 0, 767490916, 0, 0, 0, 1534981833, 0, 0, 0, + 3069963666, 0, 0, 0, 1844960036, 0, 0, 0, 3689920072, 0, 0, 0, 3084872848, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 
0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 882001920, 0, 0, 0, 1764003840, 0, 0, 0, 3528007680, 0, 0, 0, + 2761048065, 0, 0, 0, 1227128834, 0, 0, 0, 2454257668, 0, 0, 0, 613548040, 0, 0, 0, + 1227096080, 0, 0, 0, 2454192160, 0, 0, 0, 613417024, 0, 0, 0, 1226834048, 0, 0, 0, + 2453668096, 0, 0, 0, 612368896, 0, 0, 0, 1224737792, 0, 0, 0, 2449475584, 0, 0, 0, + 603983872, 0, 0, 0, 1207967744, 0, 0, 0, 2415935489, 0, 0, 0, 536903682, 0, 0, 0, + 1073807364, 0, 0, 0, 2147614729, 0, 0, 0, 262162, 0, 0, 0, 524324, 0, 0, 0, + 1048648, 0, 0, 0, 2097296, 0, 0, 0, 886196512, 0, 0, 0, 1772393024, 0, 0, 0, + 3544786048, 0, 0, 0, 2794604800, 0, 0, 0, 1294242304, 0, 0, 0, 2588484608, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 
0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, + 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, + 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, + 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, + 33554433, 0, 0, 0, 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, + 536870928, 0, 0, 0, 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 
130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, + 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 4160, 0, 0, 0, 8320, 0, 0, 0, 16640, 0, 0, 0, + 33280, 0, 
0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, + 532480, 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259840, 0, 0, 0, + 8519680, 0, 0, 0, 17039360, 0, 0, 0, 34078721, 0, 0, 0, 68157442, 0, 0, 0, + 136314884, 0, 0, 0, 272629768, 0, 0, 0, 545259536, 0, 0, 0, 1090519072, 0, 0, 0, + 2181038144, 0, 0, 0, 67108992, 0, 0, 0, 134217984, 0, 0, 0, 268435968, 0, 0, 0, + 536871936, 0, 0, 0, 1073743872, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, + 260, 0, 0, 0, 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8704, 0, 0, + 0, 17408, 0, 0, 0, 34816, 0, 0, 0, 69632, 0, 0, 0, 139264, 0, 0, + 0, 278528, 0, 0, 0, 557056, 0, 0, 0, 1114112, 0, 0, 0, 2228224, 0, 0, + 0, 4456448, 0, 0, 0, 8912896, 0, 0, 0, 17825792, 0, 0, 0, 35651584, 0, 0, + 0, 71303168, 0, 0, 0, 142606337, 0, 0, 0, 285212674, 0, 0, 0, 570425349, 0, 0, + 0, 1140850698, 0, 0, 0, 2281701397, 0, 0, 0, 268435498, 0, 0, 0, 536870997, 0, 0, + 0, 1073741994, 0, 0, 0, 2147483988, 0, 0, 0, 680, 0, 0, 0, 1360, 0, 0, + 0, 2720, 0, 0, 0, 5440, 0, 0, 0, 2176, 0, 0, 0, 4352, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1417720836, 0, 0, 0, 2835441672, 0, 0, 0, 1375916048, 0, 0, 0, 2751832097, 0, + 0, 0, 1208696898, 0, 0, 0, 2417393796, 0, 0, 0, 539820296, 0, 0, 0, 1079640592, 0, + 0, 0, 2159281184, 0, 0, 0, 23595072, 0, 0, 0, 47190144, 0, 0, 0, 94380288, 0, + 0, 0, 188760576, 0, 0, 0, 377521152, 0, 0, 0, 755042304, 0, 0, 0, 243442693, 0, + 0, 0, 486885387, 0, 0, 0, 973770774, 0, 0, 0, 1947541549, 0, 0, 0, 3895083098, 0, + 0, 0, 3495198900, 0, 0, 0, 2695430504, 0, 0, 0, 1095893712, 0, 0, 0, 2191787424, 0, + 0, 0, 88607552, 0, 0, 0, 177215104, 0, 0, 0, 354430209, 0, 0, 0, 708860418, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 
0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 0, 3162112, 0, 0, 0, 6324224, 0, 0, 0, 12648448, 0, 0, 0, + 25296896, 0, 0, 0, 50593793, 0, 0, 0, 101187587, 0, 0, 0, 202375174, 0, 0, 0, + 404750348, 0, 0, 0, 809500696, 0, 0, 0, 1619001392, 0, 0, 0, 3238002785, 0, 0, 0, + 2181038274, 0, 0, 0, 67109252, 0, 0, 0, 134218504, 0, 0, 0, 268437008, 0, 0, 0, + 536874016, 0, 0, 0, 1073748032, 0, 0, 0, 2147496065, 0, 0, 0, 24834, 0, 0, 0, + 49668, 0, 0, 0, 99336, 0, 0, 0, 198672, 0, 0, 0, 397344, 0, 0, 0, + 794688, 0, 0, 0, 1589376, 0, 0, 0, 49408, 0, 0, 0, 98816, 0, 0, 0, + 197632, 0, 0, 0, 395264, 0, 0, 0, 790528, 0, 0, 0, 1581056, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 134299649, 0, 0, + 0, 268599298, 0, 0, 0, 537198597, 0, 0, 0, 1074397194, 0, 0, 0, 2148794388, 0, 0, + 0, 2621480, 0, 0, 0, 5242960, 0, 0, 0, 10485920, 0, 0, 0, 20971840, 0, 0, + 0, 41943680, 0, 0, 0, 83887360, 0, 0, 0, 167774721, 0, 0, 0, 335549442, 0, 0, + 0, 671098884, 0, 0, 0, 1342197768, 0, 0, 0, 2684395537, 0, 0, 0, 1073823778, 0, 0, + 0, 2147647556, 0, 0, 0, 327816, 0, 0, 0, 655632, 0, 0, 0, 1311264, 0, 0, + 0, 2622528, 0, 0, 0, 5245056, 0, 0, 0, 10490112, 0, 0, 0, 20980224, 0, 0, + 0, 41960448, 0, 0, 0, 83920896, 0, 0, 0, 33574912, 0, 0, 0, 67149824, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 844445257, 0, 0, 0, 1688890514, 0, 0, 0, 3377781029, 0, 0, 0, 2460594762, 0, + 0, 0, 626222228, 0, 0, 0, 1252444457, 0, 0, 0, 2504888914, 0, 0, 0, 714810532, 0, + 0, 0, 1429621065, 0, 0, 0, 2859242131, 0, 0, 0, 1423516967, 0, 0, 0, 2847033934, 0, + 0, 0, 1399100572, 0, 0, 0, 2798201144, 0, 0, 0, 1301434992, 0, 0, 0, 2842792617, 0, + 0, 0, 1390617939, 0, 0, 0, 2781235878, 0, 0, 0, 
1267504461, 0, 0, 0, 2535008923, 0, + 0, 0, 775050550, 0, 0, 0, 1550101100, 0, 0, 0, 3100202201, 0, 0, 0, 1905437106, 0, + 0, 0, 3810874212, 0, 0, 0, 3326781129, 0, 0, 0, 2358594962, 0, 0, 0, 422222628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 2554888269, 0, 0, 0, 814809242, 0, 0, 0, 1629618484, 0, 0, 0, + 3259236968, 0, 0, 0, 2223506641, 0, 0, 0, 152045986, 0, 0, 0, 304091973, 0, 0, 0, + 608183946, 0, 0, 0, 1216367892, 0, 0, 0, 2432735785, 0, 0, 0, 570504275, 0, 0, 0, + 1141008550, 0, 0, 0, 2282017101, 0, 0, 0, 269066906, 0, 0, 0, 538133812, 0, 0, 0, + 1076267624, 0, 0, 0, 2152535249, 0, 0, 0, 10103202, 0, 0, 0, 20206404, 0, 0, 0, + 40412809, 0, 0, 0, 80825618, 0, 0, 0, 161651236, 0, 0, 0, 323302473, 0, 0, 0, + 646604947, 0, 0, 0, 1293209894, 0, 0, 0, 39920129, 0, 0, 0, 79840258, 0, 0, 0, + 159680516, 0, 0, 0, 319361033, 0, 0, 0, 638722067, 0, 0, 0, 1277444134, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336678914, 0, 0, + 0, 673357828, 0, 0, 0, 1346715656, 0, 0, 0, 2693431313, 0, 0, 0, 1091895330, 0, 0, + 0, 2183790660, 0, 0, 0, 72614024, 0, 0, 0, 145228049, 0, 0, 0, 290456098, 0, 0, + 0, 580912197, 0, 0, 0, 1161824394, 0, 0, 0, 2323648789, 0, 0, 0, 352330282, 0, 0, + 0, 704660564, 0, 0, 0, 1409321128, 0, 0, 0, 2818642256, 0, 0, 0, 1342317216, 0, 0, + 0, 2684634433, 0, 0, 0, 1074301570, 0, 0, 0, 2148603140, 0, 0, 0, 2238984, 0, 0, + 0, 4477968, 0, 0, 0, 8955936, 0, 0, 0, 
17911872, 0, 0, 0, 35823744, 0, 0, + 0, 71647488, 0, 0, 0, 143294977, 0, 0, 0, 84169728, 0, 0, 0, 168339457, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3001529316, 0, 0, 0, 1708091336, 0, 0, 0, 3416182673, 0, 0, 0, 2537398050, 0, + 0, 0, 779828804, 0, 0, 0, 1559657609, 0, 0, 0, 3119315218, 0, 0, 0, 1943663140, 0, + 0, 0, 3887326281, 0, 0, 0, 3479685267, 0, 0, 0, 2664403238, 0, 0, 0, 1033839181, 0, + 0, 0, 2067678363, 0, 0, 0, 4135356726, 0, 0, 0, 3975746156, 0, 0, 0, 1796577085, 0, + 0, 0, 3593154171, 0, 0, 0, 2891341046, 0, 0, 0, 1487714796, 0, 0, 0, 2975429593, 0, + 0, 0, 1655891891, 0, 0, 0, 3311783783, 0, 0, 0, 2328600271, 0, 0, 0, 362233247, 0, + 0, 0, 724466494, 0, 0, 0, 1448932988, 0, 0, 0, 2897865977, 0, 0, 0, 1500764658, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, + { + 0, 0, 0, 0, 1164254896, 0, 0, 0, 2328509792, 0, 0, 0, 362052288, 0, 0, 0, + 724104577, 0, 0, 0, 1448209155, 0, 0, 0, 2896418311, 0, 0, 0, 1497869326, 0, 0, 0, + 2995738652, 0, 0, 0, 1696510008, 0, 0, 0, 3393020016, 0, 0, 0, 2491072737, 0, 0, 0, + 687178178, 0, 0, 0, 1374356356, 0, 0, 0, 2748712712, 0, 0, 0, 1202458129, 0, 0, 0, + 2404916258, 0, 0, 0, 514865221, 0, 0, 0, 1029730442, 0, 0, 0, 2059460885, 0, 0, 0, + 4118921771, 0, 0, 0, 3942876246, 0, 0, 0, 3590785196, 0, 0, 0, 2886603097, 0, 0, 0, + 1478238898, 0, 0, 0, 2956477797, 0, 0, 0, 622171258, 0, 0, 0, 1244342517, 0, 0, 0, + 2488685035, 0, 
0, 0, 682402774, 0, 0, 0, 1364805548, 0, 0, 0, 2729611096, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339960066, 0, 0, + 0, 679920132, 0, 0, 0, 1359840264, 0, 0, 0, 2719680529, 0, 0, 0, 1144393762, 0, 0, + 0, 2288787525, 0, 0, 0, 282607754, 0, 0, 0, 565215509, 0, 0, 0, 1130431018, 0, 0, + 0, 2260862036, 0, 0, 0, 226756777, 0, 0, 0, 453513555, 0, 0, 0, 907027111, 0, 0, + 0, 1814054222, 0, 0, 0, 3628108445, 0, 0, 0, 2961249595, 0, 0, 0, 1627531895, 0, 0, + 0, 3255063790, 0, 0, 0, 2215160284, 0, 0, 0, 135353273, 0, 0, 0, 270706546, 0, 0, + 0, 541413093, 0, 0, 0, 1082826186, 0, 0, 0, 2165652372, 0, 0, 0, 36337448, 0, 0, + 0, 72674896, 0, 0, 0, 145349793, 0, 0, 0, 84990016, 0, 0, 0, 169980033, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2938358989, 0, 0, 0, 1581750682, 0, 0, 0, 3163501365, 0, 0, 0, 2032035435, 0, + 0, 0, 4064070870, 0, 0, 0, 3833174445, 0, 0, 0, 3371381594, 0, 0, 0, 2447795893, 0, + 0, 0, 600624491, 0, 0, 0, 1201248983, 0, 0, 0, 2402497966, 0, 0, 0, 510028637, 0, + 0, 0, 1020057274, 0, 0, 0, 2040114548, 0, 0, 0, 4080229097, 0, 0, 0, 1229289758, 0, + 0, 0, 2458579516, 0, 0, 0, 622191737, 0, 0, 0, 1244383475, 0, 0, 0, 2488766950, 0, + 0, 0, 682566604, 0, 0, 0, 1365133209, 0, 0, 0, 2730266419, 0, 0, 0, 1165565542, 0, + 0, 0, 2331131084, 0, 0, 0, 367294873, 0, 0, 0, 734589747, 0, 0, 0, 1469179494, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490940283, + 0, 0, 0, 981880567, 0, 0, 0, 1963761134, 0, 0, 0, 3927522269, 0, 0, 0, 3560077242, + 0, 0, 0, 2825187189, 0, 0, 0, 1355407083, 0, 0, 0, 2710814166, 0, 0, 0, 1126661037, + 0, 0, 0, 2253322074, 0, 0, 0, 211676852, 0, 0, 0, 423353704, 0, 0, 0, 846707408, + 0, 0, 0, 1693414817, 0, 0, 0, 3386829635, 0, 0, 0, 2478691975, 0, 0, 0, 662416654, + 0, 0, 0, 1324833308, 0, 0, 0, 2649666617, 0, 0, 0, 1004365938, 0, 0, 0, 2008731877, + 0, 0, 0, 4017463754, 0, 0, 0, 3282593007, 0, 0, 0, 2270218718, 0, 0, 0, 245470141, + }, + { + 0, 0, 0, 0, 
126298325, 0, 0, 0, 252596651, 0, 0, 0, 505193303, 0, 0, 0, + 1010386606, 0, 0, 0, 2020773212, 0, 0, 0, 4041546425, 0, 0, 0, 3788125555, 0, 0, 0, + 3281283814, 0, 0, 0, 2267600332, 0, 0, 0, 240233369, 0, 0, 0, 480466738, 0, 0, 0, + 960933476, 0, 0, 0, 1921866953, 0, 0, 0, 3843733907, 0, 0, 0, 3392500518, 0, 0, 0, + 2490033741, 0, 0, 0, 685100186, 0, 0, 0, 1370200372, 0, 0, 0, 2740400744, 0, 0, 0, + 1185834193, 0, 0, 0, 2371668387, 0, 0, 0, 448369479, 0, 0, 0, 896738958, 0, 0, 0, + 1793477917, 0, 0, 0, 3586955835, 0, 0, 0, 2887654563, 0, 0, 0, 1480341830, 0, 0, 0, + 2960683661, 0, 0, 0, 1626400026, 0, 0, 0, 3252800053, 0, 0, 0, 2210632810, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1457526048, 0, 0, + 0, 2915052096, 0, 0, 0, 1535136897, 0, 0, 0, 3070273795, 0, 0, 0, 1845580294, 0, 0, + 0, 3691160589, 0, 0, 0, 3087353882, 0, 0, 0, 1879740469, 0, 0, 0, 3759480939, 0, 0, + 0, 3223994582, 0, 0, 0, 2153021868, 0, 0, 0, 11076440, 0, 0, 0, 22152880, 0, 0, + 0, 44305760, 0, 0, 0, 88611520, 0, 0, 0, 177223041, 0, 0, 0, 354446082, 0, 0, + 0, 708892164, 0, 0, 0, 1417784328, 0, 0, 0, 2835568656, 0, 0, 0, 1376170016, 0, 0, + 0, 2752340033, 0, 0, 0, 1209712771, 0, 0, 0, 2419425542, 0, 0, 0, 543883789, 0, 0, + 0, 1087767578, 0, 0, 0, 2175535156, 0, 0, 0, 1438123336, 0, 0, 0, 2876246672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1801783692, 0, 0, 0, 3603567385, 0, 0, 0, 2912167474, 0, 0, 0, 1529367653, 0, + 0, 0, 3058735306, 0, 0, 0, 1822503316, 0, 0, 0, 3645006633, 0, 0, 0, 2995045970, 0, + 0, 0, 1695124645, 0, 0, 0, 3390249290, 0, 0, 0, 2485531285, 0, 0, 0, 676095274, 0, + 0, 0, 1352190548, 0, 0, 0, 2704381097, 0, 0, 0, 1113794898, 0, 0, 0, 4020460328, 0, + 0, 0, 3745953360, 0, 0, 0, 3196939425, 0, 0, 0, 2098911554, 0, 0, 0, 4197823108, 0, + 0, 0, 4100678921, 0, 0, 0, 3906390547, 0, 0, 0, 3517813798, 0, 0, 0, 2740660300, 0, + 0, 0, 1186353304, 0, 0, 0, 2372706609, 0, 0, 0, 450445923, 0, 0, 0, 900891846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42029323, + 0, 0, 0, 84058646, 0, 0, 0, 168117293, 0, 0, 0, 336234586, 0, 0, 0, 672469172, + 0, 0, 0, 1344938344, 0, 0, 0, 2689876689, 0, 0, 0, 1084786082, 0, 0, 0, 2169572165, + 0, 0, 0, 44177035, 0, 0, 0, 88354071, 0, 0, 0, 176708143, 0, 0, 0, 353416286, + 0, 0, 0, 706832573, 0, 0, 0, 1413665147, 0, 0, 0, 2827330294, 0, 0, 0, 1359693292, + 0, 0, 0, 2719386585, 0, 0, 0, 1143805874, 0, 0, 0, 2287611749, 0, 0, 0, 280256202, + 0, 0, 0, 560512405, 0, 0, 0, 1078995489, 0, 0, 0, 2157990978, 0, 0, 0, 21014661, + }, + { + 0, 0, 0, 0, 3480795388, 0, 0, 0, 2666623480, 0, 0, 0, 1038279664, 0, 0, 0, + 2076559329, 0, 0, 0, 4153118658, 0, 0, 0, 4011270020, 0, 0, 0, 3727572744, 0, 0, 0, + 3160178193, 0, 0, 0, 2025389090, 0, 0, 0, 4050778181, 0, 0, 0, 3806589066, 0, 0, 0, + 3318210837, 0, 0, 0, 2341454378, 0, 0, 0, 387941461, 0, 0, 0, 775882923, 0, 0, 0, + 1551765846, 0, 0, 0, 3103531693, 0, 0, 0, 1912096090, 0, 0, 0, 3824192180, 0, 0, 0, + 3353417064, 0, 0, 0, 2411866832, 0, 0, 0, 528766369, 0, 0, 0, 1057532739, 0, 0, 0, + 2115065479, 0, 0, 0, 4230130959, 0, 0, 0, 926802659, 0, 0, 0, 1853605319, 0, 0, 0, + 3707210639, 0, 0, 0, 3119453983, 0, 0, 0, 1943940671, 0, 0, 0, 3887881342, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4124527681, 0, 0, + 0, 3954088066, 0, 0, 0, 3613208836, 0, 0, 0, 2931450376, 0, 0, 0, 1567933457, 0, 0, + 0, 3135866914, 0, 0, 0, 1976766533, 0, 0, 0, 3953533066, 0, 0, 0, 3612098836, 0, 0, + 0, 2929230376, 0, 0, 0, 1563493457, 0, 0, 0, 3126986914, 0, 0, 0, 1959006533, 0, 0, + 0, 3918013066, 0, 0, 0, 3541058836, 0, 0, 0, 2787150377, 0, 0, 0, 1279333459, 0, 0, + 0, 2558666919, 0, 0, 0, 822366543, 0, 0, 0, 1644733087, 0, 0, 0, 3289466174, 0, 0, + 0, 2283965053, 0, 0, 0, 272962810, 0, 0, 0, 545925621, 0, 0, 0, 1091851242, 0, 0, + 0, 2183702484, 0, 0, 0, 72437672, 0, 0, 0, 4252357392, 0, 0, 0, 4209747488, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2656366736, 0, 0, 0, 1017766176, 0, 0, 
0, 2035532352, 0, 0, 0, 4071064704, 0, + 0, 0, 3847162112, 0, 0, 0, 3399356928, 0, 0, 0, 2503746561, 0, 0, 0, 712525826, 0, + 0, 0, 1425051652, 0, 0, 0, 2850103304, 0, 0, 0, 1405239313, 0, 0, 0, 2810478627, 0, + 0, 0, 1325989958, 0, 0, 0, 2651979917, 0, 0, 0, 1008992539, 0, 0, 0, 3860656807, 0, + 0, 0, 3426346319, 0, 0, 0, 2557725342, 0, 0, 0, 820483388, 0, 0, 0, 1640966776, 0, + 0, 0, 3281933552, 0, 0, 0, 2268899809, 0, 0, 0, 242832322, 0, 0, 0, 485664644, 0, + 0, 0, 971329289, 0, 0, 0, 1942658578, 0, 0, 0, 3885317156, 0, 0, 0, 3475667016, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537135248, + 0, 0, 0, 1074270496, 0, 0, 0, 2148540992, 0, 0, 0, 2114688, 0, 0, 0, 4229377, + 0, 0, 0, 8458754, 0, 0, 0, 16917508, 0, 0, 0, 33835017, 0, 0, 0, 67670034, + 0, 0, 0, 135340068, 0, 0, 0, 270680136, 0, 0, 0, 541360273, 0, 0, 0, 1082720546, + 0, 0, 0, 2165441092, 0, 0, 0, 35914889, 0, 0, 0, 71829779, 0, 0, 0, 143659558, + 0, 0, 0, 287319116, 0, 0, 0, 574638232, 0, 0, 0, 1149276464, 0, 0, 0, 2298552928, + 0, 0, 0, 302138561, 0, 0, 0, 67141906, 0, 0, 0, 134283812, 0, 0, 0, 268567624, + }, + { + 0, 0, 0, 0, 460238757, 0, 0, 0, 920477515, 0, 0, 0, 1840955030, 0, 0, 0, + 3681910060, 0, 0, 0, 3068852824, 0, 0, 0, 1842738352, 0, 0, 0, 3685476704, 0, 0, 0, + 3075986112, 0, 0, 0, 1857004929, 0, 0, 0, 3714009859, 0, 0, 0, 3133052422, 0, 0, 0, + 1971137548, 0, 0, 0, 3942275096, 0, 0, 0, 3589582897, 0, 0, 0, 2884198498, 0, 0, 0, + 1473429701, 0, 0, 0, 2946859402, 0, 0, 0, 1598751509, 0, 0, 0, 3197503018, 0, 0, 0, + 2100038740, 0, 0, 0, 4200077480, 0, 0, 0, 4105187665, 0, 0, 0, 3915408035, 0, 0, 0, + 3535848774, 0, 0, 0, 2776730253, 0, 0, 0, 1349368510, 0, 0, 0, 2698737021, 0, 0, 0, + 1102506746, 0, 0, 0, 2205013492, 0, 0, 0, 115059689, 0, 0, 0, 230119378, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3560175108, 0, 0, + 0, 2825382920, 0, 0, 0, 1355798544, 0, 0, 0, 2711597089, 0, 0, 0, 1128226882, 0, 0, + 0, 2256453764, 0, 0, 0, 
217940233, 0, 0, 0, 435880467, 0, 0, 0, 871760935, 0, 0, + 0, 1743521871, 0, 0, 0, 3487043743, 0, 0, 0, 2679120191, 0, 0, 0, 1063273086, 0, 0, + 0, 2126546172, 0, 0, 0, 4253092344, 0, 0, 0, 4211217392, 0, 0, 0, 4127467489, 0, 0, + 0, 3959967682, 0, 0, 0, 3624968069, 0, 0, 0, 2954968843, 0, 0, 0, 1614970391, 0, 0, + 0, 3229940782, 0, 0, 0, 2164914268, 0, 0, 0, 34861240, 0, 0, 0, 69722480, 0, 0, + 0, 139444961, 0, 0, 0, 278889922, 0, 0, 0, 4111269249, 0, 0, 0, 3927571202, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2424557694, 0, 0, 0, 554148093, 0, 0, 0, 1108296186, 0, 0, 0, 2216592373, 0, + 0, 0, 138217450, 0, 0, 0, 276434901, 0, 0, 0, 552869802, 0, 0, 0, 1105739604, 0, + 0, 0, 2211479208, 0, 0, 0, 127991121, 0, 0, 0, 255982243, 0, 0, 0, 511964486, 0, + 0, 0, 1023928972, 0, 0, 0, 2047857944, 0, 0, 0, 4095715888, 0, 0, 0, 2025620510, 0, + 0, 0, 4051241021, 0, 0, 0, 3807514746, 0, 0, 0, 3320062196, 0, 0, 0, 2345157096, 0, + 0, 0, 395346896, 0, 0, 0, 790693792, 0, 0, 0, 1581387585, 0, 0, 0, 3162775171, 0, + 0, 0, 2030583047, 0, 0, 0, 4061166095, 0, 0, 0, 3827364895, 0, 0, 0, 3359762495, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, + 0, 0, 0, 2097152, 0, 0, 0, 4194305, 0, 0, 0, 8388610, 0, 0, 0, 16777220, + 0, 0, 0, 33554441, 0, 0, 0, 67108882, 0, 0, 0, 134217764, 0, 0, 0, 268435528, + 0, 0, 0, 536871056, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + 0, 0, 0, 2304, 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, + 0, 0, 0, 36864, 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, + 0, 0, 0, 589824, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, + }, + { + 0, 0, 0, 0, 1980210557, 0, 0, 0, 3960421115, 0, 0, 0, 3625874935, 0, 0, 0, + 2956782575, 0, 0, 0, 1618597854, 0, 0, 0, 3237195709, 0, 0, 0, 2179424123, 0, 0, 0, + 63880951, 0, 0, 0, 127761903, 0, 0, 0, 255523807, 0, 0, 0, 511047615, 0, 0, 0, + 1022095230, 0, 0, 0, 2044190460, 0, 0, 0, 4088380920, 0, 0, 0, 3881794544, 0, 
0, 0, + 3468621792, 0, 0, 0, 2642276289, 0, 0, 0, 989585283, 0, 0, 0, 1979170566, 0, 0, 0, + 3958341132, 0, 0, 0, 3621714968, 0, 0, 0, 2948462640, 0, 0, 0, 1601957985, 0, 0, 0, + 3203915970, 0, 0, 0, 2112864644, 0, 0, 0, 2379751029, 0, 0, 0, 464534763, 0, 0, 0, + 929069527, 0, 0, 0, 1858139055, 0, 0, 0, 3716278111, 0, 0, 0, 3137588926, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3625984309, 0, 0, + 0, 2957001323, 0, 0, 0, 1619035351, 0, 0, 0, 3238070702, 0, 0, 0, 2181174108, 0, 0, + 0, 67380920, 0, 0, 0, 134761841, 0, 0, 0, 269523682, 0, 0, 0, 539047365, 0, 0, + 0, 1078094730, 0, 0, 0, 2156189460, 0, 0, 0, 17411624, 0, 0, 0, 34823248, 0, 0, + 0, 69646496, 0, 0, 0, 139292993, 0, 0, 0, 278585986, 0, 0, 0, 557171973, 0, 0, + 0, 1114343946, 0, 0, 0, 2228687892, 0, 0, 0, 162408489, 0, 0, 0, 324816978, 0, 0, + 0, 649633957, 0, 0, 0, 1299267915, 0, 0, 0, 2598535831, 0, 0, 0, 902104367, 0, 0, + 0, 1804208734, 0, 0, 0, 3608417468, 0, 0, 0, 1980237901, 0, 0, 0, 3960475802, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70539992, 0, 0, 0, 141079985, 0, 0, 0, 282159971, 0, 0, 0, 564319943, 0, + 0, 0, 1128639887, 0, 0, 0, 2257279774, 0, 0, 0, 219592253, 0, 0, 0, 439184507, 0, + 0, 0, 878369014, 0, 0, 0, 1756738029, 0, 0, 0, 3513476059, 0, 0, 0, 2731984823, 0, + 0, 0, 1169002351, 0, 0, 0, 2338004702, 0, 0, 0, 381042109, 0, 0, 0, 693641634, 0, + 0, 0, 1387283269, 0, 0, 0, 2774566539, 0, 0, 0, 1254165782, 0, 0, 0, 2508331565, 0, + 0, 0, 721695834, 0, 0, 0, 1443391669, 0, 0, 0, 2886783339, 0, 0, 0, 1478599382, 0, + 0, 0, 2957198765, 0, 0, 0, 1619430235, 0, 0, 0, 3238860470, 0, 0, 0, 2182753644, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33280, + 0, 0, 0, 66560, 0, 0, 0, 133120, 0, 0, 0, 266240, 0, 0, 0, 532480, + 0, 0, 0, 1064960, 0, 0, 0, 2129920, 0, 0, 0, 4259841, 0, 0, 0, 8519682, + 0, 0, 0, 17039364, 0, 0, 0, 34078729, 0, 0, 0, 68157458, 0, 0, 0, 136314916, + 0, 0, 0, 272629833, 0, 0, 0, 545259666, 
0, 0, 0, 1090519332, 0, 0, 0, 2181038665, + 0, 0, 0, 67110034, 0, 0, 0, 134220068, 0, 0, 0, 268440136, 0, 0, 0, 536880272, + 0, 0, 0, 1073760544, 0, 0, 0, 2147487808, 0, 0, 0, 8320, 0, 0, 0, 16640, + }, + { + 0, 0, 0, 0, 893107048, 0, 0, 0, 1786214097, 0, 0, 0, 3572428195, 0, 0, 0, + 2849889095, 0, 0, 0, 1404810895, 0, 0, 0, 2809621790, 0, 0, 0, 1324276285, 0, 0, 0, + 2648552571, 0, 0, 0, 1002137847, 0, 0, 0, 2004275695, 0, 0, 0, 4008551390, 0, 0, 0, + 3722135485, 0, 0, 0, 3149303674, 0, 0, 0, 2003640053, 0, 0, 0, 4007280106, 0, 0, 0, + 3719592917, 0, 0, 0, 3144218538, 0, 0, 0, 1993469781, 0, 0, 0, 3986939563, 0, 0, 0, + 3678911830, 0, 0, 0, 3062856364, 0, 0, 0, 1830745432, 0, 0, 0, 3661490864, 0, 0, 0, + 3028014433, 0, 0, 0, 1761061570, 0, 0, 0, 3839160045, 0, 0, 0, 3383352795, 0, 0, 0, + 2471738294, 0, 0, 0, 648509293, 0, 0, 0, 1297018586, 0, 0, 0, 2594037172, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99817600, 0, 0, + 0, 199635201, 0, 0, 0, 399270402, 0, 0, 0, 798540804, 0, 0, 0, 1597081609, 0, 0, + 0, 3194163218, 0, 0, 0, 2093359140, 0, 0, 0, 4186718280, 0, 0, 0, 4078469265, 0, 0, + 0, 3861971235, 0, 0, 0, 3428975175, 0, 0, 0, 2562983055, 0, 0, 0, 830998815, 0, 0, + 0, 1661997631, 0, 0, 0, 3323995262, 0, 0, 0, 2353023229, 0, 0, 0, 411079163, 0, 0, + 0, 822158327, 0, 0, 0, 1644316655, 0, 0, 0, 3288633310, 0, 0, 0, 2282299325, 0, 0, + 0, 269631354, 0, 0, 0, 539262709, 0, 0, 0, 1078525418, 0, 0, 0, 2157050836, 0, 0, + 0, 19134376, 0, 0, 0, 38268752, 0, 0, 0, 24954400, 0, 0, 0, 49908800, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1770227281, 0, 0, 0, 3540454563, 0, 0, 0, 2785941830, 0, 0, 0, 1276916364, 0, + 0, 0, 2553832729, 0, 0, 0, 812698162, 0, 0, 0, 1625396325, 0, 0, 0, 3250792650, 0, + 0, 0, 2206618004, 0, 0, 0, 118268713, 0, 0, 0, 236537426, 0, 0, 0, 473074852, 0, + 0, 0, 946149705, 0, 0, 0, 1892299411, 0, 0, 0, 3784598822, 0, 0, 0, 2863352860, 0, + 0, 0, 1431738424, 0, 0, 0, 2863476848, 0, 0, 0, 1431986401, 0, 0, 0, 2863972803, 
0, + 0, 0, 1432978310, 0, 0, 0, 2865956620, 0, 0, 0, 1436945945, 0, 0, 0, 2873891890, 0, + 0, 0, 1452816485, 0, 0, 0, 2905632970, 0, 0, 0, 1516298644, 0, 0, 0, 3032597288, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180224, + 0, 0, 0, 360448, 0, 0, 0, 720896, 0, 0, 0, 1441792, 0, 0, 0, 2883584, + 0, 0, 0, 5767169, 0, 0, 0, 11534338, 0, 0, 0, 23068677, 0, 0, 0, 46137354, + 0, 0, 0, 92274708, 0, 0, 0, 184549417, 0, 0, 0, 369098835, 0, 0, 0, 738197670, + 0, 0, 0, 1476395340, 0, 0, 0, 2952790680, 0, 0, 0, 1610614064, 0, 0, 0, 3221228128, + 0, 0, 0, 2147488960, 0, 0, 0, 10624, 0, 0, 0, 21248, 0, 0, 0, 42496, + 0, 0, 0, 84992, 0, 0, 0, 22528, 0, 0, 0, 45056, 0, 0, 0, 90112, + }, + { + 0, 0, 0, 0, 2113541784, 0, 0, 0, 4227083568, 0, 0, 0, 4159199840, 0, 0, 0, + 4023432384, 0, 0, 0, 3751897472, 0, 0, 0, 3208827648, 0, 0, 0, 2122688001, 0, 0, 0, + 4245376003, 0, 0, 0, 4195784710, 0, 0, 0, 4096602125, 0, 0, 0, 3898236955, 0, 0, 0, + 3501506615, 0, 0, 0, 2708045935, 0, 0, 0, 1121124575, 0, 0, 0, 2242249151, 0, 0, 0, + 189531007, 0, 0, 0, 379062015, 0, 0, 0, 758124030, 0, 0, 0, 1516248061, 0, 0, 0, + 3032496123, 0, 0, 0, 1770024950, 0, 0, 0, 3540049900, 0, 0, 0, 2785132504, 0, 0, 0, + 1275297712, 0, 0, 0, 2550595425, 0, 0, 0, 1308092506, 0, 0, 0, 2616185012, 0, 0, 0, + 937402729, 0, 0, 0, 1874805459, 0, 0, 0, 3749610918, 0, 0, 0, 3204254540, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1409835056, 0, 0, + 0, 2819670112, 0, 0, 0, 1344372928, 0, 0, 0, 2688745857, 0, 0, 0, 1082524418, 0, 0, + 0, 2165048836, 0, 0, 0, 35130376, 0, 0, 0, 70260752, 0, 0, 0, 140521505, 0, 0, + 0, 281043010, 0, 0, 0, 562086021, 0, 0, 0, 1124172042, 0, 0, 0, 2248344084, 0, 0, + 0, 201720873, 0, 0, 0, 403441747, 0, 0, 0, 806883495, 0, 0, 0, 1613766991, 0, 0, + 0, 3227533982, 0, 0, 0, 2160100668, 0, 0, 0, 25234040, 0, 0, 0, 50468080, 0, 0, + 0, 100936160, 0, 0, 0, 201872321, 0, 0, 0, 403744643, 0, 0, 0, 807489287, 0, 0, + 0, 1614978575, 0, 0, 0, 
3229957150, 0, 0, 0, 3573684236, 0, 0, 0, 2852401176, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 302254233, 0, 0, 0, 604508466, 0, 0, 0, 1209016932, 0, 0, 0, 2418033865, 0, + 0, 0, 541100435, 0, 0, 0, 1082200870, 0, 0, 0, 2164401740, 0, 0, 0, 33836184, 0, + 0, 0, 67672369, 0, 0, 0, 135344738, 0, 0, 0, 270689477, 0, 0, 0, 541378955, 0, + 0, 0, 1082757911, 0, 0, 0, 2165515822, 0, 0, 0, 36064348, 0, 0, 0, 373854240, 0, + 0, 0, 747708480, 0, 0, 0, 1495416961, 0, 0, 0, 2990833922, 0, 0, 0, 1686700548, 0, + 0, 0, 3373401096, 0, 0, 0, 2451834897, 0, 0, 0, 608702498, 0, 0, 0, 1217404996, 0, + 0, 0, 2434809993, 0, 0, 0, 574652691, 0, 0, 0, 1149305382, 0, 0, 0, 2298610764, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37905416, + 0, 0, 0, 75810832, 0, 0, 0, 151621664, 0, 0, 0, 303243329, 0, 0, 0, 606486658, + 0, 0, 0, 1212973317, 0, 0, 0, 2425946634, 0, 0, 0, 556925972, 0, 0, 0, 1113851944, + 0, 0, 0, 2227703889, 0, 0, 0, 160440482, 0, 0, 0, 320880965, 0, 0, 0, 641761930, + 0, 0, 0, 1283523860, 0, 0, 0, 2567047720, 0, 0, 0, 839128145, 0, 0, 0, 1678256290, + 0, 0, 0, 3356512580, 0, 0, 0, 2418057864, 0, 0, 0, 541148433, 0, 0, 0, 1082296866, + 0, 0, 0, 2164593732, 0, 0, 0, 4738177, 0, 0, 0, 9476354, 0, 0, 0, 18952708, + }, + { + 0, 0, 0, 0, 307855197, 0, 0, 0, 615710394, 0, 0, 0, 1231420788, 0, 0, 0, + 2462841576, 0, 0, 0, 630715856, 0, 0, 0, 1261431713, 0, 0, 0, 2522863426, 0, 0, 0, + 750759556, 0, 0, 0, 1501519112, 0, 0, 0, 3003038224, 0, 0, 0, 1711109152, 0, 0, 0, + 3422218304, 0, 0, 0, 2549469312, 0, 0, 0, 803971329, 0, 0, 0, 1607942659, 0, 0, 0, + 3215885318, 0, 0, 0, 2136803341, 0, 0, 0, 4273606682, 0, 0, 0, 4252246069, 0, 0, 0, + 4209524842, 0, 0, 0, 4124082389, 0, 0, 0, 3953197482, 0, 0, 0, 3611427668, 0, 0, 0, + 2927888040, 0, 0, 0, 1560808784, 0, 0, 0, 2823382525, 0, 0, 0, 1351797754, 0, 0, 0, + 2703595509, 0, 0, 0, 1112223723, 0, 0, 0, 2224447447, 0, 0, 0, 153927598, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1430273920, 0, 0, + 0, 2860547840, 0, 0, 0, 1426128384, 0, 0, 0, 2852256768, 0, 0, 0, 1409546240, 0, 0, + 0, 2819092480, 0, 0, 0, 1343217664, 0, 0, 0, 2686435329, 0, 0, 0, 1077903362, 0, 0, + 0, 2155806724, 0, 0, 0, 16646152, 0, 0, 0, 33292304, 0, 0, 0, 66584608, 0, 0, + 0, 133169216, 0, 0, 0, 266338433, 0, 0, 0, 532676867, 0, 0, 0, 1065353734, 0, 0, + 0, 2130707468, 0, 0, 0, 4261414936, 0, 0, 0, 4227862576, 0, 0, 0, 4160757856, 0, 0, + 0, 4026548417, 0, 0, 0, 3758129539, 0, 0, 0, 3221291782, 0, 0, 0, 2147616268, 0, 0, + 0, 265240, 0, 0, 0, 530480, 0, 0, 0, 1431310304, 0, 0, 0, 2862620608, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2731147994, + 0, 0, 0, 1167328692, 0, 0, 0, 2334657385, 0, 0, 0, 374347474, 0, 0, 0, 748694948, + 0, 0, 0, 1497389897, 0, 0, 0, 2994779795, 0, 0, 0, 1694592294, 0, 0, 0, 3389184589, + 0, 0, 0, 2483401882, 0, 0, 0, 671836468, 0, 0, 0, 1343672936, 0, 0, 0, 2687345872, + 0, 0, 0, 1079724449, 0, 0, 0, 2159448898, 0, 0, 0, 23930501, 0, 0, 0, 47861002, + 0, 0, 0, 95722004, 0, 0, 0, 191444008, 0, 0, 0, 382888016, 0, 0, 0, 765776032, + 0, 0, 0, 1531552064, 0, 0, 0, 341393499, 0, 0, 0, 682786998, 0, 0, 0, 1365573997, + }, + { + 0, 0, 0, 0, 1769338744, 0, 0, 0, 3538677488, 0, 0, 0, 2782387681, 0, 0, 0, + 1269808067, 0, 0, 0, 2539616134, 0, 0, 0, 784264973, 0, 0, 0, 1568529946, 
0, 0, 0, + 3137059892, 0, 0, 0, 1979152488, 0, 0, 0, 3958304976, 0, 0, 0, 3621642656, 0, 0, 0, + 2948318016, 0, 0, 0, 1601668737, 0, 0, 0, 3203337474, 0, 0, 0, 2111707652, 0, 0, 0, + 4223415304, 0, 0, 0, 4151863312, 0, 0, 0, 4008759328, 0, 0, 0, 3722551361, 0, 0, 0, + 3150135426, 0, 0, 0, 2005303557, 0, 0, 0, 4010607114, 0, 0, 0, 3726246932, 0, 0, 0, + 3157526569, 0, 0, 0, 2020085842, 0, 0, 0, 2577782749, 0, 0, 0, 860598203, 0, 0, 0, + 1721196407, 0, 0, 0, 3442392815, 0, 0, 0, 2589818334, 0, 0, 0, 884669372, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4294705152, 0, 0, + 0, 4294443008, 0, 0, 0, 4293918720, 0, 0, 0, 4292870144, 0, 0, 0, 4290772992, 0, 0, + 0, 4286578688, 0, 0, 0, 4278190080, 0, 0, 0, 4261412864, 0, 0, 0, 4227858432, 0, 0, + 0, 4160749568, 0, 0, 0, 4026531841, 0, 0, 0, 3758096387, 0, 0, 0, 3221225478, 0, 0, + 0, 2147483660, 0, 0, 0, 24, 0, 0, 0, 48, 0, 0, 0, 96, 0, 0, + 0, 192, 0, 0, 0, 384, 0, 0, 0, 768, 0, 0, 0, 1536, 0, 0, + 0, 3072, 0, 0, 0, 6144, 0, 0, 0, 12288, 0, 0, 0, 24576, 0, 0, + 0, 49152, 0, 0, 0, 98304, 0, 0, 0, 4294901760, 0, 0, 0, 4294836224, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3732057110, + 0, 0, 0, 3169146925, 0, 0, 0, 2043326555, 0, 0, 0, 4086653111, 0, 0, 0, 3878338927, + 0, 0, 0, 3461710558, 0, 0, 0, 2628453820, 0, 0, 0, 961940345, 0, 0, 0, 1923880691, + 0, 0, 0, 3847761383, 0, 0, 0, 3400555471, 0, 0, 0, 2506143647, 0, 0, 0, 
717319998, + 0, 0, 0, 1434639996, 0, 0, 0, 2869279993, 0, 0, 0, 1443592691, 0, 0, 0, 2887185382, + 0, 0, 0, 1479403468, 0, 0, 0, 2958806937, 0, 0, 0, 1622646578, 0, 0, 0, 3245293157, + 0, 0, 0, 2195619018, 0, 0, 0, 3687732610, 0, 0, 0, 3080497925, 0, 0, 0, 1866028555, + }, + { + 0, 0, 0, 0, 2045181768, 0, 0, 0, 4090363536, 0, 0, 0, 3885759776, 0, 0, 0, + 3476552256, 0, 0, 0, 2658137216, 0, 0, 0, 1021307136, 0, 0, 0, 2042614272, 0, 0, 0, + 4085228544, 0, 0, 0, 3875489792, 0, 0, 0, 3456012289, 0, 0, 0, 2617057282, 0, 0, 0, + 939147269, 0, 0, 0, 1878294539, 0, 0, 0, 3756589078, 0, 0, 0, 3218210860, 0, 0, 0, + 2141454425, 0, 0, 0, 4282908850, 0, 0, 0, 4270850404, 0, 0, 0, 4246733513, 0, 0, 0, + 4198499730, 0, 0, 0, 4102032165, 0, 0, 0, 3909097035, 0, 0, 0, 3523226774, 0, 0, 0, + 2751486253, 0, 0, 0, 1208005210, 0, 0, 0, 3924270077, 0, 0, 0, 3553572858, 0, 0, 0, + 2812178420, 0, 0, 0, 1329389545, 0, 0, 0, 2658779090, 0, 0, 0, 1022590884, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, + 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, 0, + 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, 0, + 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, + 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, + 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554432, 0, 0, + 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, 0, 536870917, 0, 0, + 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 
0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2033130968, + 0, 0, 0, 4066261936, 0, 0, 0, 3837556576, 0, 0, 0, 3380145857, 0, 0, 0, 2465324418, + 0, 0, 0, 635681541, 0, 0, 0, 1271363082, 0, 0, 0, 2542726165, 0, 0, 0, 790485035, + 0, 0, 0, 1580970071, 0, 0, 0, 3161940143, 0, 0, 0, 2028912991, 0, 0, 0, 4057825983, + 0, 0, 0, 3820684671, 0, 0, 0, 3346402046, 0, 0, 0, 2397836796, 0, 0, 0, 500706297, + 0, 0, 0, 1001412595, 0, 0, 0, 2002825190, 0, 0, 0, 4005650380, 0, 0, 0, 3716333464, + 0, 0, 0, 3137699633, 0, 0, 0, 254141371, 0, 0, 0, 508282742, 0, 0, 0, 1016565484, + }, + { + 0, 0, 0, 0, 698651200, 0, 0, 0, 1397302401, 0, 0, 0, 2794604802, 0, 0, 0, + 1294242308, 0, 0, 0, 2588484616, 0, 0, 0, 882001936, 0, 0, 0, 1764003872, 0, 0, 0, + 3528007744, 0, 0, 0, 2761048193, 0, 0, 0, 1227129090, 0, 0, 0, 2454258180, 0, 0, 0, + 613549064, 0, 0, 0, 1227098128, 0, 0, 0, 2454196256, 0, 0, 0, 613425216, 0, 0, 0, + 1226850432, 0, 0, 0, 2453700864, 0, 0, 0, 612434432, 0, 0, 0, 1224868864, 0, 0, 0, + 2449737728, 0, 0, 0, 604508160, 0, 0, 0, 1209016320, 0, 0, 0, 2418032641, 0, 0, 0, + 541097986, 0, 0, 0, 1082195972, 0, 0, 0, 2829488713, 0, 0, 0, 1364010130, 0, 0, 0, + 2728020260, 0, 0, 0, 1161073224, 0, 0, 0, 2322146448, 0, 0, 0, 349325600, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 
0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2199499341, + 0, 0, 0, 104031387, 0, 0, 0, 208062775, 0, 0, 0, 416125551, 0, 0, 0, 832251102, + 0, 0, 0, 1664502205, 0, 0, 0, 3329004410, 0, 0, 0, 2363041525, 0, 0, 0, 431115754, + 0, 0, 0, 862231508, 0, 0, 0, 1724463016, 0, 0, 0, 3448926032, 0, 0, 0, 2602884769, + 0, 0, 0, 910802242, 0, 0, 0, 1821604484, 0, 0, 0, 3643208968, 0, 0, 0, 2991450640, + 0, 0, 0, 1687933984, 0, 0, 0, 3375867968, 0, 0, 0, 2456768640, 0, 0, 0, 618569985, + 0, 0, 0, 1237139970, 0, 0, 0, 274937417, 0, 0, 0, 549874835, 0, 0, 0, 1099749670, + }, + { + 0, 0, 0, 0, 1226833920, 0, 0, 0, 2453667840, 0, 0, 0, 612368384, 0, 0, 0, + 1224736768, 0, 0, 0, 2449473536, 0, 0, 0, 603979776, 0, 0, 0, 1207959552, 0, 0, 0, + 2415919105, 0, 0, 0, 536870914, 0, 0, 0, 1073741828, 0, 0, 0, 2147483657, 0, 0, 0, + 18, 0, 0, 0, 36, 0, 0, 0, 72, 0, 0, 0, 144, 0, 0, 0, + 288, 0, 0, 0, 576, 0, 0, 0, 1152, 0, 0, 0, 2304, 0, 0, 0, + 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, 0, 0, 0, + 73728, 0, 0, 0, 147456, 0, 0, 0, 1227128832, 0, 0, 0, 2454257664, 0, 0, 0, + 613548032, 0, 0, 0, 1227096064, 0, 0, 0, 2454192128, 0, 0, 0, 613416960, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 320, 0, 0, + 0, 640, 0, 0, 0, 1280, 0, 0, 0, 2560, 0, 0, 0, 5120, 0, 0, + 0, 10240, 0, 0, 0, 20480, 0, 0, 0, 40960, 0, 0, 0, 81920, 0, 0, + 0, 163840, 0, 0, 0, 327680, 0, 0, 0, 655360, 0, 0, 0, 
1310720, 0, 0, + 0, 2621440, 0, 0, 0, 5242880, 0, 0, 0, 10485760, 0, 0, 0, 20971520, 0, 0, + 0, 41943040, 0, 0, 0, 83886080, 0, 0, 0, 167772161, 0, 0, 0, 335544322, 0, 0, + 0, 671088644, 0, 0, 0, 1342177288, 0, 0, 0, 2684354577, 0, 0, 0, 1073741858, 0, 0, + 0, 2147483716, 0, 0, 0, 136, 0, 0, 0, 80, 0, 0, 0, 160, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1417720836, 0, 0, 0, 2835441672, 0, 0, 0, 1375916048, 0, 0, 0, 2751832097, 0, + 0, 0, 1208696898, 0, 0, 0, 2417393796, 0, 0, 0, 539820296, 0, 0, 0, 1079640592, 0, + 0, 0, 2159281184, 0, 0, 0, 23595072, 0, 0, 0, 47190144, 0, 0, 0, 94380288, 0, + 0, 0, 188760576, 0, 0, 0, 377521152, 0, 0, 0, 755042304, 0, 0, 0, 243442693, 0, + 0, 0, 486885387, 0, 0, 0, 973770774, 0, 0, 0, 1947541549, 0, 0, 0, 3895083098, 0, + 0, 0, 3495198900, 0, 0, 0, 2695430504, 0, 0, 0, 1095893712, 0, 0, 0, 2191787424, 0, + 0, 0, 88607552, 0, 0, 0, 177215104, 0, 0, 0, 354430209, 0, 0, 0, 708860418, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4196822677, + 0, 0, 0, 4098678059, 0, 0, 0, 3902388822, 0, 0, 0, 3509810348, 0, 0, 0, 2724653400, + 0, 0, 0, 1154339505, 0, 0, 0, 2308679010, 0, 0, 0, 322390725, 0, 0, 0, 644781450, + 0, 0, 0, 1289562901, 0, 0, 0, 2579125802, 0, 0, 0, 863284308, 0, 0, 0, 1726568616, + 0, 0, 0, 3453137233, 0, 0, 0, 2611307171, 0, 0, 0, 927647046, 0, 0, 0, 1855294093, + 0, 0, 0, 3710588186, 0, 0, 0, 3126209076, 0, 0, 0, 1957450856, 0, 0, 0, 3914901713, + 0, 0, 0, 3534836131, 0, 0, 0, 1598344658, 0, 0, 0, 3196689317, 0, 0, 0, 2098411338, + }, + { + 0, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, + 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, 0, 33554433, 0, 0, 0, + 67108866, 0, 0, 0, 134217732, 0, 0, 0, 268435464, 0, 0, 0, 536870928, 0, 0, 0, + 1073741856, 0, 0, 0, 2147483713, 0, 0, 0, 130, 0, 0, 0, 260, 0, 0, 0, + 520, 0, 0, 0, 1040, 0, 0, 0, 2080, 0, 0, 0, 4160, 0, 0, 0, + 8320, 0, 0, 0, 16640, 0, 0, 0, 33280, 0, 0, 0, 66560, 0, 
0, 0, + 133120, 0, 0, 0, 266240, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, + 32768, 0, 0, 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8421376, 0, 0, + 0, 16842752, 0, 0, 0, 33685504, 0, 0, 0, 67371008, 0, 0, 0, 134742017, 0, 0, + 0, 269484034, 0, 0, 0, 538968069, 0, 0, 0, 1077936138, 0, 0, 0, 2155872276, 0, 0, + 0, 16777256, 0, 0, 0, 33554512, 0, 0, 0, 67109024, 0, 0, 0, 134218049, 0, 0, + 0, 268436098, 0, 0, 0, 536872197, 0, 0, 0, 1073744394, 0, 0, 0, 2147488788, 0, 0, + 0, 10280, 0, 0, 0, 20560, 0, 0, 0, 41120, 0, 0, 0, 82240, 0, 0, + 0, 164480, 0, 0, 0, 328960, 0, 0, 0, 657920, 0, 0, 0, 1315840, 0, 0, + 0, 2631680, 0, 0, 0, 5263360, 0, 0, 0, 2105344, 0, 0, 0, 4210688, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 844445257, 0, 0, 0, 1688890514, 0, 0, 0, 3377781029, 0, 0, 0, 2460594762, 0, + 0, 0, 626222228, 0, 0, 0, 1252444457, 0, 0, 0, 2504888914, 0, 0, 0, 714810532, 0, + 0, 0, 1429621065, 0, 0, 0, 2859242131, 0, 0, 0, 1423516967, 0, 0, 0, 2847033934, 0, + 0, 0, 1399100572, 0, 0, 0, 2798201144, 0, 0, 0, 1301434992, 0, 0, 0, 2842792617, 0, + 0, 0, 1390617939, 0, 0, 0, 2781235878, 0, 0, 0, 1267504461, 0, 0, 0, 2535008923, 0, + 0, 0, 775050550, 0, 0, 0, 1550101100, 0, 0, 0, 3100202201, 0, 0, 0, 1905437106, 0, + 0, 0, 3810874212, 0, 0, 0, 3326781129, 0, 0, 0, 2358594962, 0, 0, 0, 422222628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2034053848, + 0, 0, 0, 4068107696, 0, 0, 0, 3841248097, 0, 0, 0, 3387528899, 0, 0, 0, 2480090502, + 0, 0, 0, 665213709, 0, 0, 0, 1330427418, 0, 0, 0, 2660854837, 0, 0, 0, 1026742378, + 0, 0, 0, 2053484756, 0, 0, 0, 4106969513, 0, 0, 0, 3918971730, 0, 0, 0, 3542976165, + 0, 0, 0, 2790985034, 0, 0, 0, 1287002772, 0, 0, 0, 2574005545, 0, 0, 0, 853043794, + 0, 0, 0, 1706087588, 0, 0, 0, 3412175176, 0, 0, 0, 2529383056, 0, 0, 0, 763798816, + 0, 0, 0, 1527597633, 0, 0, 0, 3475482203, 0, 0, 0, 2655997110, 0, 0, 0, 
1017026924, + }, + { + 0, 0, 0, 0, 8390656, 0, 0, 0, 16781312, 0, 0, 0, 33562625, 0, 0, 0, + 67125250, 0, 0, 0, 134250500, 0, 0, 0, 268501000, 0, 0, 0, 537002000, 0, 0, 0, + 1074004000, 0, 0, 0, 2148008001, 0, 0, 0, 1048706, 0, 0, 0, 2097412, 0, 0, 0, + 4194824, 0, 0, 0, 8389648, 0, 0, 0, 16779296, 0, 0, 0, 33558593, 0, 0, 0, + 67117186, 0, 0, 0, 134234372, 0, 0, 0, 268468744, 0, 0, 0, 536937488, 0, 0, 0, + 1073874976, 0, 0, 0, 2147749953, 0, 0, 0, 532610, 0, 0, 0, 1065220, 0, 0, 0, + 2130440, 0, 0, 0, 4260880, 0, 0, 0, 131104, 0, 0, 0, 262208, 0, 0, 0, + 524416, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195328, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 591396869, 0, 0, + 0, 1182793738, 0, 0, 0, 2365587477, 0, 0, 0, 436207659, 0, 0, 0, 872415319, 0, 0, + 0, 1744830638, 0, 0, 0, 3489661276, 0, 0, 0, 2684355257, 0, 0, 0, 1073743218, 0, 0, + 0, 2147486436, 0, 0, 0, 5576, 0, 0, 0, 11152, 0, 0, 0, 22304, 0, 0, + 0, 44608, 0, 0, 0, 89216, 0, 0, 0, 178432, 0, 0, 0, 356864, 0, 0, + 0, 713728, 0, 0, 0, 1427456, 0, 0, 0, 2854912, 0, 0, 0, 5709824, 0, 0, + 0, 11419648, 0, 0, 0, 22839296, 0, 0, 0, 45678592, 0, 0, 0, 91357184, 0, 0, + 0, 182714369, 0, 0, 0, 365428738, 0, 0, 0, 147849217, 0, 0, 0, 295698434, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3001529316, 0, 0, 0, 1708091336, 0, 0, 0, 3416182673, 0, 0, 0, 2537398050, 0, + 0, 0, 779828804, 0, 0, 0, 1559657609, 0, 0, 0, 3119315218, 0, 0, 0, 1943663140, 0, + 0, 0, 3887326281, 0, 0, 0, 3479685267, 0, 0, 0, 2664403238, 0, 0, 0, 1033839181, 0, + 0, 0, 2067678363, 0, 0, 0, 4135356726, 0, 0, 0, 3975746156, 0, 0, 0, 1796577085, 0, + 0, 0, 3593154171, 0, 0, 0, 2891341046, 0, 0, 0, 1487714796, 0, 0, 0, 2975429593, 0, + 0, 0, 1655891891, 0, 0, 0, 3311783783, 0, 0, 0, 2328600271, 0, 0, 0, 362233247, 0, + 0, 0, 724466494, 0, 0, 0, 1448932988, 0, 0, 0, 2897865977, 0, 0, 0, 1500764658, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30521863, + 0, 0, 0, 
61043727, 0, 0, 0, 122087454, 0, 0, 0, 244174909, 0, 0, 0, 488349818, + 0, 0, 0, 976699637, 0, 0, 0, 1953399275, 0, 0, 0, 3906798551, 0, 0, 0, 3518629806, + 0, 0, 0, 2742292316, 0, 0, 0, 1189617336, 0, 0, 0, 2379234673, 0, 0, 0, 463502051, + 0, 0, 0, 927004102, 0, 0, 0, 1854008205, 0, 0, 0, 3708016410, 0, 0, 0, 3121065525, + 0, 0, 0, 1947163754, 0, 0, 0, 3894327508, 0, 0, 0, 3493687720, 0, 0, 0, 2692408145, + 0, 0, 0, 1089848995, 0, 0, 0, 2151298880, 0, 0, 0, 7630465, 0, 0, 0, 15260931, + }, + { + 0, 0, 0, 0, 134300932, 0, 0, 0, 268601864, 0, 0, 0, 537203728, 0, 0, 0, + 1074407456, 0, 0, 0, 2148814913, 0, 0, 0, 2662530, 0, 0, 0, 5325060, 0, 0, 0, + 10650120, 0, 0, 0, 21300240, 0, 0, 0, 42600481, 0, 0, 0, 85200962, 0, 0, 0, + 170401925, 0, 0, 0, 340803850, 0, 0, 0, 681607700, 0, 0, 0, 1363215400, 0, 0, 0, + 2726430800, 0, 0, 0, 1157894304, 0, 0, 0, 2315788608, 0, 0, 0, 336609920, 0, 0, 0, + 673219840, 0, 0, 0, 1346439680, 0, 0, 0, 2692879361, 0, 0, 0, 1090791426, 0, 0, 0, + 2181582852, 0, 0, 0, 68198408, 0, 0, 0, 2098452, 0, 0, 0, 4196904, 0, 0, 0, + 8393808, 0, 0, 0, 16787616, 0, 0, 0, 33575233, 0, 0, 0, 67150466, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 661652741, 0, 0, + 0, 1323305483, 0, 0, 0, 2646610967, 0, 0, 0, 998254638, 0, 0, 0, 1996509277, 0, 0, + 0, 3993018554, 0, 0, 0, 3691069813, 0, 0, 0, 3087172330, 0, 0, 0, 1879377365, 0, 0, + 0, 3758754731, 0, 0, 0, 3222542166, 0, 0, 0, 2150117036, 0, 0, 0, 5266776, 0, 0, + 0, 10533552, 0, 0, 0, 21067104, 0, 0, 0, 42134208, 0, 0, 0, 84268416, 0, 0, + 0, 168536833, 0, 0, 0, 337073666, 0, 0, 0, 674147332, 0, 0, 0, 1348294664, 0, 0, + 0, 2696589329, 0, 0, 0, 1098211362, 0, 0, 0, 2196422724, 0, 0, 0, 97878152, 0, 0, + 0, 195756305, 0, 0, 0, 391512610, 0, 0, 0, 165413185, 0, 0, 0, 330826370, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2938358989, 0, 0, 0, 1581750682, 0, 0, 0, 3163501365, 0, 0, 0, 2032035435, 0, + 0, 0, 4064070870, 0, 0, 0, 3833174445, 0, 0, 0, 3371381594, 0, 0, 0, 2447795893, 
0, + 0, 0, 600624491, 0, 0, 0, 1201248983, 0, 0, 0, 2402497966, 0, 0, 0, 510028637, 0, + 0, 0, 1020057274, 0, 0, 0, 2040114548, 0, 0, 0, 4080229097, 0, 0, 0, 1229289758, 0, + 0, 0, 2458579516, 0, 0, 0, 622191737, 0, 0, 0, 1244383475, 0, 0, 0, 2488766950, 0, + 0, 0, 682566604, 0, 0, 0, 1365133209, 0, 0, 0, 2730266419, 0, 0, 0, 1165565542, 0, + 0, 0, 2331131084, 0, 0, 0, 367294873, 0, 0, 0, 734589747, 0, 0, 0, 1469179494, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33833481, + 0, 0, 0, 67666962, 0, 0, 0, 135333924, 0, 0, 0, 270667848, 0, 0, 0, 541335697, + 0, 0, 0, 1082671394, 0, 0, 0, 2165342788, 0, 0, 0, 35718281, 0, 0, 0, 71436563, + 0, 0, 0, 142873126, 0, 0, 0, 285746252, 0, 0, 0, 571492505, 0, 0, 0, 1142985010, + 0, 0, 0, 2285970021, 0, 0, 0, 276972746, 0, 0, 0, 553945492, 0, 0, 0, 1107890985, + 0, 0, 0, 2215781970, 0, 0, 0, 136596644, 0, 0, 0, 273193289, 0, 0, 0, 546386578, + 0, 0, 0, 1092773156, 0, 0, 0, 2151712833, 0, 0, 0, 8458370, 0, 0, 0, 16916740, + }, + { + 0, 0, 0, 0, 826542200, 0, 0, 0, 1653084401, 0, 0, 0, 3306168803, 0, 0, 0, + 2317370310, 0, 0, 0, 339773324, 0, 0, 0, 679546648, 0, 0, 0, 1359093296, 0, 0, 0, + 2718186592, 0, 0, 0, 1141405888, 0, 0, 0, 2282811777, 0, 0, 0, 270656258, 0, 0, 0, + 541312516, 0, 0, 0, 1082625032, 0, 0, 0, 2165250065, 0, 0, 0, 35532835, 0, 0, 0, + 71065670, 0, 0, 0, 142131340, 0, 0, 0, 284262680, 0, 0, 0, 568525360, 0, 0, 0, + 1137050721, 0, 0, 0, 2274101442, 0, 0, 0, 253235589, 0, 0, 0, 506471179, 0, 0, 0, + 1012942358, 0, 0, 0, 2025884716, 0, 0, 0, 3234140193, 0, 0, 0, 2173313091, 0, 0, 0, + 51658887, 0, 0, 0, 103317775, 0, 0, 0, 206635550, 0, 0, 0, 413271100, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2815455249, 0, 0, + 0, 1335943203, 0, 0, 0, 2671886407, 0, 0, 0, 1048805518, 0, 0, 0, 2097611036, 0, 0, + 0, 4195222072, 0, 0, 0, 4095476849, 0, 0, 0, 3895986402, 0, 0, 0, 3497005508, 0, 0, + 0, 2699043721, 0, 0, 0, 1103120146, 0, 0, 0, 2206240292, 0, 0, 0, 
117513288, 0, 0, + 0, 235026577, 0, 0, 0, 470053155, 0, 0, 0, 940106310, 0, 0, 0, 1880212621, 0, 0, + 0, 3760425243, 0, 0, 0, 3225883190, 0, 0, 0, 2156799084, 0, 0, 0, 18630872, 0, 0, + 0, 37261744, 0, 0, 0, 74523488, 0, 0, 0, 149046977, 0, 0, 0, 298093954, 0, 0, + 0, 596187909, 0, 0, 0, 1192375818, 0, 0, 0, 703863812, 0, 0, 0, 1407727624, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1801783692, 0, 0, 0, 3603567385, 0, 0, 0, 2912167474, 0, 0, 0, 1529367653, 0, + 0, 0, 3058735306, 0, 0, 0, 1822503316, 0, 0, 0, 3645006633, 0, 0, 0, 2995045970, 0, + 0, 0, 1695124645, 0, 0, 0, 3390249290, 0, 0, 0, 2485531285, 0, 0, 0, 676095274, 0, + 0, 0, 1352190548, 0, 0, 0, 2704381097, 0, 0, 0, 1113794898, 0, 0, 0, 4020460328, 0, + 0, 0, 3745953360, 0, 0, 0, 3196939425, 0, 0, 0, 2098911554, 0, 0, 0, 4197823108, 0, + 0, 0, 4100678921, 0, 0, 0, 3906390547, 0, 0, 0, 3517813798, 0, 0, 0, 2740660300, 0, + 0, 0, 1186353304, 0, 0, 0, 2372706609, 0, 0, 0, 450445923, 0, 0, 0, 900891846, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67141650, + 0, 0, 0, 134283300, 0, 0, 0, 268566600, 0, 0, 0, 537133200, 0, 0, 0, 1074266400, + 0, 0, 0, 2148532800, 0, 0, 0, 2098304, 0, 0, 0, 4196609, 0, 0, 0, 8393218, + 0, 0, 0, 16786436, 0, 0, 0, 33572873, 0, 0, 0, 67145746, 0, 0, 0, 134291492, + 0, 0, 0, 268582984, 0, 0, 0, 537165968, 0, 0, 0, 1074331936, 0, 0, 0, 2148663872, + 0, 0, 0, 2360448, 0, 0, 0, 4720897, 0, 0, 0, 9441794, 0, 0, 0, 18883588, + 0, 0, 0, 37767176, 0, 0, 0, 8392706, 0, 0, 0, 16785412, 0, 0, 0, 33570825, + }, + { + 0, 0, 0, 0, 2499687121, 0, 0, 0, 704406946, 0, 0, 0, 1408813893, 0, 0, 0, + 2817627786, 0, 0, 0, 1340288277, 0, 0, 0, 2680576554, 0, 0, 0, 1066185813, 0, 0, 0, + 2132371627, 0, 0, 0, 4264743254, 0, 0, 0, 4234519213, 0, 0, 0, 4174071131, 0, 0, 0, + 4053174967, 0, 0, 0, 3811382638, 0, 0, 0, 3327797980, 0, 0, 0, 2360628665, 0, 0, 0, + 426290034, 0, 0, 0, 852580069, 0, 0, 0, 1705160138, 0, 0, 0, 3410320276, 0, 
0, 0, + 2525673256, 0, 0, 0, 756379216, 0, 0, 0, 1512758433, 0, 0, 0, 3025516867, 0, 0, 0, + 1756066438, 0, 0, 0, 3512132877, 0, 0, 0, 911472843, 0, 0, 0, 1822945686, 0, 0, 0, + 3645891373, 0, 0, 0, 2996815450, 0, 0, 0, 1698663604, 0, 0, 0, 3397327208, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1432822912, 0, 0, + 0, 2865645824, 0, 0, 0, 1436324352, 0, 0, 0, 2872648704, 0, 0, 0, 1450330112, 0, 0, + 0, 2900660224, 0, 0, 0, 1506353153, 0, 0, 0, 3012706307, 0, 0, 0, 1730445319, 0, 0, + 0, 3460890639, 0, 0, 0, 2626813983, 0, 0, 0, 958660670, 0, 0, 0, 1917321341, 0, 0, + 0, 3834642683, 0, 0, 0, 3374318071, 0, 0, 0, 2453668846, 0, 0, 0, 612370397, 0, 0, + 0, 1224740795, 0, 0, 0, 2449481590, 0, 0, 0, 603995885, 0, 0, 0, 1207991771, 0, 0, + 0, 2415983542, 0, 0, 0, 536999789, 0, 0, 0, 1073999578, 0, 0, 0, 2147999156, 0, 0, + 0, 1031016, 0, 0, 0, 2062032, 0, 0, 0, 1431947552, 0, 0, 0, 2863895104, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2656366736, 0, 0, 0, 1017766176, 0, 0, 0, 2035532352, 0, 0, 0, 4071064704, 0, + 0, 0, 3847162112, 0, 0, 0, 3399356928, 0, 0, 0, 2503746561, 0, 0, 0, 712525826, 0, + 0, 0, 1425051652, 0, 0, 0, 2850103304, 0, 0, 0, 1405239313, 0, 0, 0, 2810478627, 0, + 0, 0, 1325989958, 0, 0, 0, 2651979917, 0, 0, 0, 1008992539, 0, 0, 0, 3860656807, 0, + 0, 0, 3426346319, 0, 0, 0, 2557725342, 0, 0, 0, 820483388, 0, 0, 0, 1640966776, 0, + 0, 0, 3281933552, 0, 0, 0, 2268899809, 0, 0, 0, 242832322, 0, 0, 0, 485664644, 0, + 0, 0, 971329289, 0, 0, 0, 1942658578, 0, 0, 0, 3885317156, 0, 0, 0, 3475667016, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2304, + 0, 0, 0, 4608, 0, 0, 0, 9216, 0, 0, 0, 18432, 0, 0, 0, 36864, + 0, 0, 0, 73728, 0, 0, 0, 147456, 0, 0, 0, 294912, 0, 0, 0, 589824, + 0, 0, 0, 1179648, 0, 0, 0, 2359296, 0, 0, 0, 4718593, 0, 0, 0, 9437186, + 0, 0, 0, 18874372, 0, 0, 0, 37748744, 0, 0, 0, 75497488, 0, 0, 0, 150994976, + 0, 0, 0, 301989953, 0, 0, 0, 603979906, 0, 0, 0, 
1207959812, 0, 0, 0, 2415919624, + 0, 0, 0, 536871952, 0, 0, 0, 1073742112, 0, 0, 0, 2147484224, 0, 0, 0, 1152, + }, + { + 0, 0, 0, 0, 2271571420, 0, 0, 0, 248175545, 0, 0, 0, 496351090, 0, 0, 0, + 992702181, 0, 0, 0, 1985404363, 0, 0, 0, 3970808727, 0, 0, 0, 3646650159, 0, 0, 0, + 2998333022, 0, 0, 0, 1701698748, 0, 0, 0, 3403397496, 0, 0, 0, 2511827697, 0, 0, 0, + 728688099, 0, 0, 0, 1457376199, 0, 0, 0, 2914752399, 0, 0, 0, 1534537503, 0, 0, 0, + 3069075006, 0, 0, 0, 1843182716, 0, 0, 0, 3686365432, 0, 0, 0, 3077763568, 0, 0, 0, + 1860559841, 0, 0, 0, 3721119683, 0, 0, 0, 3147272070, 0, 0, 0, 1999576845, 0, 0, 0, + 3999153690, 0, 0, 0, 3703340085, 0, 0, 0, 1042126263, 0, 0, 0, 2084252526, 0, 0, 0, + 4168505053, 0, 0, 0, 4042042811, 0, 0, 0, 3789118327, 0, 0, 0, 3283269358, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2740852545, 0, 0, + 0, 1186737794, 0, 0, 0, 2373475589, 0, 0, 0, 451983883, 0, 0, 0, 903967767, 0, 0, + 0, 1807935534, 0, 0, 0, 3615871068, 0, 0, 0, 2936774840, 0, 0, 0, 1578582385, 0, 0, + 0, 3157164770, 0, 0, 0, 2019362244, 0, 0, 0, 4038724489, 0, 0, 0, 3782481683, 0, 0, + 0, 3269996070, 0, 0, 0, 2245024844, 0, 0, 0, 195082393, 0, 0, 0, 390164786, 0, 0, + 0, 780329572, 0, 0, 0, 1560659145, 0, 0, 0, 3121318290, 0, 0, 0, 1947669285, 0, 0, + 0, 3895338570, 0, 0, 0, 3495709844, 0, 0, 0, 2696452393, 0, 0, 0, 1097937490, 0, 0, + 0, 2195874980, 0, 0, 0, 96782664, 0, 0, 0, 2832696784, 0, 0, 0, 1370426272, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2424557694, 0, 0, 0, 554148093, 0, 0, 0, 1108296186, 0, 0, 0, 2216592373, 0, + 0, 0, 138217450, 0, 0, 0, 276434901, 0, 0, 0, 552869802, 0, 0, 0, 1105739604, 0, + 0, 0, 2211479208, 0, 0, 0, 127991121, 0, 0, 0, 255982243, 0, 0, 0, 511964486, 0, + 0, 0, 1023928972, 0, 0, 0, 2047857944, 0, 0, 0, 4095715888, 0, 0, 0, 2025620510, 0, + 0, 0, 4051241021, 0, 0, 0, 3807514746, 0, 0, 0, 3320062196, 0, 0, 0, 2345157096, 0, + 0, 0, 395346896, 0, 0, 0, 790693792, 0, 0, 0, 1581387585, 0, 0, 0, 
3162775171, 0, + 0, 0, 2030583047, 0, 0, 0, 4061166095, 0, 0, 0, 3827364895, 0, 0, 0, 3359762495, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8390658, + 0, 0, 0, 16781316, 0, 0, 0, 33562633, 0, 0, 0, 67125266, 0, 0, 0, 134250532, + 0, 0, 0, 268501064, 0, 0, 0, 537002128, 0, 0, 0, 1074004256, 0, 0, 0, 2148008512, + 0, 0, 0, 1049728, 0, 0, 0, 2099456, 0, 0, 0, 4198913, 0, 0, 0, 8397826, + 0, 0, 0, 16795652, 0, 0, 0, 33591305, 0, 0, 0, 67182610, 0, 0, 0, 134365220, + 0, 0, 0, 268730440, 0, 0, 0, 537460880, 0, 0, 0, 1074921760, 0, 0, 0, 2149843520, + 0, 0, 0, 4719745, 0, 0, 0, 1048832, 0, 0, 0, 2097664, 0, 0, 0, 4195329, + }, + { + 0, 0, 0, 0, 1739616249, 0, 0, 0, 3479232498, 0, 0, 0, 2663497700, 0, 0, 0, + 1032028104, 0, 0, 0, 2064056209, 0, 0, 0, 4128112418, 0, 0, 0, 3961257541, 0, 0, 0, + 3627547787, 0, 0, 0, 2960128279, 0, 0, 0, 1625289262, 0, 0, 0, 3250578525, 0, 0, 0, + 2206189754, 0, 0, 0, 117412213, 0, 0, 0, 234824426, 0, 0, 0, 469648853, 0, 0, 0, + 939297707, 0, 0, 0, 1878595415, 0, 0, 0, 3757190830, 0, 0, 0, 3219414364, 0, 0, 0, + 2143861433, 0, 0, 0, 4287722866, 0, 0, 0, 4280478436, 0, 0, 0, 4265989576, 0, 0, 0, + 4237011857, 0, 0, 0, 4179056419, 0, 0, 0, 2510209471, 0, 0, 0, 725451647, 0, 0, 0, + 1450903295, 0, 0, 0, 2901806591, 0, 0, 0, 1508645886, 0, 0, 0, 3017291772, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3220345106, 0, 0, + 0, 2145722916, 0, 0, 0, 4291445832, 0, 0, 0, 4287924368, 0, 0, 0, 4280881440, 0, 0, + 0, 4266795584, 0, 0, 0, 4238623872, 0, 0, 0, 4182280448, 0, 0, 0, 4069593601, 0, 0, + 0, 3844219907, 0, 0, 0, 3393472519, 0, 0, 0, 2491977742, 0, 0, 0, 688988188, 0, 0, + 0, 1377976376, 0, 0, 0, 2755952753, 0, 0, 0, 1216938211, 0, 0, 0, 2433876422, 0, 0, + 0, 572785549, 0, 0, 0, 1145571098, 0, 0, 0, 2291142197, 0, 0, 0, 287317098, 0, 0, + 0, 574634197, 0, 0, 0, 1149268394, 0, 0, 0, 2298536789, 0, 0, 0, 302106282, 0, 0, + 0, 604212565, 0, 0, 0, 1208425131, 0, 0, 0, 805086276, 0, 0, 0, 
1610172553, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70539992, 0, 0, 0, 141079985, 0, 0, 0, 282159971, 0, 0, 0, 564319943, 0, + 0, 0, 1128639887, 0, 0, 0, 2257279774, 0, 0, 0, 219592253, 0, 0, 0, 439184507, 0, + 0, 0, 878369014, 0, 0, 0, 1756738029, 0, 0, 0, 3513476059, 0, 0, 0, 2731984823, 0, + 0, 0, 1169002351, 0, 0, 0, 2338004702, 0, 0, 0, 381042109, 0, 0, 0, 693641634, 0, + 0, 0, 1387283269, 0, 0, 0, 2774566539, 0, 0, 0, 1254165782, 0, 0, 0, 2508331565, 0, + 0, 0, 721695834, 0, 0, 0, 1443391669, 0, 0, 0, 2886783339, 0, 0, 0, 1478599382, 0, + 0, 0, 2957198765, 0, 0, 0, 1619430235, 0, 0, 0, 3238860470, 0, 0, 0, 2182753644, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144703526, + 0, 0, 0, 289407053, 0, 0, 0, 578814107, 0, 0, 0, 1157628214, 0, 0, 0, 2315256429, + 0, 0, 0, 335545562, 0, 0, 0, 671091124, 0, 0, 0, 1342182248, 0, 0, 0, 2684364496, + 0, 0, 0, 1073761696, 0, 0, 0, 2147523392, 0, 0, 0, 79488, 0, 0, 0, 158976, + 0, 0, 0, 317952, 0, 0, 0, 635904, 0, 0, 0, 1271808, 0, 0, 0, 2543616, + 0, 0, 0, 5087233, 0, 0, 0, 10174466, 0, 0, 0, 20348932, 0, 0, 0, 40697864, + 0, 0, 0, 81395729, 0, 0, 0, 18087940, 0, 0, 0, 36175881, 0, 0, 0, 72351763, + }, + { + 0, 0, 0, 0, 1915396941, 0, 0, 0, 3830793883, 0, 0, 0, 3366620471, 0, 0, 0, + 2438273647, 0, 0, 0, 581579999, 0, 0, 0, 1163159998, 0, 0, 0, 2326319996, 0, 0, 0, + 357672696, 0, 0, 0, 715345393, 0, 0, 0, 1430690786, 0, 0, 0, 2861381572, 0, 0, 0, + 1427795848, 0, 0, 0, 2855591696, 0, 0, 0, 1416216096, 0, 0, 0, 2832432193, 0, 0, 0, + 1369897090, 0, 0, 0, 2739794180, 0, 0, 0, 1184621065, 0, 0, 0, 2369242131, 0, 0, 0, + 443516967, 0, 0, 0, 887033934, 0, 0, 0, 1774067868, 0, 0, 0, 3548135736, 0, 0, 0, + 2801304176, 0, 0, 0, 1307641056, 0, 0, 0, 3922242189, 0, 0, 0, 3549517082, 0, 0, 0, + 2804066868, 0, 0, 0, 1313166441, 0, 0, 0, 2626332883, 0, 0, 0, 957698470, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 348140706, 0, 0, + 0, 696281412, 0, 
0, 0, 1392562824, 0, 0, 0, 2785125649, 0, 0, 0, 1275284003, 0, 0, + 0, 2550568007, 0, 0, 0, 806168719, 0, 0, 0, 1612337439, 0, 0, 0, 3224674878, 0, 0, + 0, 2154382460, 0, 0, 0, 13797624, 0, 0, 0, 27595248, 0, 0, 0, 55190496, 0, 0, + 0, 110380992, 0, 0, 0, 220761985, 0, 0, 0, 441523971, 0, 0, 0, 883047943, 0, 0, + 0, 1766095886, 0, 0, 0, 3532191772, 0, 0, 0, 2769416249, 0, 0, 0, 1243865203, 0, 0, + 0, 2487730406, 0, 0, 0, 680493516, 0, 0, 0, 1360987032, 0, 0, 0, 2721974065, 0, 0, + 0, 1148980834, 0, 0, 0, 2297961669, 0, 0, 0, 87035176, 0, 0, 0, 174070353, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1770227281, 0, 0, 0, 3540454563, 0, 0, 0, 2785941830, 0, 0, 0, 1276916364, 0, + 0, 0, 2553832729, 0, 0, 0, 812698162, 0, 0, 0, 1625396325, 0, 0, 0, 3250792650, 0, + 0, 0, 2206618004, 0, 0, 0, 118268713, 0, 0, 0, 236537426, 0, 0, 0, 473074852, 0, + 0, 0, 946149705, 0, 0, 0, 1892299411, 0, 0, 0, 3784598822, 0, 0, 0, 2863352860, 0, + 0, 0, 1431738424, 0, 0, 0, 2863476848, 0, 0, 0, 1431986401, 0, 0, 0, 2863972803, 0, + 0, 0, 1432978310, 0, 0, 0, 2865956620, 0, 0, 0, 1436945945, 0, 0, 0, 2873891890, 0, + 0, 0, 1452816485, 0, 0, 0, 2905632970, 0, 0, 0, 1516298644, 0, 0, 0, 3032597288, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135049252, + 0, 0, 0, 270098504, 0, 0, 0, 540197008, 0, 0, 0, 1080394017, 0, 0, 0, 2160788035, + 0, 0, 0, 26608774, 0, 0, 0, 53217549, 0, 0, 0, 106435098, 0, 0, 0, 212870196, + 0, 0, 0, 425740393, 0, 0, 0, 851480786, 0, 0, 0, 1702961572, 0, 0, 0, 3405923145, + 0, 0, 0, 2516878995, 0, 0, 0, 738790694, 0, 0, 0, 1477581388, 0, 0, 0, 2955162776, + 0, 0, 0, 1615358257, 0, 0, 0, 3230716514, 0, 0, 0, 2166465732, 0, 0, 0, 37964168, + 0, 0, 0, 75928336, 0, 0, 0, 16881156, 0, 0, 0, 33762313, 0, 0, 0, 67524626, + }, + { + 0, 0, 0, 0, 4038889453, 0, 0, 0, 3782811611, 0, 0, 0, 3270655926, 0, 0, 0, + 2246344557, 0, 0, 0, 197721819, 0, 0, 0, 395443639, 0, 0, 0, 790887279, 0, 0, 0, + 1581774559, 
0, 0, 0, 3163549119, 0, 0, 0, 2032130942, 0, 0, 0, 4064261884, 0, 0, 0, + 3833556473, 0, 0, 0, 3372145651, 0, 0, 0, 2449324007, 0, 0, 0, 603680719, 0, 0, 0, + 1207361439, 0, 0, 0, 2414722878, 0, 0, 0, 534478461, 0, 0, 0, 1068956923, 0, 0, 0, + 2137913847, 0, 0, 0, 4275827694, 0, 0, 0, 4256688093, 0, 0, 0, 4218408890, 0, 0, 0, + 4141850484, 0, 0, 0, 3988733673, 0, 0, 0, 734196287, 0, 0, 0, 1468392575, 0, 0, 0, + 2936785150, 0, 0, 0, 1578603005, 0, 0, 0, 3157206011, 0, 0, 0, 2019444726, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2692483345, 0, 0, + 0, 1089999394, 0, 0, 0, 2179998788, 0, 0, 0, 65030280, 0, 0, 0, 130060560, 0, 0, + 0, 260121121, 0, 0, 0, 520242243, 0, 0, 0, 1040484486, 0, 0, 0, 2080968972, 0, 0, + 0, 4161937944, 0, 0, 0, 4028908593, 0, 0, 0, 3762849891, 0, 0, 0, 3230732486, 0, 0, + 0, 2166497676, 0, 0, 0, 38028056, 0, 0, 0, 76056112, 0, 0, 0, 152112225, 0, 0, + 0, 304224450, 0, 0, 0, 608448901, 0, 0, 0, 1216897803, 0, 0, 0, 2433795606, 0, 0, + 0, 572623917, 0, 0, 0, 1145247834, 0, 0, 0, 2290495669, 0, 0, 0, 286024042, 0, 0, + 0, 572048085, 0, 0, 0, 1144096170, 0, 0, 0, 673120836, 0, 0, 0, 1346241672, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 302254233, 0, 0, 0, 604508466, 0, 0, 0, 1209016932, 0, 0, 0, 2418033865, 0, + 0, 0, 541100435, 0, 0, 0, 1082200870, 0, 0, 0, 2164401740, 0, 0, 0, 33836184, 0, + 0, 0, 67672369, 0, 0, 0, 135344738, 0, 0, 0, 270689477, 0, 0, 0, 541378955, 0, + 0, 0, 1082757911, 0, 0, 0, 2165515822, 0, 0, 0, 36064348, 0, 0, 0, 373854240, 0, + 0, 0, 747708480, 0, 0, 0, 1495416961, 0, 0, 0, 2990833922, 0, 0, 0, 1686700548, 0, + 0, 0, 3373401096, 0, 0, 0, 2451834897, 0, 0, 0, 608702498, 0, 0, 0, 1217404996, 0, + 0, 0, 2434809993, 0, 0, 0, 574652691, 0, 0, 0, 1149305382, 0, 0, 0, 2298610764, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3961969350, + 0, 0, 0, 3628971405, 0, 0, 0, 2962975514, 0, 0, 0, 1630983732, 0, 0, 0, 3261967464, + 0, 0, 0, 2228967633, 
0, 0, 0, 162967970, 0, 0, 0, 325935940, 0, 0, 0, 651871880, + 0, 0, 0, 1303743760, 0, 0, 0, 2607487520, 0, 0, 0, 920007744, 0, 0, 0, 1840015488, + 0, 0, 0, 3680030976, 0, 0, 0, 3065094657, 0, 0, 0, 1835222019, 0, 0, 0, 3670444038, + 0, 0, 0, 3045920780, 0, 0, 0, 1796874265, 0, 0, 0, 3593748531, 0, 0, 0, 2892529767, + 0, 0, 0, 1490092239, 0, 0, 0, 1568987992, 0, 0, 0, 3137975985, 0, 0, 0, 1980984675, + }, + { + 0, 0, 0, 0, 3824344628, 0, 0, 0, 3353721960, 0, 0, 0, 2412476624, 0, 0, 0, + 529985953, 0, 0, 0, 1059971907, 0, 0, 0, 2119943815, 0, 0, 0, 4239887631, 0, 0, 0, + 4184807967, 0, 0, 0, 4074648638, 0, 0, 0, 3854329981, 0, 0, 0, 3413692666, 0, 0, 0, + 2532418036, 0, 0, 0, 769868776, 0, 0, 0, 1539737553, 0, 0, 0, 3079475106, 0, 0, 0, + 1863982917, 0, 0, 0, 3727965834, 0, 0, 0, 3160964373, 0, 0, 0, 2026961450, 0, 0, 0, + 4053922901, 0, 0, 0, 3812878506, 0, 0, 0, 3330789716, 0, 0, 0, 2366612137, 0, 0, 0, + 438256979, 0, 0, 0, 876513958, 0, 0, 0, 2341456760, 0, 0, 0, 387946225, 0, 0, 0, + 775892451, 0, 0, 0, 1551784902, 0, 0, 0, 3103569805, 0, 0, 0, 1912172314, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1473249760, 0, 0, + 0, 2946499520, 0, 0, 0, 1598031745, 0, 0, 0, 3196063490, 0, 0, 0, 2097159684, 0, 0, + 0, 4194319368, 0, 0, 0, 4093671441, 0, 0, 0, 3892375586, 0, 0, 0, 3489783876, 0, 0, + 0, 2684600457, 0, 0, 0, 1074233618, 0, 0, 0, 2148467236, 0, 0, 0, 1967176, 0, 0, + 0, 3934352, 0, 0, 0, 7868704, 0, 0, 0, 15737408, 0, 0, 0, 31474816, 0, 0, + 0, 62949632, 0, 0, 0, 125899264, 0, 0, 0, 251798529, 0, 0, 0, 503597059, 0, 0, + 0, 1007194118, 0, 0, 0, 2014388236, 0, 0, 0, 4028776473, 0, 0, 0, 3762585651, 0, 0, + 0, 3230204006, 0, 0, 0, 2165440716, 0, 0, 0, 1442054264, 0, 0, 0, 2884108528, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 536903683, 0, 0, 0, 1073807366, 0, 0, 0, 2147614732, 0, 0, 0, 262168, 0, + 0, 0, 524336, 0, 0, 0, 1048672, 0, 0, 0, 2097344, 0, 0, 0, 4194688, 0, + 0, 0, 8389376, 0, 0, 0, 16778752, 0, 0, 0, 33557504, 0, 0, 0, 
67115008, 0, + 0, 0, 134230016, 0, 0, 0, 268460033, 0, 0, 0, 536920067, 0, 0, 0, 1610678276, 0, + 0, 0, 3221356552, 0, 0, 0, 2147745808, 0, 0, 0, 524320, 0, 0, 0, 1048640, 0, + 0, 0, 2097280, 0, 0, 0, 4194560, 0, 0, 0, 8389120, 0, 0, 0, 16778240, 0, + 0, 0, 33556480, 0, 0, 0, 67112960, 0, 0, 0, 134225920, 0, 0, 0, 268451841, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3548126500, + 0, 0, 0, 2801285704, 0, 0, 0, 1307604113, 0, 0, 0, 2615208226, 0, 0, 0, 935449156, + 0, 0, 0, 1870898313, 0, 0, 0, 3741796627, 0, 0, 0, 3188625959, 0, 0, 0, 2082284622, + 0, 0, 0, 4164569244, 0, 0, 0, 4034171193, 0, 0, 0, 3773375091, 0, 0, 0, 3251782887, + 0, 0, 0, 2208598479, 0, 0, 0, 122229662, 0, 0, 0, 244459325, 0, 0, 0, 488918650, + 0, 0, 0, 977837300, 0, 0, 0, 1955674600, 0, 0, 0, 3911349200, 0, 0, 0, 3527731104, + 0, 0, 0, 2760494912, 0, 0, 0, 2590999460, 0, 0, 0, 887031625, 0, 0, 0, 1774063250, + }, + { + 0, 0, 0, 0, 1099834596, 0, 0, 0, 2199669192, 0, 0, 0, 104371089, 0, 0, 0, + 208742178, 0, 0, 0, 417484356, 0, 0, 0, 834968712, 0, 0, 0, 1669937425, 0, 0, 0, + 3339874850, 0, 0, 0, 2384782404, 0, 0, 0, 474597512, 0, 0, 0, 949195024, 0, 0, 0, + 1898390048, 0, 0, 0, 3796780096, 0, 0, 0, 3298592897, 0, 0, 0, 2302218499, 0, 0, 0, + 309469703, 0, 0, 0, 618939406, 0, 0, 0, 1237878812, 0, 0, 0, 2475757624, 0, 0, 0, + 656547953, 0, 0, 0, 1313095907, 0, 0, 0, 2626191815, 0, 0, 0, 957416334, 0, 0, 0, + 1914832669, 0, 0, 0, 3829665339, 0, 0, 0, 2298886291, 0, 0, 0, 302805287, 0, 0, 0, + 605610574, 0, 0, 0, 1211221148, 0, 0, 0, 2422442297, 0, 0, 0, 549917298, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1442838528, 0, 0, + 0, 2885677056, 0, 0, 0, 1476386816, 0, 0, 0, 2952773632, 0, 0, 0, 1610579969, 0, 0, + 0, 3221159938, 0, 0, 0, 2147352580, 0, 0, 0, 4294705160, 0, 0, 0, 4294443024, 0, 0, + 0, 4293918752, 0, 0, 0, 4292870208, 0, 0, 0, 4290773120, 0, 0, 0, 4286578944, 0, 0, + 0, 4278190592, 0, 0, 0, 4261413888, 0, 0, 0, 4227860480, 
0, 0, 0, 4160753664, 0, 0, + 0, 4026540033, 0, 0, 0, 3758112771, 0, 0, 0, 3221258246, 0, 0, 0, 2147549196, 0, 0, + 0, 131096, 0, 0, 0, 262192, 0, 0, 0, 524384, 0, 0, 0, 1048768, 0, 0, + 0, 2097536, 0, 0, 0, 4195072, 0, 0, 0, 1434451456, 0, 0, 0, 2868902912, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 262152, 0, 0, 0, 524304, 0, 0, 0, 1048608, 0, 0, 0, 2097216, 0, + 0, 0, 4194432, 0, 0, 0, 8388864, 0, 0, 0, 16777728, 0, 0, 0, 33555456, 0, + 0, 0, 67110912, 0, 0, 0, 134221824, 0, 0, 0, 268443649, 0, 0, 0, 536887298, 0, + 0, 0, 1073774597, 0, 0, 0, 2147549194, 0, 0, 0, 131092, 0, 0, 0, 32, 0, + 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, 0, 512, 0, + 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, 0, 8192, 0, + 0, 0, 16384, 0, 0, 0, 32769, 0, 0, 0, 65538, 0, 0, 0, 131076, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357270623, + 0, 0, 0, 714541247, 0, 0, 0, 1429082494, 0, 0, 0, 2858164988, 0, 0, 0, 1421362680, + 0, 0, 0, 2842725361, 0, 0, 0, 1390483426, 0, 0, 0, 2780966853, 0, 0, 0, 1266966411, + 0, 0, 0, 2533932823, 0, 0, 0, 772898351, 0, 0, 0, 1545796702, 0, 0, 0, 3091593405, + 0, 0, 0, 1888219514, 0, 0, 0, 3776439028, 0, 0, 0, 3257910761, 0, 0, 0, 2220854227, + 0, 0, 0, 146741158, 0, 0, 0, 293482317, 0, 0, 0, 586964634, 0, 0, 0, 1173929269, + 0, 0, 0, 2347858538, 0, 0, 0, 44658827, 0, 0, 0, 89317655, 0, 0, 0, 178635311, + }, + { + 0, 0, 0, 0, 3400190380, 0, 0, 0, 2505413465, 0, 0, 0, 715859635, 0, 0, 0, + 1431719270, 0, 0, 0, 2863438540, 0, 0, 0, 1431909784, 0, 0, 0, 2863819568, 0, 0, 0, + 1432671840, 0, 0, 0, 2865343680, 0, 0, 0, 1435720064, 0, 0, 0, 2871440128, 0, 0, 0, + 1447912961, 0, 0, 0, 2895825923, 0, 0, 0, 1496684550, 0, 0, 0, 2993369100, 0, 0, 0, + 1691770904, 0, 0, 0, 3383541809, 0, 0, 0, 2472116322, 0, 0, 0, 649265349, 0, 0, 0, + 1298530698, 0, 0, 0, 2597061396, 0, 0, 0, 899155496, 0, 0, 0, 1798310993, 0, 0, 0, + 3596621986, 0, 0, 0, 2898276677, 0, 0, 0, 2469047078, 0, 0, 0, 
643126861, 0, 0, 0, + 1286253722, 0, 0, 0, 2572507445, 0, 0, 0, 850047595, 0, 0, 0, 1700095190, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, + 0, 32, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 0, 256, 0, 0, + 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, 0, 4096, 0, 0, + 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, 0, 65536, 0, 0, + 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, 0, 1048576, 0, 0, + 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, 0, 16777216, 0, 0, + 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, 0, 268435458, 0, 0, + 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483652, 0, 0, 0, 8, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1073774613, 0, 0, 0, 2147549226, 0, 0, 0, 131156, 0, 0, 0, 262312, 0, + 0, 0, 524624, 0, 0, 0, 1049248, 0, 0, 0, 2098496, 0, 0, 0, 4196992, 0, + 0, 0, 8393984, 0, 0, 0, 16787968, 0, 0, 0, 33575936, 0, 0, 0, 67151873, 0, + 0, 0, 134303746, 0, 0, 0, 268607492, 0, 0, 0, 537214984, 0, 0, 0, 655364, 0, + 0, 0, 1310728, 0, 0, 0, 2621456, 0, 0, 0, 5242912, 0, 0, 0, 10485824, 0, + 0, 0, 20971648, 0, 0, 0, 41943296, 0, 0, 0, 83886592, 0, 0, 0, 167773184, 0, + 0, 0, 335546369, 0, 0, 0, 671092738, 0, 0, 0, 1342185477, 0, 0, 0, 2684370954, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3265725291, + 0, 0, 0, 2236483287, 0, 0, 0, 177999279, 0, 0, 0, 355998558, 0, 0, 0, 711997116, + 0, 0, 0, 1423994233, 0, 0, 0, 2847988467, 0, 0, 0, 1401009639, 0, 0, 0, 2802019279, + 0, 0, 0, 1309071263, 0, 0, 0, 2618142526, 0, 0, 0, 941317756, 0, 0, 0, 1882635512, + 0, 0, 0, 3765271025, 0, 0, 0, 3235574755, 0, 0, 0, 2176182214, 0, 0, 0, 57397132, + 0, 0, 0, 114794264, 0, 0, 0, 229588528, 0, 0, 0, 459177056, 0, 0, 0, 918354113, + 0, 0, 0, 1836708227, 0, 0, 0, 408215661, 0, 0, 0, 816431322, 0, 0, 0, 1632862645, + }, + { + 0, 0, 0, 0, 2928597988, 0, 0, 0, 1562228680, 0, 0, 0, 3124457360, 0, 0, 0, + 1953947424, 0, 0, 0, 3907894849, 0, 0, 0, 3520822403, 0, 0, 0, 
2746677510, 0, 0, 0, + 1198387725, 0, 0, 0, 2396775450, 0, 0, 0, 498583604, 0, 0, 0, 997167209, 0, 0, 0, + 1994334419, 0, 0, 0, 3988668839, 0, 0, 0, 3682370382, 0, 0, 0, 3069773468, 0, 0, 0, + 1844579640, 0, 0, 0, 3689159280, 0, 0, 0, 3083351264, 0, 0, 0, 1871735233, 0, 0, 0, + 3743470466, 0, 0, 0, 3191973636, 0, 0, 0, 2088979976, 0, 0, 0, 4177959953, 0, 0, 0, + 4060952610, 0, 0, 0, 3826937925, 0, 0, 0, 1723480943, 0, 0, 0, 3446961887, 0, 0, 0, + 2598956478, 0, 0, 0, 902945660, 0, 0, 0, 1805891321, 0, 0, 0, 3611782642, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 256, 0, 0, 0, 512, 0, 0, 0, 1024, 0, 0, 0, 2048, 0, 0, + 0, 4096, 0, 0, 0, 8192, 0, 0, 0, 16384, 0, 0, 0, 32768, 0, 0, + 0, 65536, 0, 0, 0, 131072, 0, 0, 0, 262144, 0, 0, 0, 524288, 0, 0, + 0, 1048576, 0, 0, 0, 2097152, 0, 0, 0, 4194304, 0, 0, 0, 8388608, 0, 0, + 0, 16777216, 0, 0, 0, 33554432, 0, 0, 0, 67108864, 0, 0, 0, 134217729, 0, 0, + 0, 268435458, 0, 0, 0, 536870917, 0, 0, 0, 1073741834, 0, 0, 0, 2147483668, 0, 0, + 0, 40, 0, 0, 0, 80, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 18087960, 0, 0, 0, 36175920, 0, 0, 0, 72351840, 0, 0, 0, 144703680, 0, + 0, 0, 289407361, 0, 0, 0, 578814722, 0, 0, 0, 1157629444, 0, 0, 0, 2315258888, 0, + 0, 0, 335550481, 0, 0, 0, 671100962, 0, 0, 0, 1342201925, 0, 0, 0, 2684403851, 0, + 0, 0, 1073840407, 0, 0, 0, 2147680814, 0, 0, 0, 394332, 0, 0, 0, 18352288, 0, + 0, 0, 36704576, 0, 0, 0, 73409152, 0, 0, 0, 146818304, 0, 0, 0, 293636608, 0, + 0, 0, 587273216, 0, 0, 0, 1174546432, 0, 0, 0, 2349092864, 0, 0, 0, 403218432, 0, + 0, 0, 806436865, 0, 0, 0, 1612873731, 0, 0, 0, 3225747462, 0, 0, 0, 2156527628, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3612118070, + 0, 0, 0, 2929268845, 0, 0, 0, 1563570394, 0, 0, 0, 3127140788, 0, 0, 0, 1959314281, + 0, 0, 0, 3918628562, 0, 0, 0, 3542289829, 0, 0, 0, 2789612362, 0, 0, 0, 1284257428, + 0, 0, 0, 2568514856, 
0, 0, 0, 842062417, 0, 0, 0, 1684124835, 0, 0, 0, 3368249671, + 0, 0, 0, 2441532046, 0, 0, 0, 588096797, 0, 0, 0, 1176193595, 0, 0, 0, 2352387190, + 0, 0, 0, 409807085, 0, 0, 0, 819614171, 0, 0, 0, 1639228342, 0, 0, 0, 3278456684, + 0, 0, 0, 2261946072, 0, 0, 0, 3672740230, 0, 0, 0, 3050513165, 0, 0, 0, 1806059035, + }, +}; + +// clang-format on + +#endif // ROCRAND_LFSR113_PRECOMPUTED_H_ diff --git a/library/include/rocrand/rocrand_log_normal.h b/library/include/rocrand/rocrand_log_normal.h index a9e5a80f..068426b3 100644 --- a/library/include/rocrand/rocrand_log_normal.h +++ b/library/include/rocrand/rocrand_log_normal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_LOG_NORMAL_H_ #define ROCRAND_LOG_NORMAL_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - /** \rocrand_internal \addtogroup rocranddevice * * @{ @@ -64,9 +60,9 @@ * * \return Log-normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_log_normal(rocrand_state_philox4x32_10 * state, float mean, float stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_philox4x32_10* state, float mean, float stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -82,7 +78,7 @@ float rocrand_log_normal(rocrand_state_philox4x32_10 * state, float mean, float bm_helper::save_float(state, r.y); return expf(mean + (stddev * r.x)); } -#endif // ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p float values. 
@@ -98,8 +94,9 @@ float rocrand_log_normal(rocrand_state_philox4x32_10 * state, float mean, float * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_log_normal2(rocrand_state_philox4x32_10 * state, float mean, float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_philox4x32_10* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -125,8 +122,9 @@ float2 rocrand_log_normal2(rocrand_state_philox4x32_10 * state, float mean, floa * * \return Four log-normally distributed \p float value as \p float4 */ -FQUALIFIERS -float4 rocrand_log_normal4(rocrand_state_philox4x32_10 * state, float mean, float stddev) +__forceinline__ __device__ __host__ float4 rocrand_log_normal4(rocrand_state_philox4x32_10* state, + float mean, + float stddev) { float4 r = rocrand_device::detail::normal_distribution4(rocrand4(state)); return float4 { @@ -152,9 +150,9 @@ float4 rocrand_log_normal4(rocrand_state_philox4x32_10 * state, float mean, floa * * \return Log-normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_philox4x32_10 * state, double mean, double stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_philox4x32_10* state, double mean, double stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -166,7 +164,7 @@ double rocrand_log_normal_double(rocrand_state_philox4x32_10 * state, double mea bm_helper::save_double(state, r.y); return exp(mean + r.x * stddev); } -#endif // ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p double values. 
@@ -182,8 +180,8 @@ double rocrand_log_normal_double(rocrand_state_philox4x32_10 * state, double mea * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS -double2 rocrand_log_normal_double2(rocrand_state_philox4x32_10 * state, double mean, double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_philox4x32_10* state, double mean, double stddev) { double2 r = rocrand_device::detail::normal_distribution_double2(rocrand4(state)); return double2 { @@ -206,8 +204,8 @@ double2 rocrand_log_normal_double2(rocrand_state_philox4x32_10 * state, double m * * \return Four log-normally distributed \p double values as \p double4 */ -FQUALIFIERS -double4 rocrand_log_normal_double4(rocrand_state_philox4x32_10 * state, double mean, double stddev) +__forceinline__ __device__ __host__ double4 + rocrand_log_normal_double4(rocrand_state_philox4x32_10* state, double mean, double stddev) { double2 r1, r2; r1 = rocrand_log_normal_double2(state, mean, stddev); @@ -232,8 +230,9 @@ double4 rocrand_log_normal_double4(rocrand_state_philox4x32_10 * state, double m * * \return Log-normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -FQUALIFIERS float rocrand_log_normal(rocrand_state_mrg31k3p* state, float mean, float stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_mrg31k3p* state, float mean, float stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -250,7 +249,7 @@ FQUALIFIERS float rocrand_log_normal(rocrand_state_mrg31k3p* state, float mean, bm_helper::save_float(state, r.y); return expf(mean + (stddev * r.x)); } -#endif // ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p float values. 
@@ -266,7 +265,9 @@ FQUALIFIERS float rocrand_log_normal(rocrand_state_mrg31k3p* state, float mean, * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_mrg31k3p* state, float mean, float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_mrg31k3p* state, + float mean, + float stddev) { auto state1 = state->next(); auto state2 = state->next(); @@ -291,8 +292,8 @@ FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_mrg31k3p* state, float mean * * \return Log-normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -FQUALIFIERS double +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double rocrand_log_normal_double(rocrand_state_mrg31k3p* state, double mean, double stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -311,7 +312,7 @@ FQUALIFIERS double bm_helper::save_double(state, r.y); return exp(mean + r.x * stddev); } -#endif // ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p double values. 
@@ -327,9 +328,8 @@ FQUALIFIERS double * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_mrg31k3p* state, - double mean, - double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_mrg31k3p* state, double mean, double stddev) { auto state1 = state->next(); auto state2 = state->next(); @@ -355,9 +355,9 @@ FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_mrg31k3p* state, * * \return Log-normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_log_normal(rocrand_state_mrg32k3a * state, float mean, float stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_mrg32k3a* state, float mean, float stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -374,7 +374,7 @@ float rocrand_log_normal(rocrand_state_mrg32k3a * state, float mean, float stdde bm_helper::save_float(state, r.y); return expf(mean + (stddev * r.x)); } -#endif // ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p float values. 
@@ -390,8 +390,9 @@ float rocrand_log_normal(rocrand_state_mrg32k3a * state, float mean, float stdde * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_log_normal2(rocrand_state_mrg32k3a * state, float mean, float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_mrg32k3a* state, + float mean, + float stddev) { auto state1 = state->next(); auto state2 = state->next(); @@ -419,9 +420,9 @@ float2 rocrand_log_normal2(rocrand_state_mrg32k3a * state, float mean, float std * * \return Log-normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_mrg32k3a * state, double mean, double stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_mrg32k3a* state, double mean, double stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -439,7 +440,7 @@ double rocrand_log_normal_double(rocrand_state_mrg32k3a * state, double mean, do bm_helper::save_double(state, r.y); return exp(mean + r.x * stddev); } -#endif // ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p double values. 
@@ -455,8 +456,8 @@ double rocrand_log_normal_double(rocrand_state_mrg32k3a * state, double mean, do * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS -double2 rocrand_log_normal_double2(rocrand_state_mrg32k3a * state, double mean, double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_mrg32k3a* state, double mean, double stddev) { auto state1 = state->next(); auto state2 = state->next(); @@ -485,9 +486,9 @@ double2 rocrand_log_normal_double2(rocrand_state_mrg32k3a * state, double mean, * * \return Log-normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_log_normal(rocrand_state_xorwow * state, float mean, float stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_xorwow* state, float mean, float stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -503,7 +504,7 @@ float rocrand_log_normal(rocrand_state_xorwow * state, float mean, float stddev) bm_helper::save_float(state, r.y); return expf(mean + (stddev * r.x)); } -#endif // ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p float values. 
@@ -519,8 +520,9 @@ float rocrand_log_normal(rocrand_state_xorwow * state, float mean, float stddev) * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_log_normal2(rocrand_state_xorwow * state, float mean, float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_xorwow* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -547,9 +549,9 @@ float2 rocrand_log_normal2(rocrand_state_xorwow * state, float mean, float stdde * * \return Log-normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_xorwow * state, double mean, double stddev) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_xorwow* state, double mean, double stddev) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -569,7 +571,7 @@ double rocrand_log_normal_double(rocrand_state_xorwow * state, double mean, doub bm_helper::save_double(state, r.y); return exp(mean + (stddev * r.x)); } -#endif // ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two log-normally distributed \p double values. 
@@ -585,8 +587,9 @@ double rocrand_log_normal_double(rocrand_state_xorwow * state, double mean, doub * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS -double2 rocrand_log_normal_double2(rocrand_state_xorwow * state, double mean, double stddev) +__forceinline__ __device__ __host__ double2 rocrand_log_normal_double2(rocrand_state_xorwow* state, + double mean, + double stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -614,13 +617,38 @@ double2 rocrand_log_normal_double2(rocrand_state_xorwow * state, double mean, do * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float rocrand_log_normal(rocrand_state_mtgp32 * state, float mean, float stddev) +__forceinline__ __device__ float + rocrand_log_normal(rocrand_state_mtgp32* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); } +/** + * \brief Returns two log-normally distributed \p float values. + * + * Generates and returns two log-normally distributed \p float values using MTGP32 + * generator in \p state, and increments position of the generator by two. + * The function uses the Box-Muller transform method to generate two normally distributed + * values, transforms them to log-normally distributed values, and returns both. 
+ * + * \param state - Pointer to a state to use + * \param mean - Mean of the related log-normal distribution + * \param stddev - Standard deviation of the related log-normal distribution + * + * \return Two log-normally distributed \p float values as \p float2 + */ +__forceinline__ __device__ float2 rocrand_log_normal2(rocrand_state_mtgp32* state, + float mean, + float stddev) +{ + auto state1 = rocrand(state); + auto state2 = rocrand(state); + + float2 r = rocrand_device::detail::normal_distribution2(state1, state2); + return float2{expf(mean + (stddev * r.x)), expf(mean + (stddev * r.y))}; +} + /** * \brief Returns a log-normally distributed \p double value. * @@ -633,13 +661,41 @@ float rocrand_log_normal(rocrand_state_mtgp32 * state, float mean, float stddev) * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_mtgp32 * state, double mean, double stddev) +__forceinline__ __device__ double + rocrand_log_normal_double(rocrand_state_mtgp32* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); } +/** + * \brief Returns two log-normally distributed \p double values. + * + * Generates and returns two log-normally distributed \p double values using MTGP32 + * generator in \p state, and increments position of the generator by four. + * The function uses the Box-Muller transform method to generate two normally distributed + * values, transforms them to log-normally distributed values, and returns both.
+ * + * \param state - Pointer to a state to use + * \param mean - Mean of the related log-normal distribution + * \param stddev - Standard deviation of the related log-normal distribution + * + * \return Two log-normally distributed \p double values as \p double2 + */ +__forceinline__ __device__ double2 rocrand_log_normal_double2(rocrand_state_mtgp32* state, + double mean, + double stddev) +{ + auto state1 = rocrand(state); + auto state2 = rocrand(state); + auto state3 = rocrand(state); + auto state4 = rocrand(state); + + double2 r = rocrand_device::detail::normal_distribution_double2( + uint4{state1, state2, state3, state4}); + return double2{exp(mean + (stddev * r.x)), exp(mean + (stddev * r.y))}; +} + /** * \brief Returns a log-normally distributed \p float value. * @@ -652,8 +708,8 @@ double rocrand_log_normal_double(rocrand_state_mtgp32 * state, double mean, doub * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float rocrand_log_normal(rocrand_state_sobol32 * state, float mean, float stddev) +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_sobol32* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); @@ -671,8 +727,8 @@ float rocrand_log_normal(rocrand_state_sobol32 * state, float mean, float stddev * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_sobol32 * state, double mean, double stddev) +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_sobol32* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); @@ -690,8 +746,8 @@ double rocrand_log_normal_double(rocrand_state_sobol32 * state, double mean, dou * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float rocrand_log_normal(rocrand_state_scrambled_sobol32* 
state, float mean, float stddev) +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_scrambled_sobol32* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); @@ -709,8 +765,8 @@ float rocrand_log_normal(rocrand_state_scrambled_sobol32* state, float mean, flo * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_scrambled_sobol32* state, double mean, double stddev) +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_scrambled_sobol32* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); @@ -728,8 +784,8 @@ double rocrand_log_normal_double(rocrand_state_scrambled_sobol32* state, double * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float rocrand_log_normal(rocrand_state_sobol64* state, float mean, float stddev) +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_sobol64* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); @@ -747,8 +803,8 @@ float rocrand_log_normal(rocrand_state_sobol64* state, float mean, float stddev) * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_sobol64 * state, double mean, double stddev) +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_sobol64* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); @@ -766,8 +822,8 @@ double rocrand_log_normal_double(rocrand_state_sobol64 * state, double mean, dou * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float 
rocrand_log_normal(rocrand_state_scrambled_sobol64* state, float mean, float stddev) +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_scrambled_sobol64* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); @@ -785,8 +841,8 @@ float rocrand_log_normal(rocrand_state_scrambled_sobol64* state, float mean, flo * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double rocrand_log_normal_double(rocrand_state_scrambled_sobol64* state, double mean, double stddev) +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_scrambled_sobol64* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); @@ -804,8 +860,8 @@ double rocrand_log_normal_double(rocrand_state_scrambled_sobol64* state, double * * \return Log-normally distributed \p float value */ -FQUALIFIERS -float rocrand_log_normal(rocrand_state_lfsr113* state, float mean, float stddev) +__forceinline__ __device__ __host__ float + rocrand_log_normal(rocrand_state_lfsr113* state, float mean, float stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); return expf(mean + (stddev * r)); @@ -825,8 +881,9 @@ float rocrand_log_normal(rocrand_state_lfsr113* state, float mean, float stddev) * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_log_normal2(rocrand_state_lfsr113* state, float mean, float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_lfsr113* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -847,8 +904,8 @@ float2 rocrand_log_normal2(rocrand_state_lfsr113* state, float mean, float stdde * * \return Log-normally distributed \p double value */ -FQUALIFIERS -double 
rocrand_log_normal_double(rocrand_state_lfsr113* state, double mean, double stddev) +__forceinline__ __device__ __host__ double + rocrand_log_normal_double(rocrand_state_lfsr113* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); return exp(mean + (stddev * r)); @@ -868,8 +925,9 @@ double rocrand_log_normal_double(rocrand_state_lfsr113* state, double mean, doub * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS -double2 rocrand_log_normal_double2(rocrand_state_lfsr113* state, double mean, double stddev) +__forceinline__ __device__ __host__ double2 rocrand_log_normal_double2(rocrand_state_lfsr113* state, + double mean, + double stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -893,7 +951,7 @@ double2 rocrand_log_normal_double2(rocrand_state_lfsr113* state, double mean, do * * \return Log-normally distributed \p float value */ -FQUALIFIERS float +__forceinline__ __device__ __host__ float rocrand_log_normal(rocrand_state_threefry2x32_20* state, double mean, double stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); @@ -914,9 +972,9 @@ FQUALIFIERS float * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry2x32_20* state, - float mean, - float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_threefry2x32_20* state, + float mean, + float stddev) { float2 r = rocrand_device::detail::normal_distribution2(rocrand2(state)); return float2{expf(mean + (stddev * r.x)), expf(mean + (stddev * r.y))}; @@ -934,7 +992,7 @@ FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry2x32_20* state, * * \return Log-normally distributed \p double value */ -FQUALIFIERS double +__forceinline__ __device__ __host__ double rocrand_log_normal_double(rocrand_state_threefry2x32_20* state, double mean, double stddev) { double 
r = rocrand_device::detail::normal_distribution_double(rocrand(state)); @@ -955,9 +1013,8 @@ FQUALIFIERS double * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry2x32_20* state, - double mean, - double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_threefry2x32_20* state, double mean, double stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -981,7 +1038,7 @@ FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry2x32_20* st * * \return Log-normally distributed \p float value */ -FQUALIFIERS float +__forceinline__ __device__ __host__ float rocrand_log_normal(rocrand_state_threefry2x64_20* state, double mean, double stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); @@ -1002,9 +1059,9 @@ FQUALIFIERS float * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry2x64_20* state, - float mean, - float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_threefry2x64_20* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1025,7 +1082,7 @@ FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry2x64_20* state, * * \return Log-normally distributed \p double value */ -FQUALIFIERS double +__forceinline__ __device__ __host__ double rocrand_log_normal_double(rocrand_state_threefry2x64_20* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); @@ -1046,9 +1103,8 @@ FQUALIFIERS double * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry2x64_20* state, - double mean, - double stddev) +__forceinline__ __device__ __host__ double2 + 
rocrand_log_normal_double2(rocrand_state_threefry2x64_20* state, double mean, double stddev) { double2 r = rocrand_device::detail::normal_distribution_double2(rocrand2(state)); return double2{exp(mean + (stddev * r.x)), exp(mean + (stddev * r.y))}; @@ -1066,7 +1122,7 @@ FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry2x64_20* st * * \return Log-normally distributed \p float value */ -FQUALIFIERS float +__forceinline__ __device__ __host__ float rocrand_log_normal(rocrand_state_threefry4x32_20* state, double mean, double stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); @@ -1087,9 +1143,9 @@ FQUALIFIERS float * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry4x32_20* state, - float mean, - float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_threefry4x32_20* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1110,7 +1166,7 @@ FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry4x32_20* state, * * \return Log-normally distributed \p double value */ -FQUALIFIERS double +__forceinline__ __device__ __host__ double rocrand_log_normal_double(rocrand_state_threefry4x32_20* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); @@ -1131,9 +1187,8 @@ FQUALIFIERS double * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry4x32_20* state, - double mean, - double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_threefry4x32_20* state, double mean, double stddev) { double2 r = rocrand_device::detail::normal_distribution_double2(rocrand4(state)); return double2{exp(mean + (stddev * r.x)), exp(mean + (stddev * r.y))}; @@ -1151,7 +1206,7 @@ FQUALIFIERS 
double2 rocrand_log_normal_double2(rocrand_state_threefry4x32_20* st * * \return Log-normally distributed \p float value */ -FQUALIFIERS float +__forceinline__ __device__ __host__ float rocrand_log_normal(rocrand_state_threefry4x64_20* state, double mean, double stddev) { float r = rocrand_device::detail::normal_distribution(rocrand(state)); @@ -1172,9 +1227,9 @@ FQUALIFIERS float * * \return Two log-normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry4x64_20* state, - float mean, - float stddev) +__forceinline__ __device__ __host__ float2 rocrand_log_normal2(rocrand_state_threefry4x64_20* state, + float mean, + float stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1195,7 +1250,7 @@ FQUALIFIERS float2 rocrand_log_normal2(rocrand_state_threefry4x64_20* state, * * \return Log-normally distributed \p double value */ -FQUALIFIERS double +__forceinline__ __device__ __host__ double rocrand_log_normal_double(rocrand_state_threefry4x64_20* state, double mean, double stddev) { double r = rocrand_device::detail::normal_distribution_double(rocrand(state)); @@ -1216,9 +1271,8 @@ FQUALIFIERS double * * \return Two log-normally distributed \p double values as \p double2 */ -FQUALIFIERS double2 rocrand_log_normal_double2(rocrand_state_threefry4x64_20* state, - double mean, - double stddev) +__forceinline__ __device__ __host__ double2 + rocrand_log_normal_double2(rocrand_state_threefry4x64_20* state, double mean, double stddev) { auto state1 = rocrand(state); auto state2 = rocrand(state); diff --git a/library/include/rocrand/rocrand_mrg31k3p.h b/library/include/rocrand/rocrand_mrg31k3p.h index c7292722..38fe73db 100644 --- a/library/include/rocrand/rocrand_mrg31k3p.h +++ b/library/include/rocrand/rocrand_mrg31k3p.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_MRG31K3P_H_ #define ROCRAND_MRG31K3P_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_mrg31k3p_precomputed.h" @@ -59,7 +55,7 @@ class mrg31k3p_engine unsigned int x1[3]; unsigned int x2[3]; -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE // The Box–Muller transform requires two inputs to convert uniformly // distributed real values [0; 1] to normally distributed real values // (with mean = 0, and stddev = 1). Often user wants only one @@ -72,7 +68,7 @@ class mrg31k3p_engine #endif }; - FQUALIFIERS mrg31k3p_engine() + __forceinline__ __device__ __host__ mrg31k3p_engine() { this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0); } @@ -85,9 +81,9 @@ class mrg31k3p_engine /// zero, value \p ROCRAND_MRG31K3P_DEFAULT_SEED is used instead. /// /// A subsequence is 2^72 numbers long. - FQUALIFIERS mrg31k3p_engine(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ mrg31k3p_engine(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset) { this->seed(seed, subsequence, offset); } @@ -100,9 +96,9 @@ class mrg31k3p_engine /// zero, value \p ROCRAND_MRG31K3P_DEFAULT_SEED is used instead. /// /// A subsequence is 2^72 numbers long. 
- FQUALIFIERS void seed(unsigned long long seed_value, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ void seed(unsigned long long seed_value, + const unsigned long long subsequence, + const unsigned long long offset) { if(seed_value == 0) { @@ -120,28 +116,29 @@ class mrg31k3p_engine } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { this->discard_impl(offset); } /// Advances the internal state to skip \p subsequence subsequences. /// A subsequence is 2^72 numbers long. - FQUALIFIERS void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { this->discard_subsequence_impl(subsequence); } /// Advances the internal state to skip \p sequence sequences. /// A sequence is 2^134 numbers long. - FQUALIFIERS void discard_sequence(unsigned long long sequence) + __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence) { this->discard_sequence_impl(sequence); } - FQUALIFIERS void restart(const unsigned long long subsequence, const unsigned long long offset) + __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence, + const unsigned long long offset) { -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE m_state.boxmuller_float_state = 0; m_state.boxmuller_double_state = 0; #endif @@ -149,13 +146,13 @@ class mrg31k3p_engine this->discard_impl(offset); } - FQUALIFIERS unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } // Returned value is in range [1, ROCRAND_MRG31K3P_M1]. 
- FQUALIFIERS unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { // First component unsigned int tmp @@ -188,13 +185,14 @@ class mrg31k3p_engine protected: // Advances the internal state to skip \p offset numbers. - FQUALIFIERS void discard_impl(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset) { discard_state(offset); } // Advances the internal state to skip \p subsequence subsequences. - FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence) + __forceinline__ __device__ __host__ void + discard_subsequence_impl(unsigned long long subsequence) { int i = 0; @@ -216,7 +214,7 @@ class mrg31k3p_engine } // Advances the internal state to skip \p sequences. - FQUALIFIERS void discard_sequence_impl(unsigned long long sequence) + __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence) { int i = 0; @@ -238,7 +236,7 @@ class mrg31k3p_engine } // Advances the internal state to skip \p offset numbers. - FQUALIFIERS void discard_state(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long offset) { int i = 0; @@ -260,13 +258,14 @@ class mrg31k3p_engine } // Advances the internal state to the next state. 
- FQUALIFIERS void discard_state() + __forceinline__ __device__ __host__ void discard_state() { discard_state(1); } private: - FQUALIFIERS static void mod_mat_vec_m1(const unsigned int* A, unsigned int* s) + __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned int* A, + unsigned int* s) { unsigned long long x[3] = {s[0], s[1], s[2]}; @@ -277,7 +276,8 @@ class mrg31k3p_engine s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2])); } - FQUALIFIERS static void mod_mat_vec_m2(const unsigned int* A, unsigned int* s) + __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned int* A, + unsigned int* s) { unsigned long long x[3] = {s[0], s[1], s[2]}; @@ -288,22 +288,24 @@ class mrg31k3p_engine s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2])); } - FQUALIFIERS static unsigned long long mod_mul_m1(unsigned int i, unsigned long long j) + __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i, + unsigned long long j) { return mod_m1(i * j); } - FQUALIFIERS static unsigned long long mod_m1(unsigned long long p) + __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p) { return p % ROCRAND_MRG31K3P_M1; } - FQUALIFIERS static unsigned long long mod_mul_m2(unsigned int i, unsigned long long j) + __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i, + unsigned long long j) { return mod_m2(i * j); } - FQUALIFIERS static unsigned long long mod_m2(unsigned long long p) + __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p) { return p % ROCRAND_MRG31K3P_M2; } @@ -312,7 +314,7 @@ class mrg31k3p_engine // State mrg31k3p_state m_state; -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE friend struct detail::engine_boxmuller_helper; #endif }; // mrg31k3p_engine class @@ -339,10 +341,10 @@ typedef 
rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_mrg31k3p* state) { *state = rocrand_state_mrg31k3p(seed, subsequence, offset); } @@ -359,7 +361,7 @@ FQUALIFIERS void rocrand_init(const unsigned long long seed, * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS unsigned int rocrand(rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p* state) { // next() in [1, ROCRAND_MRG31K3P_M1] return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM); @@ -373,7 +375,8 @@ FQUALIFIERS unsigned int rocrand(rocrand_state_mrg31k3p* state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_mrg31k3p* state) { return state->discard(offset); } @@ -387,8 +390,8 @@ FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* st * \param subsequence - Number of subsequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, - rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, + rocrand_state_mrg31k3p* state) { return state->discard_subsequence(subsequence); } @@ -402,7 +405,8 @@ FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, * \param
sequence - Number of sequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, + rocrand_state_mrg31k3p* state) { return state->discard_sequence(sequence); } diff --git a/library/include/rocrand/rocrand_mrg32k3a.h b/library/include/rocrand/rocrand_mrg32k3a.h index 3a6cfdbc..790584e6 100644 --- a/library/include/rocrand/rocrand_mrg32k3a.h +++ b/library/include/rocrand/rocrand_mrg32k3a.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_MRG32K3A_H_ #define ROCRAND_MRG32K3A_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_mrg32k3a_precomputed.h" @@ -63,7 +59,7 @@ class mrg32k3a_engine unsigned int g1[3]; unsigned int g2[3]; - #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE // The Box–Muller transform requires two inputs to convert uniformly // distributed real values [0; 1] to normally distributed real values // (with mean = 0, and stddev = 1). 
Often user wants only one @@ -73,11 +69,10 @@ class mrg32k3a_engine unsigned int boxmuller_double_state; // is there a double in boxmuller_double float boxmuller_float; // normally distributed float double boxmuller_double; // normally distributed double - #endif + #endif }; - FQUALIFIERS - mrg32k3a_engine() + __forceinline__ __device__ __host__ mrg32k3a_engine() { this->seed(ROCRAND_MRG32K3A_DEFAULT_SEED, 0, 0); } @@ -90,10 +85,9 @@ class mrg32k3a_engine /// zero, value \p ROCRAND_MRG32K3A_DEFAULT_SEED is used instead. /// /// A subsequence is 2^76 numbers long. - FQUALIFIERS - mrg32k3a_engine(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ mrg32k3a_engine(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset) { this->seed(seed, subsequence, offset); } @@ -106,10 +100,9 @@ class mrg32k3a_engine /// zero, value \p ROCRAND_MRG32K3A_DEFAULT_SEED is used instead. /// /// A subsequence is 2^76 numbers long. - FQUALIFIERS - void seed(unsigned long long seed_value, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ void seed(unsigned long long seed_value, + const unsigned long long subsequence, + const unsigned long long offset) { if(seed_value == 0) { @@ -127,50 +120,44 @@ class mrg32k3a_engine } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS - void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { this->discard_impl(offset); } /// Advances the internal state to skip \p subsequence subsequences. /// A subsequence is 2^76 numbers long. 
- FQUALIFIERS - void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { this->discard_subsequence_impl(subsequence); } /// Advances the internal state to skip \p sequence sequences. /// A sequence is 2^127 numbers long. - FQUALIFIERS - void discard_sequence(unsigned long long sequence) + __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence) { this->discard_sequence_impl(sequence); } - FQUALIFIERS - void restart(const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence, + const unsigned long long offset) { - #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE m_state.boxmuller_float_state = 0; m_state.boxmuller_double_state = 0; - #endif + #endif this->discard_subsequence_impl(subsequence); this->discard_impl(offset); } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } // Returned value is in range [1, ROCRAND_MRG32K3A_M1], // where ROCRAND_MRG32K3A_M1 < UINT_MAX - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { const unsigned int p1 = mod_m1( detail::mad_u64_u32( @@ -208,15 +195,14 @@ class mrg32k3a_engine protected: // Advances the internal state to skip \p offset numbers. 
// DOES NOT CALCULATE NEW ULONGLONG - FQUALIFIERS - void discard_impl(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset) { discard_state(offset); } // DOES NOT CALCULATE NEW ULONGLONG - FQUALIFIERS - void discard_subsequence_impl(unsigned long long subsequence) + __forceinline__ __device__ __host__ void + discard_subsequence_impl(unsigned long long subsequence) { int i = 0; @@ -236,8 +222,7 @@ class mrg32k3a_engine } // DOES NOT CALCULATE NEW ULONGLONG - FQUALIFIERS - void discard_sequence_impl(unsigned long long sequence) + __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence) { int i = 0; @@ -258,8 +243,7 @@ class mrg32k3a_engine // Advances the internal state by offset times. // DOES NOT CALCULATE NEW ULONGLONG - FQUALIFIERS - void discard_state(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long offset) { int i = 0; @@ -280,16 +264,14 @@ class mrg32k3a_engine // Advances the internal state to the next state // DOES NOT CALCULATE NEW ULONGLONG - FQUALIFIERS - void discard_state() + __forceinline__ __device__ __host__ void discard_state() { discard_state(1); } private: - FQUALIFIERS - static void mod_mat_vec_m1(const unsigned long long * A, - unsigned int * s) + __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned long long* A, + unsigned int* s) { unsigned long long x[3]; @@ -310,9 +292,8 @@ class mrg32k3a_engine s[2] = x[2]; } - FQUALIFIERS - static void mod_mat_vec_m2(const unsigned long long * A, - unsigned int * s) + __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned long long* A, + unsigned int* s) { unsigned long long x[3]; @@ -333,9 +314,8 @@ class mrg32k3a_engine s[2] = x[2]; } - FQUALIFIERS - static unsigned long long mod_mul_m1(unsigned int i, - unsigned long long j) + __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i, 
+ unsigned long long j) { long long hi, lo, temp1, temp2; @@ -350,8 +330,7 @@ class mrg32k3a_engine return lo; } - FQUALIFIERS - static unsigned long long mod_m1(unsigned long long p) + __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p) { p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M1C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1)); if (p >= ROCRAND_MRG32K3A_M1) @@ -360,9 +339,8 @@ class mrg32k3a_engine return p; } - FQUALIFIERS - static unsigned long long mod_mul_m2(unsigned int i, - unsigned long long j) + __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i, + unsigned long long j) { long long hi, lo, temp1, temp2; @@ -377,8 +355,7 @@ class mrg32k3a_engine return lo; } - FQUALIFIERS - static unsigned long long mod_m2(unsigned long long p) + __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p) { p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1)); p = detail::mad_u64_u32(ROCRAND_MRG32K3A_M2C, (p >> 32), p & (ROCRAND_MRG32K3A_POW32 - 1)); @@ -392,7 +369,7 @@ class mrg32k3a_engine // State mrg32k3a_state m_state; - #ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE friend struct detail::engine_boxmuller_helper; #endif @@ -420,11 +397,10 @@ typedef rocrand_device::mrg32k3a_engine rocrand_state_mrg32k3a; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_mrg32k3a* state) { *state = rocrand_state_mrg32k3a(seed, subsequence, offset); } @@ -441,8 +417,7 @@ void rocrand_init(const unsigned long 
long seed, * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg32k3a* state) { // next() in [1, ROCRAND_MRG32K3A_M1] return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG32K3A_UINT_NORM); @@ -456,8 +431,8 @@ unsigned int rocrand(rocrand_state_mrg32k3a * state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_mrg32k3a* state) { return state->discard(offset); } @@ -471,8 +446,8 @@ void skipahead(unsigned long long offset, rocrand_state_mrg32k3a * state) * \param subsequence - Number of subsequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, + rocrand_state_mrg32k3a* state) { return state->discard_subsequence(subsequence); } @@ -486,8 +461,8 @@ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg32k3 * \param sequence - Number of sequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, + rocrand_state_mrg32k3a* state) { return state->discard_sequence(sequence); } diff --git a/library/include/rocrand/rocrand_mtgp32.h b/library/include/rocrand/rocrand_mtgp32.h index f10513d8..daf72889 100644 --- a/library/include/rocrand/rocrand_mtgp32.h +++ b/library/include/rocrand/rocrand_mtgp32.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc.
All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -57,10 +57,6 @@ #include -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand.h" #include "rocrand/rocrand_common.h" @@ -152,17 +148,16 @@ void rocrand_mtgp32_init_state(unsigned int array[], class mtgp32_engine { public: - FQUALIFIERS - // Initialization is not supported for __shared__ variables - mtgp32_engine() // cppcheck-suppress uninitMemberVar + __forceinline__ __device__ __host__ + // Initialization is not supported for __shared__ variables + mtgp32_engine() // cppcheck-suppress uninitMemberVar { } - FQUALIFIERS - mtgp32_engine(const mtgp32_state &m_state, - const mtgp32_params * params, - int bid) + __forceinline__ __device__ __host__ mtgp32_engine(const mtgp32_state& m_state, + const mtgp32_params* params, + int bid) { this->m_state = m_state; pos_tbl = params->pos_tbl[bid]; @@ -176,8 +171,7 @@ class mtgp32_engine } } - FQUALIFIERS - void copy(const mtgp32_engine * m_engine) + __forceinline__ __device__ __host__ void copy(const mtgp32_engine* m_engine) { #if defined(__HIP_DEVICE_COMPILE__) const unsigned int thread_id = threadIdx.x; @@ -214,8 +208,7 @@ class mtgp32_engine #endif } - FQUALIFIERS - void set_params(mtgp32_params * params) + __forceinline__ __device__ __host__ void set_params(mtgp32_params* params) { pos_tbl = params->pos_tbl[m_state.id]; sh1_tbl = params->sh1_tbl[m_state.id]; @@ -228,31 +221,20 @@ class mtgp32_engine } } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { -#if defined(__HIP_DEVICE_COMPILE__) - unsigned int t = threadIdx.x; - 
unsigned int d = blockDim.x; - int pos = pos_tbl; - unsigned int r; - unsigned int o; - - r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK], - m_state.status[(t + m_state.offset + 1) & MTGP_MASK], - m_state.status[(t + m_state.offset + pos) & MTGP_MASK]); - m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r; - - o = temper(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]); +#ifdef __HIP_DEVICE_COMPILE__ + unsigned int o = next_thread(threadIdx.x); __syncthreads(); - if (t == 0) - m_state.offset = (m_state.offset + d) & MTGP_MASK; + if(threadIdx.x == 0) + { + m_state.offset = (m_state.offset + blockDim.x) & MTGP_MASK; + } __syncthreads(); return o; #else @@ -260,8 +242,7 @@ class mtgp32_engine #endif } - FQUALIFIERS - unsigned int next_single() + __forceinline__ __device__ __host__ unsigned int next_single() { #if defined(__HIP_DEVICE_COMPILE__) unsigned int t = threadIdx.x; @@ -287,8 +268,8 @@ class mtgp32_engine } private: - FQUALIFIERS - unsigned int para_rec(unsigned int X1, unsigned int X2, unsigned int Y) const + __forceinline__ __device__ __host__ unsigned int + para_rec(unsigned int X1, unsigned int X2, unsigned int Y) const { unsigned int X = (X1 & mask) ^ X2; unsigned int MAT; @@ -299,8 +280,7 @@ class mtgp32_engine return Y ^ MAT; } - FQUALIFIERS - unsigned int temper(unsigned int V, unsigned int T) const + __forceinline__ __device__ __host__ unsigned int temper(unsigned int V, unsigned int T) const { unsigned int MAT; @@ -310,8 +290,8 @@ class mtgp32_engine return V ^ MAT; } - FQUALIFIERS - unsigned int temper_single(unsigned int V, unsigned int T) const + __forceinline__ __device__ __host__ unsigned int temper_single(unsigned int V, + unsigned int T) const { unsigned int MAT; unsigned int r; @@ -323,6 +303,19 @@ class mtgp32_engine return r; } +protected: + /// \brief Generate the next value for thread `thread_idx` and modify state in the process, + /// do not update offset. 
+ __forceinline__ __device__ __host__ unsigned int next_thread(unsigned int thread_idx) + { + const unsigned int r + = para_rec(m_state.status[(thread_idx + m_state.offset) & MTGP_MASK], + m_state.status[(thread_idx + m_state.offset + 1) & MTGP_MASK], + m_state.status[(thread_idx + m_state.offset + pos_tbl) & MTGP_MASK]); + m_state.status[(thread_idx + m_state.offset + MTGP_N) & MTGP_MASK] = r; + return temper(r, m_state.status[(thread_idx + m_state.offset + pos_tbl - 1) & MTGP_MASK]); + } + public: // State mtgp32_state m_state; @@ -355,9 +348,9 @@ typedef rocrand_device::mtgp32_params mtgp32_params; * \brief Initializes MTGP32 states * * Initializes MTGP32 states on the host-side by allocating a state array in host - * memory, initializes that array, and copies the result to device memory. + * memory, initializes that array, and copies the result to device or host memory. * - * \param d_state - Pointer to an array of states in device memory + * \param state - Pointer to an array of states in device or host memory * \param params - Pointer to an array of type mtgp32_fast_params in host memory * \param n - Number of states to initialize * \param seed - Seed value @@ -366,11 +359,10 @@ typedef rocrand_device::mtgp32_params mtgp32_params; * - ROCRAND_STATUS_ALLOCATION_FAILED if states could not be initialized * - ROCRAND_STATUS_SUCCESS if states are initialized */ -__host__ inline -rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 * d_state, - mtgp32_fast_params params[], - int n, - unsigned long long seed) +__host__ inline rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32* state, + mtgp32_fast_params params[], + int n, + unsigned long long seed) { int i; rocrand_state_mtgp32 * h_state = (rocrand_state_mtgp32 *) malloc(sizeof(rocrand_state_mtgp32) * n); @@ -395,7 +387,7 @@ rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 * d_state, } const hipError_t error - = hipMemcpy(d_state, h_state, sizeof(rocrand_state_mtgp32) * n, 
hipMemcpyHostToDevice); + = hipMemcpy(state, h_state, sizeof(rocrand_state_mtgp32) * n, hipMemcpyDefault); free(h_state); if(error != hipSuccess) @@ -500,8 +492,7 @@ rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_p * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_mtgp32 * state) +__forceinline__ __device__ unsigned int rocrand(rocrand_state_mtgp32* state) { return state->next(); } @@ -537,8 +528,8 @@ unsigned int rocrand(rocrand_state_mtgp32 * state) * \param dest - Pointer to a state to copy to * */ -FQUALIFIERS -void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 * src, rocrand_state_mtgp32 * dest) +__forceinline__ __device__ void rocrand_mtgp32_block_copy(rocrand_state_mtgp32* src, + rocrand_state_mtgp32* dest) { dest->copy(src); } @@ -549,8 +540,8 @@ void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 * src, rocrand_state_mtgp32 * \param state - Pointer to a MTGP32 state * \param params - Pointer to new parameters */ -FQUALIFIERS -void rocrand_mtgp32_set_params(rocrand_state_mtgp32 * state, mtgp32_params * params) +__forceinline__ __device__ void rocrand_mtgp32_set_params(rocrand_state_mtgp32* state, + mtgp32_params* params) { state->set_params(params); } diff --git a/library/include/rocrand/rocrand_normal.h b/library/include/rocrand/rocrand_normal.h index d43a7043..e6872060 100644 --- a/library/include/rocrand/rocrand_normal.h +++ b/library/include/rocrand/rocrand_normal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_NORMAL_H_ #define ROCRAND_NORMAL_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - /** \rocrand_internal \addtogroup rocranddevice * * @{ @@ -52,8 +48,7 @@ namespace rocrand_device { namespace detail { -FQUALIFIERS -float2 box_muller(unsigned int x, unsigned int y) +__forceinline__ __device__ __host__ float2 box_muller(unsigned int x, unsigned int y) { float2 result; float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV); @@ -70,7 +65,7 @@ float2 box_muller(unsigned int x, unsigned int y) return result; } -FQUALIFIERS float2 box_muller(unsigned long long v) +__forceinline__ __device__ __host__ float2 box_muller(unsigned long long v) { unsigned int x = static_cast(v); unsigned int y = static_cast(v >> 32); @@ -78,8 +73,7 @@ FQUALIFIERS float2 box_muller(unsigned long long v) return box_muller(x, y); } -FQUALIFIERS -double2 box_muller_double(uint4 v) +__forceinline__ __device__ __host__ double2 box_muller_double(uint4 v) { double2 result; unsigned long long int v1 = (unsigned long long int)v.x ^ @@ -101,7 +95,7 @@ double2 box_muller_double(uint4 v) return result; } -FQUALIFIERS double2 box_muller_double(ulonglong2 v) +__forceinline__ __device__ __host__ double2 box_muller_double(ulonglong2 v) { unsigned int x = static_cast(v.x); unsigned int y = static_cast(v.x >> 32); @@ -111,8 +105,7 @@ FQUALIFIERS double2 box_muller_double(ulonglong2 v) return box_muller_double(make_uint4(x, y, z, w)); } -FQUALIFIERS -__half2 box_muller_half(unsigned short x, unsigned short y) +__forceinline__ __device__ __host__ __half2 box_muller_half(unsigned short x, unsigned short y) { #if defined(ROCRAND_HALF_MATH_SUPPORTED) __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV)); @@ -143,7 +136,7 @@ __half2 box_muller_half(unsigned 
short x, unsigned short y) } template -FQUALIFIERS float2 mrg_box_muller(unsigned int x, unsigned int y) +__forceinline__ __device__ __host__ float2 mrg_box_muller(unsigned int x, unsigned int y) { float2 result; float u = rocrand_device::detail::mrg_uniform_distribution(x); @@ -161,7 +154,7 @@ FQUALIFIERS float2 mrg_box_muller(unsigned int x, unsigned int y) } template -FQUALIFIERS double2 mrg_box_muller_double(unsigned int x, unsigned int y) +__forceinline__ __device__ __host__ double2 mrg_box_muller_double(unsigned int x, unsigned int y) { double2 result; double u = rocrand_device::detail::mrg_uniform_distribution(x); @@ -178,8 +171,7 @@ FQUALIFIERS double2 mrg_box_muller_double(unsigned int x, unsigned int y) return result; } -FQUALIFIERS -float roc_f_erfinv(float x) +__forceinline__ __device__ __host__ float roc_f_erfinv(float x) { float tt1, tt2, lnx, sgn; sgn = (x < 0.0f) ? -1.0f : 1.0f; @@ -206,8 +198,7 @@ float roc_f_erfinv(float x) return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2))); } -FQUALIFIERS -double roc_d_erfinv(double x) +__forceinline__ __device__ __host__ double roc_d_erfinv(double x) { double tt1, tt2, lnx, sgn; sgn = (x < 0.0) ? 
-1.0 : 1.0; @@ -234,40 +225,36 @@ double roc_d_erfinv(double x) return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2))); } -FQUALIFIERS -float normal_distribution(unsigned int x) +__forceinline__ __device__ __host__ float normal_distribution(unsigned int x) { float p = ::rocrand_device::detail::uniform_distribution(x); float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f); return v; } -FQUALIFIERS -float normal_distribution(unsigned long long int x) +__forceinline__ __device__ __host__ float normal_distribution(unsigned long long int x) { float p = ::rocrand_device::detail::uniform_distribution(x); float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f); return v; } -FQUALIFIERS -float2 normal_distribution2(unsigned int v1, unsigned int v2) +__forceinline__ __device__ __host__ float2 normal_distribution2(unsigned int v1, unsigned int v2) { return ::rocrand_device::detail::box_muller(v1, v2); } -FQUALIFIERS float2 normal_distribution2(uint2 v) +__forceinline__ __device__ __host__ float2 normal_distribution2(uint2 v) { return ::rocrand_device::detail::box_muller(v.x, v.y); } -FQUALIFIERS float2 normal_distribution2(unsigned long long v) +__forceinline__ __device__ __host__ float2 normal_distribution2(unsigned long long v) { return ::rocrand_device::detail::box_muller(v); } -FQUALIFIERS -float4 normal_distribution4(uint4 v) +__forceinline__ __device__ __host__ float4 normal_distribution4(uint4 v) { float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y); float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w); @@ -279,49 +266,46 @@ float4 normal_distribution4(uint4 v) }; } -FQUALIFIERS float4 normal_distribution4(longlong2 v) +__forceinline__ __device__ __host__ float4 normal_distribution4(longlong2 v) { float2 r1 = ::rocrand_device::detail::box_muller(v.x); float2 r2 = ::rocrand_device::detail::box_muller(v.y); return float4{r1.x, r1.y, r2.x, r2.y}; } -FQUALIFIERS float4 normal_distribution4(unsigned long long v1, 
unsigned long long v2) +__forceinline__ __device__ __host__ float4 normal_distribution4(unsigned long long v1, + unsigned long long v2) { float2 r1 = ::rocrand_device::detail::box_muller(v1); float2 r2 = ::rocrand_device::detail::box_muller(v2); return float4{r1.x, r1.y, r2.x, r2.y}; } -FQUALIFIERS -double normal_distribution_double(unsigned int x) +__forceinline__ __device__ __host__ double normal_distribution_double(unsigned int x) { double p = ::rocrand_device::detail::uniform_distribution_double(x); double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0); return v; } -FQUALIFIERS -double normal_distribution_double(unsigned long long int x) +__forceinline__ __device__ __host__ double normal_distribution_double(unsigned long long int x) { double p = ::rocrand_device::detail::uniform_distribution_double(x); double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0); return v; } -FQUALIFIERS -double2 normal_distribution_double2(uint4 v) +__forceinline__ __device__ __host__ double2 normal_distribution_double2(uint4 v) { return ::rocrand_device::detail::box_muller_double(v); } -FQUALIFIERS double2 normal_distribution_double2(ulonglong2 v) +__forceinline__ __device__ __host__ double2 normal_distribution_double2(ulonglong2 v) { return ::rocrand_device::detail::box_muller_double(v); } -FQUALIFIERS -__half2 normal_distribution_half2(unsigned int v) +__forceinline__ __device__ __host__ __half2 normal_distribution_half2(unsigned int v) { return ::rocrand_device::detail::box_muller_half( static_cast(v), @@ -329,26 +313,28 @@ __half2 normal_distribution_half2(unsigned int v) ); } -FQUALIFIERS __half2 normal_distribution_half2(unsigned long long v) +__forceinline__ __device__ __host__ __half2 normal_distribution_half2(unsigned long long v) { return ::rocrand_device::detail::box_muller_half(static_cast(v), static_cast(v >> 32)); } template -FQUALIFIERS float2 mrg_normal_distribution2(unsigned int v1, unsigned int v2) 
+__forceinline__ __device__ __host__ float2 mrg_normal_distribution2(unsigned int v1, + unsigned int v2) { return ::rocrand_device::detail::mrg_box_muller(v1, v2); } template -FQUALIFIERS double2 mrg_normal_distribution_double2(unsigned int v1, unsigned int v2) +__forceinline__ __device__ __host__ double2 mrg_normal_distribution_double2(unsigned int v1, + unsigned int v2) { return ::rocrand_device::detail::mrg_box_muller_double(v1, v2); } template -FQUALIFIERS __half2 mrg_normal_distribution_half2(unsigned int v) +__forceinline__ __device__ __host__ __half2 mrg_normal_distribution_half2(unsigned int v) { v = rocrand_device::detail::mrg_uniform_distribution_uint(v); return ::rocrand_device::detail::box_muller_half( @@ -374,9 +360,8 @@ FQUALIFIERS __half2 mrg_normal_distribution_half2(unsigned int v) * * \return Normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_normal(rocrand_state_philox4x32_10 * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_philox4x32_10* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -392,7 +377,7 @@ float rocrand_normal(rocrand_state_philox4x32_10 * state) bm_helper::save_float(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p float values. 
@@ -408,8 +393,7 @@ float rocrand_normal(rocrand_state_philox4x32_10 * state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_normal2(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_philox4x32_10* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -431,8 +415,7 @@ float2 rocrand_normal2(rocrand_state_philox4x32_10 * state) * * \return Four normally distributed \p float value as \p float4 */ -FQUALIFIERS -float4 rocrand_normal4(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ float4 rocrand_normal4(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::normal_distribution4(rocrand4(state)); } @@ -451,9 +434,8 @@ float4 rocrand_normal4(rocrand_state_philox4x32_10 * state) * * \return Normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_normal_double(rocrand_state_philox4x32_10 * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -465,7 +447,7 @@ double rocrand_normal_double(rocrand_state_philox4x32_10 * state) bm_helper::save_double(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p double values. 
@@ -481,8 +463,8 @@ double rocrand_normal_double(rocrand_state_philox4x32_10 * state) * * \return Two normally distributed \p double values as \p double2 */ -FQUALIFIERS -double2 rocrand_normal_double2(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ double2 + rocrand_normal_double2(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::normal_distribution_double2(rocrand4(state)); } @@ -501,8 +483,8 @@ double2 rocrand_normal_double2(rocrand_state_philox4x32_10 * state) * * \return Four normally distributed \p double values as \p double4 */ -FQUALIFIERS -double4 rocrand_normal_double4(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ double4 + rocrand_normal_double4(rocrand_state_philox4x32_10* state) { double2 r1, r2; r1 = rocrand_device::detail::normal_distribution_double2(rocrand4(state)); @@ -526,8 +508,8 @@ double4 rocrand_normal_double4(rocrand_state_philox4x32_10 * state) * * \return Normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -FQUALIFIERS float rocrand_normal(rocrand_state_mrg31k3p* state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_mrg31k3p* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -544,7 +526,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_mrg31k3p* state) bm_helper::save_float(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p float values. 
@@ -560,7 +542,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_mrg31k3p* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_normal2(rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_mrg31k3p* state) { auto state1 = state->next(); auto state2 = state->next(); @@ -582,8 +564,8 @@ FQUALIFIERS float2 rocrand_normal2(rocrand_state_mrg31k3p* state) * * \return Normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -FQUALIFIERS double rocrand_normal_double(rocrand_state_mrg31k3p* state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_mrg31k3p* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -601,7 +583,7 @@ FQUALIFIERS double rocrand_normal_double(rocrand_state_mrg31k3p* state) bm_helper::save_double(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p double values. 
@@ -617,7 +599,7 @@ FQUALIFIERS double rocrand_normal_double(rocrand_state_mrg31k3p* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_mrg31k3p* state) { auto state1 = state->next(); auto state2 = state->next(); @@ -640,9 +622,8 @@ FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_mrg31k3p* state) * * \return Normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_normal(rocrand_state_mrg32k3a * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_mrg32k3a* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -659,7 +640,7 @@ float rocrand_normal(rocrand_state_mrg32k3a * state) bm_helper::save_float(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p float values. 
@@ -675,8 +656,7 @@ float rocrand_normal(rocrand_state_mrg32k3a * state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_normal2(rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_mrg32k3a* state) { auto state1 = state->next(); auto state2 = state->next(); @@ -698,9 +678,8 @@ float2 rocrand_normal2(rocrand_state_mrg32k3a * state) * * \return Normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_normal_double(rocrand_state_mrg32k3a * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_mrg32k3a* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -718,7 +697,7 @@ double rocrand_normal_double(rocrand_state_mrg32k3a * state) bm_helper::save_double(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p double values. 
@@ -734,8 +713,7 @@ double rocrand_normal_double(rocrand_state_mrg32k3a * state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS -double2 rocrand_normal_double2(rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_mrg32k3a* state) { auto state1 = state->next(); auto state2 = state->next(); @@ -758,9 +736,8 @@ double2 rocrand_normal_double2(rocrand_state_mrg32k3a * state) * * \return Normally distributed \p float value */ -#ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE -FQUALIFIERS -float rocrand_normal(rocrand_state_xorwow * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_xorwow* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -774,7 +751,7 @@ float rocrand_normal(rocrand_state_xorwow * state) bm_helper::save_float(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p float values. 
@@ -790,8 +767,7 @@ float rocrand_normal(rocrand_state_xorwow * state) * * \return Two normally distributed \p float values as \p float2 */ -FQUALIFIERS -float2 rocrand_normal2(rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_xorwow* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -812,9 +788,8 @@ float2 rocrand_normal2(rocrand_state_xorwow * state) * * \return Normally distributed \p double value */ -#ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE -FQUALIFIERS -double rocrand_normal_double(rocrand_state_xorwow * state) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_xorwow* state) { typedef rocrand_device::detail::engine_boxmuller_helper bm_helper; @@ -834,7 +809,7 @@ double rocrand_normal_double(rocrand_state_xorwow * state) bm_helper::save_double(state, r.y); return r.x; } -#endif // ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns two normally distributed \p double values. @@ -850,8 +825,7 @@ double rocrand_normal_double(rocrand_state_xorwow * state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS -double2 rocrand_normal_double2(rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_xorwow* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -875,12 +849,32 @@ double2 rocrand_normal_double2(rocrand_state_xorwow * state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_mtgp32 * state) +__forceinline__ __device__ float rocrand_normal(rocrand_state_mtgp32* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } +/** + * \brief Returns two normally distributed \p float values. 
+ * + * Generates and returns two normally distributed \p float values using MTGP32 + * generator in \p state, and increments position of the generator by two. + * Used normal distribution has mean value equal to 0.0f, and standard deviation + * equal to 1.0f. + * The function uses the Box-Muller transform method to generate two normally + * distributed values, and returns both of them. + * + * \param state - Pointer to a state to use + * + * \return Two normally distributed \p float values as \p float2 + */ +__forceinline__ __device__ float2 rocrand_normal2(rocrand_state_mtgp32* state) +{ + auto state1 = rocrand(state); + auto state2 = rocrand(state); + return rocrand_device::detail::normal_distribution2(state1, state2); +} + /** * \brief Returns a normally distributed \p double value. * @@ -893,12 +887,36 @@ float rocrand_normal(rocrand_state_mtgp32 * state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_mtgp32 * state) +__forceinline__ __device__ double rocrand_normal_double(rocrand_state_mtgp32* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } +/** + * \brief Returns two normally distributed \p double values. + * + * Generates and returns two normally distributed \p double values using MTGP32 + * generator in \p state, and increments position of the generator by four. + * Used normal distribution has mean value equal to 0.0f, and standard deviation + * equal to 1.0f. + * The function uses the Box-Muller transform method to generate two normally + * distributed values, and returns both of them. 
+ * + * \param state - Pointer to a state to use + * + * \return Two normally distributed \p double value as \p double2 + */ +__forceinline__ __device__ double2 rocrand_normal_double2(rocrand_state_mtgp32* state) +{ + auto state1 = rocrand(state); + auto state2 = rocrand(state); + auto state3 = rocrand(state); + auto state4 = rocrand(state); + + return rocrand_device::detail::normal_distribution_double2( + uint4{state1, state2, state3, state4}); +} + /** * \brief Returns a normally distributed \p float value. * @@ -911,8 +929,7 @@ double rocrand_normal_double(rocrand_state_mtgp32 * state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_sobol32* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -929,8 +946,7 @@ float rocrand_normal(rocrand_state_sobol32 * state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_sobol32* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -947,8 +963,7 @@ double rocrand_normal_double(rocrand_state_sobol32 * state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_scrambled_sobol32* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -965,8 +980,8 @@ float rocrand_normal(rocrand_state_scrambled_sobol32* state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_scrambled_sobol32* state) { return 
rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -983,8 +998,7 @@ double rocrand_normal_double(rocrand_state_scrambled_sobol32* state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_sobol64* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_sobol64* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1001,8 +1015,7 @@ float rocrand_normal(rocrand_state_sobol64* state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_sobol64 * state) +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_sobol64* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1019,8 +1032,7 @@ double rocrand_normal_double(rocrand_state_sobol64 * state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_scrambled_sobol64* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1037,8 +1049,8 @@ float rocrand_normal(rocrand_state_scrambled_sobol64* state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_scrambled_sobol64* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1055,8 +1067,7 @@ double rocrand_normal_double(rocrand_state_scrambled_sobol64* state) * * \return Normally distributed \p float value */ -FQUALIFIERS -float rocrand_normal(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_lfsr113* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1075,8 +1086,7 @@ float 
rocrand_normal(rocrand_state_lfsr113* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS -float2 rocrand_normal2(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_lfsr113* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1096,8 +1106,7 @@ float2 rocrand_normal2(rocrand_state_lfsr113* state) * * \return Normally distributed \p double value */ -FQUALIFIERS -double rocrand_normal_double(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_lfsr113* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1116,8 +1125,7 @@ double rocrand_normal_double(rocrand_state_lfsr113* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS -double2 rocrand_normal_double2(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_lfsr113* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1140,7 +1148,7 @@ double2 rocrand_normal_double2(rocrand_state_lfsr113* state) * * \return Normally distributed \p float value */ -FQUALIFIERS float rocrand_normal(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_threefry2x32_20* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1159,7 +1167,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_threefry2x32_20* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_threefry2x32_20* state) { return rocrand_device::detail::normal_distribution2(rocrand2(state)); } @@ -1176,7 +1184,8 @@ FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry2x32_20* state) * * \return Normally 
distributed \p double value */ -FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_threefry2x32_20* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1195,7 +1204,8 @@ FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry2x32_20* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ double2 + rocrand_normal_double2(rocrand_state_threefry2x32_20* state) { auto state1 = rocrand2(state); auto state2 = rocrand2(state); @@ -1216,7 +1226,7 @@ FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry2x32_20* state) * * \return Normally distributed \p float value */ -FQUALIFIERS float rocrand_normal(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1235,7 +1245,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_threefry2x64_20* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::normal_distribution2(rocrand(state)); } @@ -1252,7 +1262,8 @@ FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry2x64_20* state) * * \return Normally distributed \p double value */ -FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1271,7 +1282,8 @@ FQUALIFIERS double 
rocrand_normal_double(rocrand_state_threefry2x64_20* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ double2 + rocrand_normal_double2(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::normal_distribution_double2(rocrand2(state)); } @@ -1288,7 +1300,7 @@ FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry2x64_20* state) * * \return Normally distributed \p float value */ -FQUALIFIERS float rocrand_normal(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_threefry4x32_20* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1307,7 +1319,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_threefry4x32_20* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_threefry4x32_20* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1327,7 +1339,8 @@ FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry4x32_20* state) * * \return Normally distributed \p double value */ -FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_threefry4x32_20* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1346,7 +1359,8 @@ FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry4x32_20* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ double2 + rocrand_normal_double2(rocrand_state_threefry4x32_20* state) { return 
rocrand_device::detail::normal_distribution_double2(rocrand4(state)); } @@ -1363,7 +1377,7 @@ FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry4x32_20* state) * * \return Normally distributed \p float value */ -FQUALIFIERS float rocrand_normal(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_threefry4x64_20* state) { return rocrand_device::detail::normal_distribution(rocrand(state)); } @@ -1382,7 +1396,7 @@ FQUALIFIERS float rocrand_normal(rocrand_state_threefry4x64_20* state) * * \return Two normally distributed \p float value as \p float2 */ -FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_threefry4x64_20* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -1402,7 +1416,8 @@ FQUALIFIERS float2 rocrand_normal2(rocrand_state_threefry4x64_20* state) * * \return Normally distributed \p double value */ -FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ double + rocrand_normal_double(rocrand_state_threefry4x64_20* state) { return rocrand_device::detail::normal_distribution_double(rocrand(state)); } @@ -1421,7 +1436,8 @@ FQUALIFIERS double rocrand_normal_double(rocrand_state_threefry4x64_20* state) * * \return Two normally distributed \p double value as \p double2 */ -FQUALIFIERS double2 rocrand_normal_double2(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ double2 + rocrand_normal_double2(rocrand_state_threefry4x64_20* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); diff --git a/library/include/rocrand/rocrand_philox4x32_10.h b/library/include/rocrand/rocrand_philox4x32_10.h index 85048252..34605511 100644 --- a/library/include/rocrand/rocrand_philox4x32_10.h +++ b/library/include/rocrand/rocrand_philox4x32_10.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced 
Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ROCRAND_PHILOX4X32_10_H_ #define ROCRAND_PHILOX4X32_10_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" // Constants from Random123 @@ -80,8 +76,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace rocrand_device { namespace detail { -FQUALIFIERS -unsigned int mulhilo32(unsigned int x, unsigned int y, unsigned int& z) +__forceinline__ __device__ __host__ unsigned int + mulhilo32(unsigned int x, unsigned int y, unsigned int& z) { unsigned long long xy = mad_u64_u32(x, y, 0); z = static_cast(xy >> 32); @@ -100,7 +96,7 @@ class philox4x32_10_engine uint2 key; unsigned int substate; - #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE // The Box–Muller transform requires two inputs to convert uniformly // distributed real values [0; 1] to normally distributed real values // (with mean = 0, and stddev = 1). Often user wants only one @@ -110,11 +106,10 @@ class philox4x32_10_engine unsigned int boxmuller_double_state; // is there a double in boxmuller_double float boxmuller_float; // normally distributed float double boxmuller_double; // normally distributed double - #endif + #endif }; - FQUALIFIERS - philox4x32_10_engine() + __forceinline__ __device__ __host__ philox4x32_10_engine() { this->seed(ROCRAND_PHILOX4x32_DEFAULT_SEED, 0, 0); } @@ -124,10 +119,9 @@ class philox4x32_10_engine /// and skips \p offset random numbers. /// /// A subsequence consists of 2 ^ 66 random numbers. 
- FQUALIFIERS - philox4x32_10_engine(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ philox4x32_10_engine(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset) { this->seed(seed, subsequence, offset); } @@ -137,10 +131,9 @@ class philox4x32_10_engine /// and \p offset random numbers. /// /// A subsequence consists of 2 ^ 66 random numbers. - FQUALIFIERS - void seed(unsigned long long seed_value, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ void seed(unsigned long long seed_value, + const unsigned long long subsequence, + const unsigned long long offset) { m_state.key.x = static_cast(seed_value); m_state.key.y = static_cast(seed_value >> 32); @@ -148,8 +141,7 @@ class philox4x32_10_engine } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS - void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { this->discard_impl(offset); this->m_state.result = this->ten_rounds(m_state.counter, m_state.key); @@ -159,37 +151,33 @@ class philox4x32_10_engine /// a subsequence consisting of 2 ^ 66 random numbers. /// In other words, this function is equivalent to calling \p discard /// 2 ^ 66 times without using the return value, but is much faster. 
- FQUALIFIERS - void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { this->discard_subsequence_impl(subsequence); m_state.result = this->ten_rounds(m_state.counter, m_state.key); } - FQUALIFIERS - void restart(const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence, + const unsigned long long offset) { m_state.counter = {0, 0, 0, 0}; m_state.result = {0, 0, 0, 0}; m_state.substate = 0; - #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE m_state.boxmuller_float_state = 0; m_state.boxmuller_double_state = 0; - #endif + #endif this->discard_subsequence_impl(subsequence); this->discard_impl(offset); m_state.result = this->ten_rounds(m_state.counter, m_state.key); } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { #if defined(__HIP_PLATFORM_AMD__) unsigned int ret = m_state.result.data[m_state.substate]; @@ -206,8 +194,7 @@ class philox4x32_10_engine return ret; } - FQUALIFIERS - uint4 next4() + __forceinline__ __device__ __host__ uint4 next4() { uint4 ret = m_state.result; this->discard_state(); @@ -218,8 +205,7 @@ class philox4x32_10_engine protected: // Advances the internal state to skip \p offset numbers. 
// DOES NOT CALCULATE NEW 4 UINTs (m_state.result) - FQUALIFIERS - void discard_impl(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset) { // Adjust offset for subset m_state.substate += offset & 3; @@ -231,8 +217,8 @@ class philox4x32_10_engine } // DOES NOT CALCULATE NEW 4 UINTs (m_state.result) - FQUALIFIERS - void discard_subsequence_impl(unsigned long long subsequence) + __forceinline__ __device__ __host__ void + discard_subsequence_impl(unsigned long long subsequence) { unsigned int lo = static_cast(subsequence); unsigned int hi = static_cast(subsequence >> 32); @@ -244,8 +230,7 @@ class philox4x32_10_engine // Advances the internal state by offset times. // DOES NOT CALCULATE NEW 4 UINTs (m_state.result) - FQUALIFIERS - void discard_state(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long offset) { unsigned int lo = static_cast(offset); unsigned int hi = static_cast(offset >> 32); @@ -259,14 +244,12 @@ class philox4x32_10_engine // Advances the internal state to the next state // DOES NOT CALCULATE NEW 4 UINTs (m_state.result) - FQUALIFIERS - void discard_state() + __forceinline__ __device__ __host__ void discard_state() { m_state.counter = this->bump_counter(m_state.counter); } - FQUALIFIERS - static uint4 bump_counter(uint4 counter) + __forceinline__ __device__ __host__ static uint4 bump_counter(uint4 counter) { counter.x++; unsigned int add = counter.x == 0 ? 
1 : 0; @@ -276,8 +259,7 @@ class philox4x32_10_engine return counter; } - FQUALIFIERS - uint4 interleave(const uint4 prev, const uint4 next) const + __forceinline__ __device__ __host__ uint4 interleave(const uint4 prev, const uint4 next) const { switch(m_state.substate) { @@ -294,8 +276,7 @@ class philox4x32_10_engine } // 10 Philox4x32 rounds - FQUALIFIERS - uint4 ten_rounds(uint4 counter, uint2 key) + __forceinline__ __device__ __host__ uint4 ten_rounds(uint4 counter, uint2 key) { counter = this->single_round(counter, key); key = this->bumpkey(key); // 1 counter = this->single_round(counter, key); key = this->bumpkey(key); // 2 @@ -311,8 +292,7 @@ class philox4x32_10_engine private: // Single Philox4x32 round - FQUALIFIERS - static uint4 single_round(uint4 counter, uint2 key) + __forceinline__ __device__ __host__ static uint4 single_round(uint4 counter, uint2 key) { // Source: Random123 unsigned int hi0; @@ -327,8 +307,7 @@ class philox4x32_10_engine }; } - FQUALIFIERS - static uint2 bumpkey(uint2 key) + __forceinline__ __device__ __host__ static uint2 bumpkey(uint2 key) { key.x += ROCRAND_PHILOX_W32_0; key.y += ROCRAND_PHILOX_W32_1; @@ -339,7 +318,7 @@ class philox4x32_10_engine // State philox4x32_10_state m_state; - #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE friend struct detail::engine_boxmuller_helper; #endif @@ -367,11 +346,10 @@ typedef rocrand_device::philox4x32_10_engine rocrand_state_philox4x32_10; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_philox4x32_10* state) { *state = 
rocrand_state_philox4x32_10(seed, subsequence, offset); } @@ -388,8 +366,7 @@ void rocrand_init(const unsigned long long seed, * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_philox4x32_10* state) { return state->next(); } @@ -406,8 +383,7 @@ unsigned int rocrand(rocrand_state_philox4x32_10 * state) * * \return Four pseudorandom values (32-bit) as an uint4 */ -FQUALIFIERS -uint4 rocrand4(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ uint4 rocrand4(rocrand_state_philox4x32_10* state) { return state->next4(); } @@ -420,8 +396,8 @@ uint4 rocrand4(rocrand_state_philox4x32_10 * state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_philox4x32_10* state) { return state->discard(offset); } @@ -435,8 +411,8 @@ void skipahead(unsigned long long offset, rocrand_state_philox4x32_10 * state) * \param subsequence - Number of subsequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead_subsequence(unsigned long long subsequence, rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, + rocrand_state_philox4x32_10* state) { return state->discard_subsequence(subsequence); } @@ -450,11 +426,11 @@ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_philox4 * \param sequence - Number of sequences to skip * \param state - Pointer to state to update */ - FQUALIFIERS - void skipahead_sequence(unsigned long long sequence, rocrand_state_philox4x32_10 * state) - { - return state->discard_subsequence(sequence); - } +__forceinline__ __device__ 
__host__ void skipahead_sequence(unsigned long long sequence, + rocrand_state_philox4x32_10* state) +{ + return state->discard_subsequence(sequence); +} #endif // ROCRAND_PHILOX4X32_10_H_ diff --git a/library/include/rocrand/rocrand_poisson.h b/library/include/rocrand/rocrand_poisson.h index 97206033..d539b68d 100644 --- a/library/include/rocrand/rocrand_poisson.h +++ b/library/include/rocrand/rocrand_poisson.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_POISSON_H_ #define ROCRAND_POISSON_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - /** \rocrand_internal \addtogroup rocranddevice * * @{ @@ -57,7 +53,8 @@ constexpr double lambda_threshold_small = 64.0; constexpr double lambda_threshold_huge = 4000.0; template -FQUALIFIERS Result_Type poisson_distribution_small(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution_small(State& state, + double lambda) { // Knuth's method @@ -75,8 +72,7 @@ FQUALIFIERS Result_Type poisson_distribution_small(State& state, double lambda) return k - 1; } -FQUALIFIERS -double lgamma_approx(const double x) +__forceinline__ __device__ __host__ double lgamma_approx(const double x) { // Lanczos approximation (g = 7, n = 9) @@ -109,7 +105,8 @@ double lgamma_approx(const double x) } template -FQUALIFIERS Result_Type poisson_distribution_large(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution_large(State& state, + double lambda) { // Rejection method PA, A. C. 
Atkinson @@ -141,7 +138,8 @@ FQUALIFIERS Result_Type poisson_distribution_large(State& state, double lambda) } template -FQUALIFIERS Result_Type poisson_distribution_huge(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution_huge(State& state, + double lambda) { // Approximate Poisson distribution with normal distribution @@ -150,7 +148,7 @@ FQUALIFIERS Result_Type poisson_distribution_huge(State& state, double lambda) } template -FQUALIFIERS Result_Type poisson_distribution(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution(State& state, double lambda) { if (lambda < lambda_threshold_small) { @@ -167,7 +165,8 @@ FQUALIFIERS Result_Type poisson_distribution(State& state, double lambda) } template -FQUALIFIERS Result_Type poisson_distribution_itr(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution_itr(State& state, + double lambda) { // Algorithm ITR // George S. 
Fishman @@ -204,7 +203,8 @@ FQUALIFIERS Result_Type poisson_distribution_itr(State& state, double lambda) } template -FQUALIFIERS Result_Type poisson_distribution_inv(State& state, double lambda) +__forceinline__ __device__ __host__ Result_Type poisson_distribution_inv(State& state, + double lambda) { if (lambda < 1000.0) { @@ -230,9 +230,9 @@ FQUALIFIERS Result_Type poisson_distribution_inv(State& state, double lambda) * * \return Poisson-distributed unsigned int */ -#ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_philox4x32_10 * state, double lambda) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_philox4x32_10* state, + double lambda) { return rocrand_device::detail::poisson_distribution( state, @@ -250,8 +250,8 @@ unsigned int rocrand_poisson(rocrand_state_philox4x32_10 * state, double lambda) * * \return Four Poisson-distributed unsigned int values as \p uint4 */ -FQUALIFIERS -uint4 rocrand_poisson4(rocrand_state_philox4x32_10 * state, double lambda) +__forceinline__ __device__ __host__ uint4 rocrand_poisson4(rocrand_state_philox4x32_10* state, + double lambda) { return uint4{ rocrand_device::detail::poisson_distribution( @@ -267,7 +267,7 @@ uint4 rocrand_poisson4(rocrand_state_philox4x32_10 * state, double lambda) state, lambda)}; } -#endif // ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns a Poisson-distributed unsigned int using MRG31k3p generator. 
@@ -280,14 +280,15 @@ uint4 rocrand_poisson4(rocrand_state_philox4x32_10 * state, double lambda) * * \return Poisson-distributed unsigned int */ -#ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_mrg31k3p* state, double lambda) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg31k3p* state, + double lambda) { return rocrand_device::detail::poisson_distribution( state, lambda); } -#endif // ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns a Poisson-distributed unsigned int using MRG32k3a generator. @@ -300,15 +301,15 @@ FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_mrg31k3p* state, double l * * \return Poisson-distributed unsigned int */ -#ifndef ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_mrg32k3a * state, double lambda) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg32k3a* state, + double lambda) { return rocrand_device::detail::poisson_distribution( state, lambda); } -#endif // ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns a Poisson-distributed unsigned int using XORWOW generator. 
@@ -321,15 +322,15 @@ unsigned int rocrand_poisson(rocrand_state_mrg32k3a * state, double lambda) * * \return Poisson-distributed unsigned int */ -#ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_xorwow * state, double lambda) +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_xorwow* state, + double lambda) { return rocrand_device::detail::poisson_distribution( state, lambda); } -#endif // ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE /** * \brief Returns a Poisson-distributed unsigned int using MTGP32 generator. @@ -342,8 +343,8 @@ unsigned int rocrand_poisson(rocrand_state_xorwow * state, double lambda) * * \return Poisson-distributed unsigned int */ -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_mtgp32 * state, double lambda) +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mtgp32* state, + double lambda) { return rocrand_device::detail::poisson_distribution_inv( state, @@ -361,8 +362,8 @@ unsigned int rocrand_poisson(rocrand_state_mtgp32 * state, double lambda) * * \return Poisson-distributed unsigned int */ -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_sobol32 * state, double lambda) +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_sobol32* state, + double lambda) { return rocrand_device::detail::poisson_distribution_inv( state, @@ -380,8 +381,8 @@ unsigned int rocrand_poisson(rocrand_state_sobol32 * state, double lambda) * * \return Poisson-distributed unsigned int */ -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_scrambled_sobol32* state, double lambda) +__forceinline__ __device__ __host__ unsigned int + rocrand_poisson(rocrand_state_scrambled_sobol32* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); @@ -398,8 +399,8 @@ unsigned int 
rocrand_poisson(rocrand_state_scrambled_sobol32* state, double lamb * * \return Poisson-distributed unsigned long long int */ -FQUALIFIERS -unsigned long long int rocrand_poisson(rocrand_state_sobol64* state, double lambda) +__forceinline__ __device__ __host__ unsigned long long int + rocrand_poisson(rocrand_state_sobol64* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); @@ -416,8 +417,8 @@ unsigned long long int rocrand_poisson(rocrand_state_sobol64* state, double lamb * * \return Poisson-distributed unsigned long long int */ -FQUALIFIERS -unsigned long long int rocrand_poisson(rocrand_state_scrambled_sobol64* state, double lambda) +__forceinline__ __device__ __host__ unsigned long long int + rocrand_poisson(rocrand_state_scrambled_sobol64* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); @@ -434,8 +435,8 @@ unsigned long long int rocrand_poisson(rocrand_state_scrambled_sobol64* state, d * * \return Poisson-distributed unsigned int */ -FQUALIFIERS -unsigned int rocrand_poisson(rocrand_state_lfsr113* state, double lambda) +__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_lfsr113* state, + double lambda) { return rocrand_device::detail::poisson_distribution_inv( state, @@ -453,7 +454,8 @@ unsigned int rocrand_poisson(rocrand_state_lfsr113* state, double lambda) * * \return Poisson-distributed unsigned int */ -FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry2x32_20* state, double lambda) +__forceinline__ __device__ __host__ unsigned int + rocrand_poisson(rocrand_state_threefry2x32_20* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); } @@ -469,7 +471,8 @@ FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry2x32_20* state, d * * \return Poisson-distributed unsigned int */ -FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry2x64_20* state, double lambda) 
+__forceinline__ __device__ __host__ unsigned int + rocrand_poisson(rocrand_state_threefry2x64_20* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); } @@ -485,7 +488,8 @@ FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry2x64_20* state, d * * \return Poisson-distributed unsigned int */ -FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry4x32_20* state, double lambda) +__forceinline__ __device__ __host__ unsigned int + rocrand_poisson(rocrand_state_threefry4x32_20* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); } @@ -501,7 +505,8 @@ FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry4x32_20* state, d * * \return Poisson-distributed unsigned int */ -FQUALIFIERS unsigned int rocrand_poisson(rocrand_state_threefry4x64_20* state, double lambda) +__forceinline__ __device__ __host__ unsigned int + rocrand_poisson(rocrand_state_threefry4x64_20* state, double lambda) { return rocrand_device::detail::poisson_distribution_inv(state, lambda); } diff --git a/library/include/rocrand/rocrand_scrambled_sobol32.h b/library/include/rocrand/rocrand_scrambled_sobol32.h index 83ca72ba..1accefc5 100644 --- a/library/include/rocrand/rocrand_scrambled_sobol32.h +++ b/library/include/rocrand/rocrand_scrambled_sobol32.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_SCRAMBLED_SOBOL32_H_ #define ROCRAND_SCRAMBLED_SOBOL32_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_sobol32.h" @@ -35,59 +31,52 @@ template class scrambled_sobol32_engine { public: - FQUALIFIERS - scrambled_sobol32_engine() : scramble_constant() {} + __forceinline__ __device__ __host__ scrambled_sobol32_engine() : scramble_constant() {} - FQUALIFIERS - scrambled_sobol32_engine(const unsigned int* vectors, - const unsigned int scramble_constant, - const unsigned int offset) + __forceinline__ __device__ __host__ + scrambled_sobol32_engine(const unsigned int* vectors, + const unsigned int scramble_constant, + const unsigned int offset) : m_engine(vectors, 0), scramble_constant(scramble_constant) { discard(offset); } /// Advances the internal state to skip \p offset numbers. 
- FQUALIFIERS - void discard(unsigned int offset) + __forceinline__ __device__ __host__ void discard(unsigned int offset) { m_engine.discard(offset); } - FQUALIFIERS - void discard() + __forceinline__ __device__ __host__ void discard() { m_engine.discard(); } /// Advances the internal state by stride times, where stride is power of 2 - FQUALIFIERS - void discard_stride(unsigned int stride) + __forceinline__ __device__ __host__ void discard_stride(unsigned int stride) { m_engine.discard_stride(stride); } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { unsigned int p = m_engine.next(); return p ^ scramble_constant; } - FQUALIFIERS - unsigned int current() + __forceinline__ __device__ __host__ unsigned int current() { unsigned int p = m_engine.current(); return p ^ scramble_constant; } - FQUALIFIERS static constexpr bool uses_shared_vectors() + __forceinline__ __device__ __host__ static constexpr bool uses_shared_vectors() { return UseSharedVectors; } @@ -122,11 +111,10 @@ typedef rocrand_device::scrambled_sobol32_engine rocrand_state_scrambled_ * \param offset - Absolute offset into sequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned int* vectors, - const unsigned int scramble_constant, - const unsigned int offset, - rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned int* vectors, + const unsigned int scramble_constant, + const unsigned int offset, + rocrand_state_scrambled_sobol32* state) { *state = rocrand_state_scrambled_sobol32(vectors, scramble_constant, offset); } @@ -143,8 +131,7 @@ void rocrand_init(const unsigned int* vectors, * * \return Quasirandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_scrambled_sobol32* state) 
+__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_scrambled_sobol32* state) { return state->next(); } @@ -157,8 +144,8 @@ unsigned int rocrand(rocrand_state_scrambled_sobol32* state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_scrambled_sobol32* state) { return state->discard(offset); } diff --git a/library/include/rocrand/rocrand_scrambled_sobol64.h b/library/include/rocrand/rocrand_scrambled_sobol64.h index 3f4cb18e..b685a7f7 100644 --- a/library/include/rocrand/rocrand_scrambled_sobol64.h +++ b/library/include/rocrand/rocrand_scrambled_sobol64.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_SCRAMBLED_SOBOL64_H_ #define ROCRAND_SCRAMBLED_SOBOL64_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_sobol64.h" @@ -35,59 +31,52 @@ template class scrambled_sobol64_engine { public: - FQUALIFIERS - scrambled_sobol64_engine() : scramble_constant() {} + __forceinline__ __device__ __host__ scrambled_sobol64_engine() : scramble_constant() {} - FQUALIFIERS - scrambled_sobol64_engine(const unsigned long long int* vectors, - const unsigned long long int scramble_constant, - const unsigned int offset) + __forceinline__ __device__ __host__ + scrambled_sobol64_engine(const unsigned long long int* vectors, + const unsigned long long int scramble_constant, + const unsigned int offset) : 
m_engine(vectors, 0), scramble_constant(scramble_constant) { discard(offset); } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS - void discard(unsigned long long int offset) + __forceinline__ __device__ __host__ void discard(unsigned long long int offset) { m_engine.discard(offset); } - FQUALIFIERS - void discard() + __forceinline__ __device__ __host__ void discard() { m_engine.discard(); } /// Advances the internal state by stride times, where stride is power of 2 - FQUALIFIERS - void discard_stride(unsigned long long int stride) + __forceinline__ __device__ __host__ void discard_stride(unsigned long long int stride) { m_engine.discard_stride(stride); } - FQUALIFIERS - unsigned long long int operator()() + __forceinline__ __device__ __host__ unsigned long long int operator()() { return this->next(); } - FQUALIFIERS - unsigned long long int next() + __forceinline__ __device__ __host__ unsigned long long int next() { unsigned long long int p = m_engine.next(); return p ^ scramble_constant; } - FQUALIFIERS - unsigned long long int current() + __forceinline__ __device__ __host__ unsigned long long int current() { unsigned long long int p = m_engine.current(); return p ^ scramble_constant; } - FQUALIFIERS static constexpr bool uses_shared_vectors() + __forceinline__ __device__ __host__ static constexpr bool uses_shared_vectors() { return UseSharedVectors; } @@ -122,29 +111,29 @@ typedef rocrand_device::scrambled_sobol64_engine rocrand_state_scrambled_ * \param offset - Absolute offset into sequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned long long int* vectors, - const unsigned long long int scramble_constant, - const unsigned int offset, - rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ void + rocrand_init(const unsigned long long int* vectors, + const unsigned long long int scramble_constant, + const unsigned int offset, + rocrand_state_scrambled_sobol64* 
state) { *state = rocrand_state_scrambled_sobol64(vectors, scramble_constant, offset); } /** - * \brief Returns uniformly distributed random unsigned int value + * \brief Returns uniformly distributed random unsigned long long int value * from [0; 2^64 - 1] range. * - * Generates and returns uniformly distributed random unsigned int + * Generates and returns uniformly distributed random unsigned long long int * value from [0; 2^64 - 1] range using scrambled_sobol64 generator in \p state. * State is incremented by one position. * * \param state - Pointer to a state to use * - * \return Quasirandom value (64-bit) as an unsigned int + * \return Quasirandom value (64-bit) as an unsigned long long int */ -FQUALIFIERS -unsigned long long int rocrand(rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ unsigned long long int + rocrand(rocrand_state_scrambled_sobol64* state) { return state->next(); } @@ -157,8 +146,8 @@ unsigned long long int rocrand(rocrand_state_scrambled_sobol64* state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_scrambled_sobol64* state) { return state->discard(offset); } diff --git a/library/include/rocrand/rocrand_sobol32.h b/library/include/rocrand/rocrand_sobol32.h index a23e1206..d46feeb1 100644 --- a/library/include/rocrand/rocrand_sobol32.h +++ b/library/include/rocrand/rocrand_sobol32.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_SOBOL32_H_ #define ROCRAND_SOBOL32_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" namespace rocrand_device { @@ -36,13 +32,11 @@ struct sobol32_state unsigned int i; unsigned int vectors[32]; - FQUALIFIERS - sobol32_state() : d(), i(), vectors() { } + __forceinline__ __device__ __host__ sobol32_state() : d(), i(), vectors() {} - FQUALIFIERS - sobol32_state(const unsigned int d, - const unsigned int i, - const unsigned int * vectors) + __forceinline__ __device__ __host__ sobol32_state(const unsigned int d, + const unsigned int i, + const unsigned int* vectors) : d(d), i(i) { for(int k = 0; k < 32; k++) @@ -59,14 +53,13 @@ struct sobol32_state unsigned int i; const unsigned int * vectors; - FQUALIFIERS - sobol32_state() : d(), i(), vectors() { } + __forceinline__ __device__ __host__ sobol32_state() : d(), i(), vectors() {} - FQUALIFIERS - sobol32_state(const unsigned int d, - const unsigned int i, - const unsigned int * vectors) - : d(d), i(i), vectors(vectors) { } + __forceinline__ __device__ __host__ sobol32_state(const unsigned int d, + const unsigned int i, + const unsigned int* vectors) + : d(d), i(i), vectors(vectors) + {} }; template @@ -76,66 +69,57 @@ class sobol32_engine typedef struct sobol32_state sobol32_state; - FQUALIFIERS - sobol32_engine() { } + __forceinline__ __device__ __host__ sobol32_engine() {} - FQUALIFIERS - sobol32_engine(const unsigned int * vectors, - const unsigned int offset) + __forceinline__ __device__ __host__ sobol32_engine(const unsigned int* vectors, + const unsigned int offset) : m_state(0, 0, vectors) { discard_state(offset); } /// Advances the internal state to skip \p offset numbers. 
- FQUALIFIERS - void discard(unsigned int offset) + __forceinline__ __device__ __host__ void discard(unsigned int offset) { discard_state(offset); } - FQUALIFIERS - void discard() + __forceinline__ __device__ __host__ void discard() { discard_state(); } /// Advances the internal state by stride times, where stride is power of 2 - FQUALIFIERS - void discard_stride(unsigned int stride) + __forceinline__ __device__ __host__ void discard_stride(unsigned int stride) { discard_state_power2(stride); } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return this->next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { unsigned int p = m_state.d; discard_state(); return p; } - FQUALIFIERS - unsigned int current() const + __forceinline__ __device__ __host__ unsigned int current() const { return m_state.d; } - FQUALIFIERS static constexpr bool uses_shared_vectors() + __forceinline__ __device__ __host__ static constexpr bool uses_shared_vectors() { return UseSharedVectors; } protected: // Advances the internal state by offset times. 
- FQUALIFIERS - void discard_state(unsigned int offset) + __forceinline__ __device__ __host__ void discard_state(unsigned int offset) { m_state.i += offset; const unsigned int g = m_state.i ^ (m_state.i >> 1); @@ -147,15 +131,13 @@ class sobol32_engine } // Advances the internal state to the next state - FQUALIFIERS - void discard_state() + __forceinline__ __device__ __host__ void discard_state() { m_state.d ^= m_state.vectors[rightmost_zero_bit(m_state.i)]; m_state.i++; } - FQUALIFIERS - void discard_state_power2(unsigned int stride) + __forceinline__ __device__ __host__ void discard_state_power2(unsigned int stride) { // Leap frog // @@ -176,8 +158,7 @@ class sobol32_engine // Returns the index of the rightmost zero bit in the binary expansion of // x (Gray code of the current element's index) - FQUALIFIERS - unsigned int rightmost_zero_bit(unsigned int x) + __forceinline__ __device__ __host__ unsigned int rightmost_zero_bit(unsigned int x) { #if defined(__HIP_DEVICE_COMPILE__) unsigned int z = __ffs(~x); @@ -223,10 +204,9 @@ typedef rocrand_device::sobol32_engine rocrand_state_sobol32; * \param offset - Absolute offset into sequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned int * vectors, - const unsigned int offset, - rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned int* vectors, + const unsigned int offset, + rocrand_state_sobol32* state) { *state = rocrand_state_sobol32(vectors, offset); } @@ -243,8 +223,7 @@ void rocrand_init(const unsigned int * vectors, * * \return Quasirandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_sobol32* state) { return state->next(); } @@ -257,8 +236,8 @@ unsigned int rocrand(rocrand_state_sobol32 * state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ 
-FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_sobol32* state) { return state->discard(offset); } diff --git a/library/include/rocrand/rocrand_sobol64.h b/library/include/rocrand/rocrand_sobol64.h index 8e1a5fd3..23bebe37 100644 --- a/library/include/rocrand/rocrand_sobol64.h +++ b/library/include/rocrand/rocrand_sobol64.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_SOBOL64_H_ #define ROCRAND_SOBOL64_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" namespace rocrand_device { @@ -36,13 +32,11 @@ struct sobol64_state unsigned long long int i; unsigned long long int vectors[64]; - FQUALIFIERS - sobol64_state() : d(), i(), vectors() { } + __forceinline__ __device__ __host__ sobol64_state() : d(), i(), vectors() {} - FQUALIFIERS - sobol64_state(const unsigned long long int d, - const unsigned long long int i, - const unsigned long long int * vectors) + __forceinline__ __device__ __host__ sobol64_state(const unsigned long long int d, + const unsigned long long int i, + const unsigned long long int* vectors) : d(d), i(i) { for(int k = 0; k < 64; k++) @@ -59,14 +53,13 @@ struct sobol64_state unsigned long long int i; const unsigned long long int * vectors; - FQUALIFIERS - sobol64_state() : d(), i(), vectors() { } + __forceinline__ __device__ __host__ sobol64_state() : d(), i(), vectors() {} - FQUALIFIERS - sobol64_state(const unsigned long long int d, - const unsigned long long int i, - const unsigned long long int * vectors) - : d(d), 
i(i), vectors(vectors) { } + __forceinline__ __device__ __host__ sobol64_state(const unsigned long long int d, + const unsigned long long int i, + const unsigned long long int* vectors) + : d(d), i(i), vectors(vectors) + {} }; template @@ -76,65 +69,57 @@ class sobol64_engine typedef struct sobol64_state sobol64_state; - FQUALIFIERS - sobol64_engine() { } + __forceinline__ __device__ __host__ sobol64_engine() {} - FQUALIFIERS - sobol64_engine(const unsigned long long int* vectors, const unsigned long long int offset) + __forceinline__ __device__ __host__ sobol64_engine(const unsigned long long int* vectors, + const unsigned long long int offset) : m_state(0, 0, vectors) { discard_state(offset); } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS - void discard(unsigned long long int offset) + __forceinline__ __device__ __host__ void discard(unsigned long long int offset) { discard_state(offset); } - FQUALIFIERS - void discard() + __forceinline__ __device__ __host__ void discard() { discard_state(); } /// Advances the internal state by stride times, where stride is power of 2 - FQUALIFIERS - void discard_stride(unsigned long long int stride) + __forceinline__ __device__ __host__ void discard_stride(unsigned long long int stride) { discard_state_power2(stride); } - FQUALIFIERS - unsigned long long int operator()() + __forceinline__ __device__ __host__ unsigned long long int operator()() { return this->next(); } - FQUALIFIERS - unsigned long long int next() + __forceinline__ __device__ __host__ unsigned long long int next() { unsigned long long int p = m_state.d; discard_state(); return p; } - FQUALIFIERS - unsigned long long int current() const + __forceinline__ __device__ __host__ unsigned long long int current() const { return m_state.d; } - FQUALIFIERS static constexpr bool uses_shared_vectors() + __forceinline__ __device__ __host__ static constexpr bool uses_shared_vectors() { return UseSharedVectors; } protected: // Advances the internal 
state by offset times. - FQUALIFIERS - void discard_state(unsigned long long int offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long int offset) { m_state.i += offset; const unsigned long long int g = m_state.i ^ (m_state.i >> 1ull); @@ -146,15 +131,13 @@ class sobol64_engine } // Advances the internal state to the next state - FQUALIFIERS - void discard_state() + __forceinline__ __device__ __host__ void discard_state() { m_state.d ^= m_state.vectors[rightmost_zero_bit(m_state.i)]; m_state.i++; } - FQUALIFIERS - void discard_state_power2(unsigned long long int stride) + __forceinline__ __device__ __host__ void discard_state_power2(unsigned long long int stride) { // Leap frog // @@ -176,8 +159,7 @@ class sobol64_engine // Returns the index of the rightmost zero bit in the binary expansion of // x (Gray code of the current element's index) // NOTE changing unsigned long long int to unit64_t will cause compile failure on device - FQUALIFIERS - unsigned int rightmost_zero_bit(unsigned long long int x) + __forceinline__ __device__ __host__ unsigned int rightmost_zero_bit(unsigned long long int x) { #if defined(__HIP_DEVICE_COMPILE__) unsigned int z = __ffsll(~x); @@ -223,28 +205,26 @@ typedef rocrand_device::sobol64_engine rocrand_state_sobol64; * \param offset - Absolute offset into sequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned long long int * vectors, - const unsigned int offset, - rocrand_state_sobol64 * state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long int* vectors, + const unsigned int offset, + rocrand_state_sobol64* state) { *state = rocrand_state_sobol64(vectors, offset); } /** - * \brief Returns uniformly distributed random unsigned int value + * \brief Returns uniformly distributed random unsigned long long int value * from [0; 2^64 - 1] range. 
* - * Generates and returns uniformly distributed random unsigned int + * Generates and returns uniformly distributed random unsigned long long int * value from [0; 2^64 - 1] range using sobol64 generator in \p state. * State is incremented by one position. * * \param state - Pointer to a state to use * - * \return Quasirandom value (64-bit) as an unsigned int + * \return Quasirandom value (64-bit) as an unsigned long long int */ -FQUALIFIERS -unsigned long long int rocrand(rocrand_state_sobol64 * state) +__forceinline__ __device__ __host__ unsigned long long int rocrand(rocrand_state_sobol64* state) { return state->next(); } @@ -257,8 +237,8 @@ unsigned long long int rocrand(rocrand_state_sobol64 * state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long int offset, rocrand_state_sobol64* state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long int offset, + rocrand_state_sobol64* state) { return state->discard(offset); } diff --git a/library/include/rocrand/rocrand_threefry2_impl.h b/library/include/rocrand/rocrand_threefry2_impl.h index 2c275948..643e678a 100644 --- a/library/include/rocrand/rocrand_threefry2_impl.h +++ b/library/include/rocrand/rocrand_threefry2_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef ROCRAND_THREEFRY2_IMPL_H_ #define ROCRAND_THREEFRY2_IMPL_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry_common.h" #include @@ -72,10 +68,10 @@ namespace rocrand_device { template -FQUALIFIERS int threefry_rotation_array(int index) = delete; +__forceinline__ __device__ __host__ int threefry_rotation_array(int index) = delete; template<> -FQUALIFIERS int threefry_rotation_array(int index) +__forceinline__ __device__ __host__ int threefry_rotation_array(int index) { // Output from skein_rot_search (srs32x2-X5000.out) // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 @@ -86,7 +82,7 @@ FQUALIFIERS int threefry_rotation_array(int index) } template<> -FQUALIFIERS int threefry_rotation_array(int index) +__forceinline__ __device__ __host__ int threefry_rotation_array(int index) { // Output from skein_rot_search: (srs64_B64-X1000) // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57 @@ -110,13 +106,13 @@ class threefry_engine2_base using state_type = threefry_state_2; using state_vector_type = state_value; - FQUALIFIERS void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { this->discard_impl(offset); m_state.result = this->threefry_rounds(m_state.counter, m_state.key); } - FQUALIFIERS void discard() + __forceinline__ __device__ __host__ void discard() { m_state.result = this->threefry_rounds(m_state.counter, m_state.key); } @@ -126,18 +122,18 @@ class threefry_engine2_base /// where b is the number of bits of the value type of the generator. /// In other words, this function is equivalent to calling \p discard /// 2 * (2 ^ b) times without using the return value, but is much faster. 
- FQUALIFIERS void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { this->discard_subsequence_impl(subsequence); m_state.result = this->threefry_rounds(m_state.counter, m_state.key); } - FQUALIFIERS value operator()() + __forceinline__ __device__ __host__ value operator()() { return this->next(); } - FQUALIFIERS value next() + __forceinline__ __device__ __host__ value next() { #if defined(__HIP_PLATFORM_AMD__) value ret = m_state.result.data[m_state.substate]; @@ -154,7 +150,7 @@ class threefry_engine2_base return ret; } - FQUALIFIERS state_value next2() + __forceinline__ __device__ __host__ state_value next2() { state_value ret = m_state.result; m_state.counter = this->bump_counter(m_state.counter); @@ -164,7 +160,8 @@ class threefry_engine2_base } protected: - FQUALIFIERS static state_value threefry_rounds(state_value counter, state_value key) + __forceinline__ __device__ __host__ static state_value threefry_rounds(state_value counter, + state_value key) { state_value X; value ks[2 + 1]; @@ -207,7 +204,7 @@ class threefry_engine2_base /// Advances the internal state to skip \p offset numbers. /// Does not calculate new values (or update m_state.result). - FQUALIFIERS void discard_impl(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset) { // Adjust offset for subset m_state.substate += offset & 1; @@ -219,14 +216,15 @@ class threefry_engine2_base } /// Does not calculate new values (or update m_state.result). - FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence) + __forceinline__ __device__ __host__ void + discard_subsequence_impl(unsigned long long subsequence) { m_state.counter.y += subsequence; } /// Advances the internal state by \p offset times. /// Does not calculate new values (or update m_state.result). 
- FQUALIFIERS void discard_state(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long offset) { value lo, hi; ::rocrand_device::detail::split_ull(lo, hi, offset); @@ -236,7 +234,7 @@ class threefry_engine2_base m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0); } - FQUALIFIERS static state_value bump_counter(state_value counter) + __forceinline__ __device__ __host__ static state_value bump_counter(state_value counter) { counter.x++; value add = counter.x == 0 ? 1 : 0; @@ -244,7 +242,8 @@ class threefry_engine2_base return counter; } - FQUALIFIERS state_value interleave(const state_value prev, const state_value next) const + __forceinline__ __device__ __host__ state_value interleave(const state_value prev, + const state_value next) const { switch(m_state.substate) { diff --git a/library/include/rocrand/rocrand_threefry2x32_20.h b/library/include/rocrand/rocrand_threefry2x32_20.h index a52f7305..201a6529 100644 --- a/library/include/rocrand/rocrand_threefry2x32_20.h +++ b/library/include/rocrand/rocrand_threefry2x32_20.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef ROCRAND_THREEFRY2X32_20_H_ #define ROCRAND_THREEFRY2X32_20_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry2_impl.h" namespace rocrand_device @@ -73,9 +69,10 @@ class threefry2x32_20_engine : public threefry_engine2_baseseed(seed, subsequence, offset); } @@ -85,9 +82,9 @@ class threefry2x32_20_engine : public threefry_engine2_baseunsigned int */ -FQUALIFIERS unsigned int rocrand(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_threefry2x32_20* state) { return state->next(); } @@ -156,7 +153,7 @@ FQUALIFIERS unsigned int rocrand(rocrand_state_threefry2x32_20* state) * * \return Two pseudorandom values (32-bit) as an uint2 */ -FQUALIFIERS uint2 rocrand2(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ uint2 rocrand2(rocrand_state_threefry2x32_20* state) { return state->next2(); } diff --git a/library/include/rocrand/rocrand_threefry2x64_20.h b/library/include/rocrand/rocrand_threefry2x64_20.h index 1ec072a8..698f6672 100644 --- a/library/include/rocrand/rocrand_threefry2x64_20.h +++ b/library/include/rocrand/rocrand_threefry2x64_20.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef ROCRAND_THREEFRY2X64_20_H_ #define ROCRAND_THREEFRY2X64_20_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry2_impl.h" namespace rocrand_device @@ -74,9 +70,10 @@ class threefry2x64_20_engine : public threefry_engine2_baseseed(seed, subsequence, offset); } @@ -86,9 +83,9 @@ class threefry2x64_20_engine : public threefry_engine2_base> 32}; @@ -117,10 +114,10 @@ typedef rocrand_device::threefry2x64_20_engine rocrand_state_threefry2x64_20; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_threefry2x64_20* state) { *state = rocrand_state_threefry2x64_20(seed, subsequence, offset); } @@ -139,7 +136,7 @@ FQUALIFIERS void rocrand_init(const unsigned long long seed, * * \return Pseudorandom value (64-bit) as an unsigned long long */ -FQUALIFIERS unsigned long long rocrand(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ unsigned long long rocrand(rocrand_state_threefry2x64_20* state) { return state->next(); } @@ -156,7 +153,7 @@ FQUALIFIERS unsigned long long rocrand(rocrand_state_threefry2x64_20* state) * * \return Two pseudorandom values (64-bit) as an ulonglong2 */ -FQUALIFIERS ulonglong2 rocrand2(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ ulonglong2 rocrand2(rocrand_state_threefry2x64_20* state) { return state->next2(); } diff --git a/library/include/rocrand/rocrand_threefry4_impl.h b/library/include/rocrand/rocrand_threefry4_impl.h index 13843331..703e4183 100644 --- a/library/include/rocrand/rocrand_threefry4_impl.h +++ 
b/library/include/rocrand/rocrand_threefry4_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ROCRAND_THREEFRY4_IMPL_H_ #define ROCRAND_THREEFRY4_IMPL_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry_common.h" #include @@ -72,10 +68,11 @@ namespace rocrand_device { template -FQUALIFIERS int threefry_rotation_array(int indexX, int indexY) = delete; +__forceinline__ __device__ __host__ int threefry_rotation_array(int indexX, int indexY) = delete; template<> -FQUALIFIERS int threefry_rotation_array(int indexX, int indexY) +__forceinline__ __device__ __host__ int threefry_rotation_array(int indexX, + int indexY) { // Output from skein_rot_search: (srs-B128-X5000.out) // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 @@ -96,7 +93,8 @@ FQUALIFIERS int threefry_rotation_array(int indexX, int indexY) } template<> -FQUALIFIERS int threefry_rotation_array(int indexX, int indexY) +__forceinline__ __device__ __host__ int threefry_rotation_array(int indexX, + int indexY) { // These are the R_256 constants from the Threefish reference sources // with names changed to R_64x4... */ @@ -128,7 +126,7 @@ class threefry_engine4_base using state_vector_type = state_value; /// Advances the internal state to skip \p offset numbers. 
- FQUALIFIERS void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { this->discard_impl(offset); this->m_state.result = this->threefry_rounds(m_state.counter, m_state.key); @@ -139,18 +137,18 @@ class threefry_engine4_base /// where b is the number of bits of the value type of the generator. /// In other words, this function is equivalent to calling \p discard /// 4 * (2 ^ b) times without using the return value, but is much faster. - FQUALIFIERS void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { this->discard_subsequence_impl(subsequence); m_state.result = this->threefry_rounds(m_state.counter, m_state.key); } - FQUALIFIERS value operator()() + __forceinline__ __device__ __host__ value operator()() { return this->next(); } - FQUALIFIERS value next() + __forceinline__ __device__ __host__ value next() { #if defined(__HIP_PLATFORM_AMD__) value ret = m_state.result.data[m_state.substate]; @@ -167,7 +165,7 @@ class threefry_engine4_base return ret; } - FQUALIFIERS state_value next4() + __forceinline__ __device__ __host__ state_value next4() { state_value ret = m_state.result; m_state.counter = this->bump_counter(m_state.counter); @@ -177,7 +175,8 @@ class threefry_engine4_base } protected: - FQUALIFIERS static state_value threefry_rounds(state_value counter, state_value key) + __forceinline__ __device__ __host__ static state_value threefry_rounds(state_value counter, + state_value key) { state_value X; value ks[4 + 1]; @@ -247,7 +246,7 @@ class threefry_engine4_base /// Advances the internal state to skip \p offset numbers. /// Does not calculate new values (or update m_state.result). 
- FQUALIFIERS void discard_impl(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset) { // Adjust offset for subset m_state.substate += offset & 3; @@ -259,7 +258,8 @@ class threefry_engine4_base } /// Does not calculate new values (or update m_state.result). - FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence) + __forceinline__ __device__ __host__ void + discard_subsequence_impl(unsigned long long subsequence) { value lo, hi; ::rocrand_device::detail::split_ull(lo, hi, subsequence); @@ -271,7 +271,7 @@ class threefry_engine4_base /// Advances the internal state by \p offset times. /// Does not calculate new values (or update m_state.result). - FQUALIFIERS void discard_state(unsigned long long offset) + __forceinline__ __device__ __host__ void discard_state(unsigned long long offset) { value lo, hi; ::rocrand_device::detail::split_ull(lo, hi, offset); @@ -283,7 +283,7 @@ class threefry_engine4_base m_state.counter.w += (m_state.counter.z < old_counter.z ? 1 : 0); } - FQUALIFIERS static state_value bump_counter(state_value counter) + __forceinline__ __device__ __host__ static state_value bump_counter(state_value counter) { counter.x++; value add = counter.x == 0 ? 1 : 0; @@ -295,7 +295,8 @@ class threefry_engine4_base return counter; } - FQUALIFIERS state_value interleave(const state_value prev, const state_value next) const + __forceinline__ __device__ __host__ state_value interleave(const state_value prev, + const state_value next) const { switch(m_state.substate) { diff --git a/library/include/rocrand/rocrand_threefry4x32_20.h b/library/include/rocrand/rocrand_threefry4x32_20.h index 4039f631..b57753fa 100644 --- a/library/include/rocrand/rocrand_threefry4x32_20.h +++ b/library/include/rocrand/rocrand_threefry4x32_20.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. 
All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ROCRAND_THREEFRY4X32_20_H_ #define ROCRAND_THREEFRY4X32_20_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry4_impl.h" namespace rocrand_device @@ -73,9 +69,10 @@ class threefry4x32_20_engine : public threefry_engine4_baseseed(seed, subsequence, offset); } @@ -85,9 +82,9 @@ class threefry4x32_20_engine : public threefry_engine4_baseunsigned int */ -FQUALIFIERS unsigned int rocrand(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_threefry4x32_20* state) { return state->next(); } @@ -158,7 +155,7 @@ FQUALIFIERS unsigned int rocrand(rocrand_state_threefry4x32_20* state) * * \return Four pseudorandom values (32-bit) as an uint2 */ -FQUALIFIERS uint4 rocrand4(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ uint4 rocrand4(rocrand_state_threefry4x32_20* state) { return state->next4(); } diff --git a/library/include/rocrand/rocrand_threefry4x64_20.h b/library/include/rocrand/rocrand_threefry4x64_20.h index c28e1f6e..6d4a29be 100644 --- a/library/include/rocrand/rocrand_threefry4x64_20.h +++ b/library/include/rocrand/rocrand_threefry4x64_20.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef ROCRAND_THREEFRY4X64_20_H_ #define ROCRAND_THREEFRY4X64_20_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_threefry4_impl.h" namespace rocrand_device @@ -74,9 +70,10 @@ class threefry4x64_20_engine : public threefry_engine4_baseseed(seed, subsequence, offset); } @@ -86,9 +83,9 @@ class threefry4x64_20_engine : public threefry_engine4_base> 16, seed >> 32, seed >> 48}; @@ -117,10 +114,10 @@ typedef rocrand_device::threefry4x64_20_engine rocrand_state_threefry4x64_20; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_threefry4x64_20* state) { *state = rocrand_state_threefry4x64_20(seed, subsequence, offset); } @@ -139,7 +136,7 @@ FQUALIFIERS void rocrand_init(const unsigned long long seed, * * \return Pseudorandom value (64-bit) as an unsigned long long */ -FQUALIFIERS unsigned long long rocrand(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ unsigned long long rocrand(rocrand_state_threefry4x64_20* state) { return state->next(); } @@ -156,7 +153,7 @@ FQUALIFIERS unsigned long long rocrand(rocrand_state_threefry4x64_20* state) * * \return Four pseudorandom values (64-bit) as an ulonglong4 */ -FQUALIFIERS ulonglong4 rocrand4(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ ulonglong4 rocrand4(rocrand_state_threefry4x64_20* state) { return state->next4(); } diff --git a/library/include/rocrand/rocrand_threefry_common.h b/library/include/rocrand/rocrand_threefry_common.h index c334df7f..6fb707c2 100644 --- 
a/library/include/rocrand/rocrand_threefry_common.h +++ b/library/include/rocrand/rocrand_threefry_common.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -53,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ROCRAND_THREEFRY_COMMON_H_ #define ROCRAND_THREEFRY_COMMON_H_ -#ifndef FQUALIFIERS - #define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_common.h" // C240 constant for Skein Hash function Threefish @@ -68,31 +64,32 @@ namespace rocrand_device { template -FQUALIFIERS value rotl(value x, int d); +__forceinline__ __device__ __host__ value rotl(value x, int d); template<> -FQUALIFIERS unsigned long long rotl(unsigned long long x, int d) +__forceinline__ __device__ __host__ unsigned long long + rotl(unsigned long long x, int d) { return ((x << d) | (x >> (64 - d) & 63)); }; template<> -FQUALIFIERS unsigned int rotl(unsigned int x, int d) +__forceinline__ __device__ __host__ unsigned int rotl(unsigned int x, int d) { return (x << (d & 31)) | (x >> ((32 - d) & 31)); }; template -FQUALIFIERS value skein_ks_parity(); +__forceinline__ __device__ __host__ value skein_ks_parity(); template<> -FQUALIFIERS unsigned int skein_ks_parity() +__forceinline__ __device__ __host__ unsigned int skein_ks_parity() { return SKEIN_KS_PARITY32; } template<> -FQUALIFIERS unsigned long long skein_ks_parity() +__forceinline__ __device__ __host__ unsigned long long skein_ks_parity() { return SKEIN_KS_PARITY64; } diff --git a/library/include/rocrand/rocrand_uniform.h b/library/include/rocrand/rocrand_uniform.h index 6e3e9795..2bf772b5 100644 --- a/library/include/rocrand/rocrand_uniform.h +++ 
b/library/include/rocrand/rocrand_uniform.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -26,11 +26,6 @@ #ifndef ROCRAND_UNIFORM_H_ #define ROCRAND_UNIFORM_H_ -/// Shorthand for commonly used function qualifiers -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS - #include "rocrand/rocrand_lfsr113.h" #include "rocrand/rocrand_mrg31k3p.h" #include "rocrand/rocrand_mrg32k3a.h" @@ -65,22 +60,19 @@ union two_uints_to_ulong // For unsigned integer between 0 and UINT_MAX, returns value between // 0.0f and 1.0f, excluding 0.0f and including 1.0f. -FQUALIFIERS -float uniform_distribution(unsigned int v) +__forceinline__ __device__ __host__ float uniform_distribution(unsigned int v) { return ROCRAND_2POW32_INV + (v * ROCRAND_2POW32_INV); } // For unsigned integer between 0 and ULLONG_MAX, returns value between // 0.0f and 1.0f, excluding 0.0f and including 1.0f. 
-FQUALIFIERS -float uniform_distribution(unsigned long long int v) +__forceinline__ __device__ __host__ float uniform_distribution(unsigned long long int v) { return ROCRAND_2POW32_INV + (v >> 32) * ROCRAND_2POW32_INV; } -FQUALIFIERS -float4 uniform_distribution4(uint4 v) +__forceinline__ __device__ __host__ float4 uniform_distribution4(uint4 v) { return float4 { ROCRAND_2POW32_INV + (v.x * ROCRAND_2POW32_INV), @@ -90,7 +82,7 @@ float4 uniform_distribution4(uint4 v) }; } -FQUALIFIERS float4 uniform_distribution4(ulonglong4 v) +__forceinline__ __device__ __host__ float4 uniform_distribution4(ulonglong4 v) { return float4{ROCRAND_2POW64_INV + (v.x * ROCRAND_2POW64_INV), ROCRAND_2POW64_INV + (v.y * ROCRAND_2POW64_INV), @@ -100,14 +92,13 @@ FQUALIFIERS float4 uniform_distribution4(ulonglong4 v) // For unsigned integer between 0 and UINT_MAX, returns value between // 0.0 and 1.0, excluding 0.0 and including 1.0. -FQUALIFIERS -double uniform_distribution_double(unsigned int v) +__forceinline__ __device__ __host__ double uniform_distribution_double(unsigned int v) { return ROCRAND_2POW32_INV_DOUBLE + (v * ROCRAND_2POW32_INV_DOUBLE); } -FQUALIFIERS -double uniform_distribution_double(unsigned int v1, unsigned int v2) +__forceinline__ __device__ __host__ double uniform_distribution_double(unsigned int v1, + unsigned int v2) { two_uints_to_ulong v; v.uint2_value.x = v1; @@ -115,8 +106,7 @@ double uniform_distribution_double(unsigned int v1, unsigned int v2) return ROCRAND_2POW53_INV_DOUBLE + (v.ulong_value * ROCRAND_2POW53_INV_DOUBLE); } -FQUALIFIERS -double uniform_distribution_double(unsigned long long int v) +__forceinline__ __device__ __host__ double uniform_distribution_double(unsigned long long int v) { return ROCRAND_2POW53_INV_DOUBLE + ( // 2^53 is the biggest int that can be stored in double, such @@ -125,8 +115,7 @@ double uniform_distribution_double(unsigned long long int v) ); } -FQUALIFIERS -double2 uniform_distribution_double2(uint4 v) +__forceinline__ 
__device__ __host__ double2 uniform_distribution_double2(uint4 v) { return double2 { uniform_distribution_double(v.x, v.y), @@ -134,8 +123,7 @@ double2 uniform_distribution_double2(uint4 v) }; } -FQUALIFIERS -double4 uniform_distribution_double4(uint4 v1, uint4 v2) +__forceinline__ __device__ __host__ double4 uniform_distribution_double4(uint4 v1, uint4 v2) { return double4 { uniform_distribution_double(v1.x, v1.y), @@ -145,17 +133,17 @@ double4 uniform_distribution_double4(uint4 v1, uint4 v2) }; } -FQUALIFIERS double2 uniform_distribution_double2(ulonglong2 v) +__forceinline__ __device__ __host__ double2 uniform_distribution_double2(ulonglong2 v) { return double2{uniform_distribution_double(v.x), uniform_distribution_double(v.y)}; } -FQUALIFIERS double2 uniform_distribution_double2(ulonglong4 v) +__forceinline__ __device__ __host__ double2 uniform_distribution_double2(ulonglong4 v) { return double2{uniform_distribution_double(v.x), uniform_distribution_double(v.y)}; } -FQUALIFIERS double4 uniform_distribution_double4(ulonglong4 v) +__forceinline__ __device__ __host__ double4 uniform_distribution_double4(ulonglong4 v) { return double4{uniform_distribution_double(v.x), uniform_distribution_double(v.z), @@ -163,8 +151,7 @@ FQUALIFIERS double4 uniform_distribution_double4(ulonglong4 v) uniform_distribution_double(v.z)}; } -FQUALIFIERS -__half uniform_distribution_half(unsigned short v) +__forceinline__ __device__ __host__ __half uniform_distribution_half(unsigned short v) { return __float2half(ROCRAND_2POW16_INV + (v * ROCRAND_2POW16_INV)); } @@ -172,16 +159,19 @@ __half uniform_distribution_half(unsigned short v) // For an unsigned integer produced by an MRG-based engine, returns a value // in range [0, UINT32_MAX]. 
template -FQUALIFIERS unsigned int mrg_uniform_distribution_uint(unsigned int v) = delete; +__forceinline__ __device__ __host__ unsigned int mrg_uniform_distribution_uint(unsigned int v) + = delete; template<> -FQUALIFIERS unsigned int mrg_uniform_distribution_uint(unsigned int v) +__forceinline__ __device__ __host__ unsigned int + mrg_uniform_distribution_uint(unsigned int v) { return static_cast((v - 1) * ROCRAND_MRG31K3P_UINT32_NORM); } template<> -FQUALIFIERS unsigned int mrg_uniform_distribution_uint(unsigned int v) +__forceinline__ __device__ __host__ unsigned int + mrg_uniform_distribution_uint(unsigned int v) { return static_cast((v - 1) * ROCRAND_MRG32K3A_UINT_NORM); } @@ -189,17 +179,19 @@ FQUALIFIERS unsigned int mrg_uniform_distribution_uint(u // For an unsigned integer produced by an MRG-based engine, returns value between // 0.0f and 1.0f, excluding 0.0f and including 1.0f. template -FQUALIFIERS float mrg_uniform_distribution(unsigned int v) = delete; +__forceinline__ __device__ __host__ float mrg_uniform_distribution(unsigned int v) = delete; template<> -FQUALIFIERS float mrg_uniform_distribution(unsigned int v) +__forceinline__ __device__ __host__ float + mrg_uniform_distribution(unsigned int v) { double ret = static_cast(v) * ROCRAND_MRG31K3P_NORM_DOUBLE; return static_cast(ret); } template<> -FQUALIFIERS float mrg_uniform_distribution(unsigned int v) +__forceinline__ __device__ __host__ float + mrg_uniform_distribution(unsigned int v) { double ret = static_cast(v) * ROCRAND_MRG32K3A_NORM_DOUBLE; return static_cast(ret); @@ -208,17 +200,19 @@ FQUALIFIERS float mrg_uniform_distribution(unsigned int // For an unsigned integer produced by an MRG generator, returns value between // 0.0 and 1.0, excluding 0.0 and including 1.0. 
template -FQUALIFIERS double mrg_uniform_distribution_double(unsigned int v) = delete; +__forceinline__ __device__ __host__ double mrg_uniform_distribution_double(unsigned int v) = delete; template<> -FQUALIFIERS double mrg_uniform_distribution_double(unsigned int v) +__forceinline__ __device__ __host__ double + mrg_uniform_distribution_double(unsigned int v) { double ret = static_cast(v) * ROCRAND_MRG31K3P_NORM_DOUBLE; return ret; } template<> -FQUALIFIERS double mrg_uniform_distribution_double(unsigned int v) +__forceinline__ __device__ __host__ double + mrg_uniform_distribution_double(unsigned int v) { double ret = static_cast(v) * ROCRAND_MRG32K3A_NORM_DOUBLE; return ret; @@ -239,8 +233,7 @@ FQUALIFIERS double mrg_uniform_distribution_double(unsig * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -257,8 +250,7 @@ float rocrand_uniform(rocrand_state_philox4x32_10 * state) * * \return Two uniformly distributed \p float values from (0; 1] range as \p float2. */ -FQUALIFIERS -float2 rocrand_uniform2(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ float2 rocrand_uniform2(rocrand_state_philox4x32_10* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -281,8 +273,7 @@ float2 rocrand_uniform2(rocrand_state_philox4x32_10 * state) * * \return Four uniformly distributed \p float values from (0; 1] range as \p float4. 
*/ -FQUALIFIERS -float4 rocrand_uniform4(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ float4 rocrand_uniform4(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::uniform_distribution4(rocrand4(state)); } @@ -299,8 +290,8 @@ float4 rocrand_uniform4(rocrand_state_philox4x32_10 * state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_philox4x32_10* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -320,8 +311,8 @@ double rocrand_uniform_double(rocrand_state_philox4x32_10 * state) * * \return Two uniformly distributed \p double values from (0; 1] range as \p double2. */ -FQUALIFIERS -double2 rocrand_uniform_double2(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ double2 + rocrand_uniform_double2(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::uniform_distribution_double2(rocrand4(state)); } @@ -338,8 +329,8 @@ double2 rocrand_uniform_double2(rocrand_state_philox4x32_10 * state) * * \return Four uniformly distributed \p double values from (0; 1] range as \p double4. */ -FQUALIFIERS -double4 rocrand_uniform_double4(rocrand_state_philox4x32_10 * state) +__forceinline__ __device__ __host__ double4 + rocrand_uniform_double4(rocrand_state_philox4x32_10* state) { return rocrand_device::detail::uniform_distribution_double4(rocrand4(state), rocrand4(state)); } @@ -356,7 +347,7 @@ double4 rocrand_uniform_double4(rocrand_state_philox4x32_10 * state) * * \return Uniformly distributed \p float value from (0; 1] range. 
*/ -FQUALIFIERS float rocrand_uniform(rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_mrg31k3p* state) { return rocrand_device::detail::mrg_uniform_distribution(state->next()); } @@ -376,7 +367,7 @@ FQUALIFIERS float rocrand_uniform(rocrand_state_mrg31k3p* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS double rocrand_uniform_double(rocrand_state_mrg31k3p* state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_mrg31k3p* state) { return rocrand_device::detail::mrg_uniform_distribution_double( state->next()); @@ -394,8 +385,7 @@ FQUALIFIERS double rocrand_uniform_double(rocrand_state_mrg31k3p* state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_mrg32k3a* state) { return rocrand_device::detail::mrg_uniform_distribution(state->next()); } @@ -415,8 +405,7 @@ float rocrand_uniform(rocrand_state_mrg32k3a * state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_mrg32k3a * state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_mrg32k3a* state) { return rocrand_device::detail::mrg_uniform_distribution_double( state->next()); @@ -434,8 +423,7 @@ double rocrand_uniform_double(rocrand_state_mrg32k3a * state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_xorwow* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -452,8 +440,7 @@ float rocrand_uniform(rocrand_state_xorwow * state) * * \return Uniformly distributed \p double value from (0; 1] range. 
*/ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_xorwow* state) { auto state1 = rocrand(state); auto state2 = rocrand(state); @@ -473,8 +460,7 @@ double rocrand_uniform_double(rocrand_state_xorwow * state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_mtgp32 * state) +__forceinline__ __device__ float rocrand_uniform(rocrand_state_mtgp32* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -494,8 +480,7 @@ float rocrand_uniform(rocrand_state_mtgp32 * state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_mtgp32 * state) +__forceinline__ __device__ double rocrand_uniform_double(rocrand_state_mtgp32* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -512,8 +497,7 @@ double rocrand_uniform_double(rocrand_state_mtgp32 * state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_sobol32* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -533,8 +517,7 @@ float rocrand_uniform(rocrand_state_sobol32 * state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_sobol32 * state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_sobol32* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -551,8 +534,7 @@ double rocrand_uniform_double(rocrand_state_sobol32 * state) * * \return Uniformly distributed \p float value from (0; 1] range. 
*/ -FQUALIFIERS -float rocrand_uniform(rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_scrambled_sobol32* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -572,8 +554,8 @@ float rocrand_uniform(rocrand_state_scrambled_sobol32* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_scrambled_sobol32* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_scrambled_sobol32* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -590,8 +572,7 @@ double rocrand_uniform_double(rocrand_state_scrambled_sobol32* state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_sobol64* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_sobol64* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -608,8 +589,7 @@ float rocrand_uniform(rocrand_state_sobol64* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_sobol64 * state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_sobol64* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -626,8 +606,7 @@ double rocrand_uniform_double(rocrand_state_sobol64 * state) * * \return Uniformly distributed \p float value from (0; 1] range. 
*/ -FQUALIFIERS -float rocrand_uniform(rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_scrambled_sobol64* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -644,8 +623,8 @@ float rocrand_uniform(rocrand_state_scrambled_sobol64* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_scrambled_sobol64* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_scrambled_sobol64* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -662,8 +641,7 @@ double rocrand_uniform_double(rocrand_state_scrambled_sobol64* state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS -float rocrand_uniform(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_lfsr113* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -683,8 +661,7 @@ float rocrand_uniform(rocrand_state_lfsr113* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS -double rocrand_uniform_double(rocrand_state_lfsr113* state) +__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_lfsr113* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -701,7 +678,7 @@ double rocrand_uniform_double(rocrand_state_lfsr113* state) * * \return Uniformly distributed \p float value from (0; 1] range. 
*/ -FQUALIFIERS float rocrand_uniform(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_threefry2x32_20* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -721,7 +698,8 @@ FQUALIFIERS float rocrand_uniform(rocrand_state_threefry2x32_20* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry2x32_20* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_threefry2x32_20* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -738,7 +716,7 @@ FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry2x32_20* state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS float rocrand_uniform(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -758,7 +736,8 @@ FQUALIFIERS float rocrand_uniform(rocrand_state_threefry2x64_20* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry2x64_20* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_threefry2x64_20* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -775,7 +754,7 @@ FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry2x64_20* state) * * \return Uniformly distributed \p float value from (0; 1] range. 
*/ -FQUALIFIERS float rocrand_uniform(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_threefry4x32_20* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -795,7 +774,8 @@ FQUALIFIERS float rocrand_uniform(rocrand_state_threefry4x32_20* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry4x32_20* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_threefry4x32_20* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } @@ -812,7 +792,7 @@ FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry4x32_20* state) * * \return Uniformly distributed \p float value from (0; 1] range. */ -FQUALIFIERS float rocrand_uniform(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ float rocrand_uniform(rocrand_state_threefry4x64_20* state) { return rocrand_device::detail::uniform_distribution(rocrand(state)); } @@ -832,7 +812,8 @@ FQUALIFIERS float rocrand_uniform(rocrand_state_threefry4x64_20* state) * * \return Uniformly distributed \p double value from (0; 1] range. */ -FQUALIFIERS double rocrand_uniform_double(rocrand_state_threefry4x64_20* state) +__forceinline__ __device__ __host__ double + rocrand_uniform_double(rocrand_state_threefry4x64_20* state) { return rocrand_device::detail::uniform_distribution_double(rocrand(state)); } diff --git a/library/include/rocrand/rocrand_xorwow.h b/library/include/rocrand/rocrand_xorwow.h index 9902d6c5..14eed112 100644 --- a/library/include/rocrand/rocrand_xorwow.h +++ b/library/include/rocrand/rocrand_xorwow.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_XORWOW_H_ #define ROCRAND_XORWOW_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ -#endif // FQUALIFIERS_ - #include "rocrand/rocrand_common.h" #include "rocrand/rocrand_xorwow_precomputed.h" @@ -42,8 +38,7 @@ namespace rocrand_device { namespace detail { -FQUALIFIERS -void copy_vec(unsigned int * dst, const unsigned int * src) +__forceinline__ __device__ __host__ void copy_vec(unsigned int* dst, const unsigned int* src) { for (int i = 0; i < XORWOW_N; i++) { @@ -51,8 +46,7 @@ void copy_vec(unsigned int * dst, const unsigned int * src) } } -FQUALIFIERS -void mul_mat_vec_inplace(const unsigned int * m, unsigned int * v) +__forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, unsigned int* v) { unsigned int r[XORWOW_N] = { 0 }; for (int ij = 0; ij < XORWOW_N * XORWOW_M; ij++) @@ -78,7 +72,7 @@ class xorwow_engine // Weyl sequence value unsigned int d; - #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE // The Box–Muller transform requires two inputs to convert uniformly // distributed real values [0; 1] to normally distributed real values // (with mean = 0, and stddev = 1). 
Often user wants only one @@ -88,24 +82,24 @@ class xorwow_engine unsigned int boxmuller_double_state; // is there a double in boxmuller_double float boxmuller_float; // normally distributed float double boxmuller_double; // normally distributed double - #endif + #endif // Xorshift values (160 bits) unsigned int x[5]; }; - FQUALIFIERS - xorwow_engine() : xorwow_engine(ROCRAND_XORWOW_DEFAULT_SEED, 0, 0) { } + __forceinline__ __device__ __host__ xorwow_engine() + : xorwow_engine(ROCRAND_XORWOW_DEFAULT_SEED, 0, 0) + {} /// Initializes the internal state of the PRNG using /// seed value \p seed, goes to \p subsequence -th subsequence, /// and skips \p offset random numbers. /// /// A subsequence is 2^67 numbers long. - FQUALIFIERS - xorwow_engine(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset) + __forceinline__ __device__ __host__ xorwow_engine(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset) { m_state.x[0] = 123456789U; m_state.x[1] = 362436069U; @@ -130,15 +124,14 @@ class xorwow_engine discard_subsequence(subsequence); discard(offset); - #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE m_state.boxmuller_float_state = 0; m_state.boxmuller_double_state = 0; - #endif + #endif } /// Advances the internal state to skip \p offset numbers. - FQUALIFIERS - void discard(unsigned long long offset) + __forceinline__ __device__ __host__ void discard(unsigned long long offset) { #ifdef __HIP_DEVICE_COMPILE__ jump(offset, d_xorwow_jump_matrices); @@ -152,8 +145,7 @@ class xorwow_engine /// Advances the internal state to skip \p subsequence subsequences. /// A subsequence is 2^67 numbers long. 
- FQUALIFIERS - void discard_subsequence(unsigned long long subsequence) + __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence) { // Discard n * 2^67 samples #ifdef __HIP_DEVICE_COMPILE__ @@ -165,14 +157,12 @@ class xorwow_engine // d has the same value because 2^67 is divisible by 2^32 (d is 32-bit) } - FQUALIFIERS - unsigned int operator()() + __forceinline__ __device__ __host__ unsigned int operator()() { return next(); } - FQUALIFIERS - unsigned int next() + __forceinline__ __device__ __host__ unsigned int next() { const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2); m_state.x[0] = m_state.x[1]; @@ -187,10 +177,9 @@ class xorwow_engine } protected: - - FQUALIFIERS - void jump(unsigned long long v, - const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE]) + __forceinline__ __device__ __host__ void + jump(unsigned long long v, + const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE]) { // x~(n + v) = (A^v mod m)x~n mod m // The matrix (A^v mod m) can be precomputed for selected values of v. 
@@ -222,7 +211,7 @@ class xorwow_engine // State xorwow_state m_state; - #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE + #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE friend struct detail::engine_boxmuller_helper; #endif @@ -250,11 +239,10 @@ typedef rocrand_device::xorwow_engine rocrand_state_xorwow; * \param offset - Absolute offset into subsequence * \param state - Pointer to state to initialize */ -FQUALIFIERS -void rocrand_init(const unsigned long long seed, - const unsigned long long subsequence, - const unsigned long long offset, - rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, + const unsigned long long subsequence, + const unsigned long long offset, + rocrand_state_xorwow* state) { *state = rocrand_state_xorwow(seed, subsequence, offset); } @@ -271,8 +259,7 @@ void rocrand_init(const unsigned long long seed, * * \return Pseudorandom value (32-bit) as an unsigned int */ -FQUALIFIERS -unsigned int rocrand(rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow* state) { return state->next(); } @@ -285,8 +272,8 @@ unsigned int rocrand(rocrand_state_xorwow * state) * \param offset - Number of elements to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead(unsigned long long offset, rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, + rocrand_state_xorwow* state) { return state->discard(offset); } @@ -300,8 +287,8 @@ void skipahead(unsigned long long offset, rocrand_state_xorwow * state) * \param subsequence - Number of subsequences to skip * \param state - Pointer to state to update */ -FQUALIFIERS -void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow * state) +__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, + rocrand_state_xorwow* state) { return state->discard_subsequence(subsequence); } 
@@ -315,11 +302,11 @@ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow * \param sequence - Number of sequences to skip * \param state - Pointer to state to update */ - FQUALIFIERS - void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow * state) - { - return state->discard_subsequence(sequence); - } +__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, + rocrand_state_xorwow* state) +{ + return state->discard_subsequence(sequence); +} #endif // ROCRAND_XORWOW_H_ diff --git a/library/include/rocrand/rocrand_xorwow_precomputed.h b/library/include/rocrand/rocrand_xorwow_precomputed.h index e05aa07d..9bc7969b 100644 --- a/library/include/rocrand/rocrand_xorwow_precomputed.h +++ b/library/include/rocrand/rocrand_xorwow_precomputed.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -30,6 +30,7 @@ #define XORWOW_JUMP_MATRICES 32 #define XORWOW_JUMP_LOG2 2 +// clang-format off static const __device__ unsigned int d_xorwow_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE] = { { 0, 0, 0, 0, 3, 0, 0, 0, 0, 6, 0, 0, 0, 0, 15, 0, 0, 0, 0, 30, 0, 0, 0, 0, 60, @@ -1121,6 +1122,8 @@ static const __device__ unsigned int d_xorwow_jump_matrices[XORWOW_JUMP_MATRICES }, }; +// clang-format on +// clang-format off static const unsigned int h_xorwow_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE] = { { 0, 0, 0, 0, 3, 0, 0, 0, 0, 6, 0, 0, 0, 0, 15, 0, 0, 0, 0, 30, 0, 0, 0, 0, 60, @@ -2212,6 +2215,8 @@ static const unsigned int h_xorwow_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SI }, }; +// clang-format on +// clang-format off static const __device__ unsigned int 
d_xorwow_sequence_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE] = { { 850664906, 2293210629, 1517805917, 1215500405, 1612415445, 645388200, 824349799, 3517232886, 4075591755, 3089899292, 4249786064, 3811424903, 1100783479, 53649761, 2817264826, 3159462529, 1654848550, 950025444, 3095510002, 4080567211, 4111078399, 3241719305, 2788212779, 4256963770, 2426893717, @@ -3303,6 +3308,8 @@ static const __device__ unsigned int d_xorwow_sequence_jump_matrices[XORWOW_JUMP }, }; +// clang-format on +// clang-format off static const unsigned int h_xorwow_sequence_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE] = { { 850664906, 2293210629, 1517805917, 1215500405, 1612415445, 645388200, 824349799, 3517232886, 4075591755, 3089899292, 4249786064, 3811424903, 1100783479, 53649761, 2817264826, 3159462529, 1654848550, 950025444, 3095510002, 4080567211, 4111078399, 3241719305, 2788212779, 4256963770, 2426893717, @@ -4394,5 +4401,6 @@ static const unsigned int h_xorwow_sequence_jump_matrices[XORWOW_JUMP_MATRICES][ }, }; +// clang-format on #endif // ROCRAND_XORWOW_PRECOMPUTED_H_ diff --git a/library/src/rng/common.hpp b/library/src/rng/common.hpp index e4b45040..67b15e39 100644 --- a/library/src/rng/common.hpp +++ b/library/src/rng/common.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,9 @@ #ifndef ROCRAND_RNG_COMMON_H_ #define ROCRAND_RNG_COMMON_H_ -#include - -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ __host__ +// Generating normal distributed numbers via the Box-Muller transformation is faster, but requires to always generate two numbers. 
If only one number is needed, the other is stored in the state of the generator, and returned when another one is requested. For the host API this is not needed, as it always creates pairs of those numbers. This reduces register usage in the kernel. +#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE + #define ROCRAND_DETAIL_BM_NOT_IN_STATE #endif #if !defined(USE_DEVICE_DISPATCH) && !defined(_WIN32) && defined(__HIP_PLATFORM_AMD__) @@ -33,17 +32,23 @@ #include +#include #include #include #include +namespace rocrand_impl +{ + template struct alignas(sizeof(T) * N) aligned_vec_type { T data[N]; }; +} // namespace rocrand_impl + /** * \brief Check for a HIP error and exit the program if encountered. * diff --git a/library/src/rng/config/config_defaults.hpp b/library/src/rng/config/config_defaults.hpp index ec07fc9b..add73cbc 100644 --- a/library/src/rng/config/config_defaults.hpp +++ b/library/src/rng/config/config_defaults.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,7 @@ * This file is automatically generated by `/scripts/config-tuning/generate_config_defaults.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -107,6 +107,6 @@ struct generator_config_defaults static constexpr inline unsigned int blocks = 1024; }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host #endif // ROCRAND_RNG_CONFIG_CONFIG_DEFAULTS_HPP_ diff --git a/library/src/rng/config/lfsr113_config.hpp b/library/src/rng/config/lfsr113_config.hpp index f568cbf2..041acc30 100644 --- a/library/src/rng/config/lfsr113_config.hpp +++ b/library/src/rng/config/lfsr113_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. */ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,7 +38,8 @@ struct generator_config_selector { switch(arch) { - case target_arch::gfx1030: return 128; + case target_arch::gfx1102: return 128; + case target_arch::gfx1030: return 64; case target_arch::gfx906: return 256; case target_arch::gfx908: return 256; default: return generator_config_defaults::threads; @@ -49,14 +50,15 @@ struct generator_config_selector { switch(arch) { - case target_arch::gfx1030: return 360; - case target_arch::gfx906: return 560; - case target_arch::gfx908: return 960; + case target_arch::gfx1102: return 256; + case target_arch::gfx1030: return 512; + case target_arch::gfx906: return 2048; + case target_arch::gfx908: return 1024; default: return generator_config_defaults::blocks; } } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_LFSR113_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_LFSR113_HPP_ diff --git a/library/src/rng/config/mrg31k3p_config.hpp b/library/src/rng/config/mrg31k3p_config.hpp index b6fa9c79..21da1b34 100644 --- a/library/src/rng/config/mrg31k3p_config.hpp +++ b/library/src/rng/config/mrg31k3p_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 256; case target_arch::gfx906: return 256; case target_arch::gfx908: return 1024; @@ -49,6 +50,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 256; case target_arch::gfx1030: return 1152; case target_arch::gfx906: return 1792; case target_arch::gfx908: return 600; @@ -57,6 +59,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_MRG31K3P_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_MRG31K3P_HPP_ diff --git a/library/src/rng/config/mrg32k3a_config.hpp b/library/src/rng/config/mrg32k3a_config.hpp index b621dbbf..222df513 100644 --- a/library/src/rng/config/mrg32k3a_config.hpp +++ b/library/src/rng/config/mrg32k3a_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 256; case target_arch::gfx906: return 256; case target_arch::gfx908: return 1024; @@ -49,6 +50,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 256; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 2048; case target_arch::gfx908: return 600; @@ -57,6 +59,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_MRG32K3A_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_MRG32K3A_HPP_ diff --git a/library/src/rng/config/mtgp32_config.hpp b/library/src/rng/config/mtgp32_config.hpp index 1c5927ba..4c3a9553 100644 --- a/library/src/rng/config/mtgp32_config.hpp +++ b/library/src/rng/config/mtgp32_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,8 +38,8 @@ struct generator_config_selector { switch(arch) { - case target_arch::gfx1030: return 256; case target_arch::gfx1102: return 256; + case target_arch::gfx1030: return 256; case target_arch::gfx906: return 256; case target_arch::gfx908: return 256; default: return generator_config_defaults::threads; @@ -50,8 +50,8 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 320; case target_arch::gfx1030: return 512; - case target_arch::gfx1102: return 512; case target_arch::gfx906: return 448; case target_arch::gfx908: return 480; default: return generator_config_defaults::blocks; @@ -59,6 +59,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host #endif // ROCRAND_RNG_CONFIG_MTGP32_HPP_ diff --git a/library/src/rng/config/philox4_32_10_config.hpp b/library/src/rng/config/philox4_32_10_config.hpp index 3ed25aec..57275425 100644 --- a/library/src/rng/config/philox4_32_10_config.hpp +++ b/library/src/rng/config/philox4_32_10_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 1024; case target_arch::gfx1030: return 1024; case target_arch::gfx906: return 64; case target_arch::gfx908: return 512; @@ -49,6 +50,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 896; case target_arch::gfx908: return 3840; @@ -57,6 +59,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_PHILOX4_32_10_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_PHILOX4_32_10_HPP_ diff --git a/library/src/rng/config/threefry2_32_20_config.hpp b/library/src/rng/config/threefry2_32_20_config.hpp index 3822bce4..be4ed681 100644 --- a/library/src/rng/config/threefry2_32_20_config.hpp +++ b/library/src/rng/config/threefry2_32_20_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 1024; case target_arch::gfx1030: return 256; case target_arch::gfx906: return 256; case target_arch::gfx908: return 512; @@ -50,6 +51,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 896; case target_arch::gfx908: return 1200; @@ -59,6 +61,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_THREEFRY2_32_20_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_THREEFRY2_32_20_HPP_ diff --git a/library/src/rng/config/threefry2_64_20_config.hpp b/library/src/rng/config/threefry2_64_20_config.hpp index 6943237d..1fcef423 100644 --- a/library/src/rng/config/threefry2_64_20_config.hpp +++ b/library/src/rng/config/threefry2_64_20_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 512; case target_arch::gfx1030: return 128; case target_arch::gfx906: return 256; case target_arch::gfx908: return 256; @@ -50,6 +51,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 560; case target_arch::gfx908: return 960; @@ -59,6 +61,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_THREEFRY2_64_20_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_THREEFRY2_64_20_HPP_ diff --git a/library/src/rng/config/threefry4_32_20_config.hpp b/library/src/rng/config/threefry4_32_20_config.hpp index 47ca1b3f..8809f0d3 100644 --- a/library/src/rng/config/threefry4_32_20_config.hpp +++ b/library/src/rng/config/threefry4_32_20_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 1024; case target_arch::gfx1030: return 1024; case target_arch::gfx906: return 256; case target_arch::gfx908: return 256; @@ -50,6 +51,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 896; case target_arch::gfx908: return 1200; @@ -59,6 +61,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_THREEFRY4_32_20_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_THREEFRY4_32_20_HPP_ diff --git a/library/src/rng/config/threefry4_64_20_config.hpp b/library/src/rng/config/threefry4_64_20_config.hpp index d33e009a..73e00da3 100644 --- a/library/src/rng/config/threefry4_64_20_config.hpp +++ b/library/src/rng/config/threefry4_64_20_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 512; case target_arch::gfx1030: return 1024; case target_arch::gfx906: return 128; case target_arch::gfx908: return 128; @@ -50,6 +51,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 2304; case target_arch::gfx906: return 1792; case target_arch::gfx908: return 1920; @@ -59,6 +61,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_THREEFRY4_64_20_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_THREEFRY4_64_20_HPP_ diff --git a/library/src/rng/config/xorwow_config.hpp b/library/src/rng/config/xorwow_config.hpp index 616923cb..17aa5120 100644 --- a/library/src/rng/config/xorwow_config.hpp +++ b/library/src/rng/config/xorwow_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -38,6 +38,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 128; case target_arch::gfx1030: return 128; case target_arch::gfx906: return 256; case target_arch::gfx908: return 256; @@ -49,6 +50,7 @@ struct generator_config_selector { switch(arch) { + case target_arch::gfx1102: return 256; case target_arch::gfx1030: return 360; case target_arch::gfx906: return 560; case target_arch::gfx908: return 600; @@ -57,6 +59,6 @@ struct generator_config_selector } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_XORWOW_HPP_ \ No newline at end of file +#endif // ROCRAND_RNG_CONFIG_XORWOW_HPP_ diff --git a/library/src/rng/config_types.hpp b/library/src/rng/config_types.hpp index 8edacd1c..dcaedb22 100644 --- a/library/src/rng/config_types.hpp +++ b/library/src/rng/config_types.hpp @@ -33,7 +33,7 @@ #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { /// \brief Represents a device target's processor architecture. @@ -56,7 +56,7 @@ enum class target_arch : unsigned int }; /// @brief Returns the detected processor architecture of the device that is currently compiled against. -__device__ constexpr target_arch get_device_arch() +__host__ __device__ constexpr target_arch get_device_arch() { #if !defined(USE_DEVICE_DISPATCH) return target_arch::unknown; @@ -290,7 +290,7 @@ __host__ __device__ constexpr bool is_ordering_quasi(const rocrand_ordering orde template auto dynamic_dispatch(rocrand_ordering order, Function&& func) { - bool is_dynamic = ::rocrand_host::detail::is_ordering_dynamic(order); + bool is_dynamic = is_ordering_dynamic(order); if(is_dynamic) { return std::forward(func)(std::true_type{}); @@ -337,7 +337,7 @@ hipError_t get_generator_config(const hipStream_t stream, /// @param dynamic_config Whether architecture-specific launch config can be selected or not. 
/// @return The selected launch config. template -__device__ constexpr generator_config get_generator_config_device(bool dynamic_config) +__host__ __device__ constexpr generator_config get_generator_config_device(bool dynamic_config) { return generator_config{generator_config_selector::get_threads( dynamic_config ? get_device_arch() : target_arch::unknown), @@ -360,7 +360,7 @@ struct default_config_provider /// @param is_dynamic Controls if the returned config belongs to the static or the dynamic ordering. /// @return The kernel config struct. template - __device__ static constexpr generator_config device_config(const bool is_dynamic) + __host__ __device__ static constexpr generator_config device_config(const bool is_dynamic) { return get_generator_config_device(is_dynamic); } @@ -381,7 +381,7 @@ struct default_config_provider }; /// @brief ConfigProvider that always returns a config with the specified \ref Blocks and \ref Threads. -/// This can be used in place of \ref rocrand_host::detail::default_config_provider, which bases the +/// This can be used in place of \ref rocrand_impl::host::default_config_provider, which bases the /// returned configuration on the current architecture. /// @tparam Threads The number of threads in the kernel block. /// @tparam Blocks The number of blocks in the kernel grid. 
@@ -391,7 +391,7 @@ struct static_config_provider static constexpr inline generator_config static_config = {Threads, Blocks}; template - __device__ static constexpr generator_config device_config(const bool /*is_dynamic*/) + __host__ __device__ static constexpr generator_config device_config(const bool /*is_dynamic*/) { return static_config; } @@ -425,7 +425,8 @@ struct static_block_size_config_provider static constexpr inline block_size_generator_config static_config = {Threads}; template - __device__ static constexpr block_size_generator_config device_config(const bool /*is_dynamic*/) + __host__ __device__ static constexpr block_size_generator_config + device_config(const bool /*is_dynamic*/) { return static_config; } @@ -493,7 +494,7 @@ hipError_t get_least_common_grid_size(const hipStream_t stream, /// @return The least common multiple of all grid sizes across configurations. /// @tparam ConfigProvider Provider of the kernel launch configs. template -__device__ constexpr unsigned int get_least_common_grid_size(const bool is_dynamic) +__host__ __device__ constexpr unsigned int get_least_common_grid_size(const bool is_dynamic) { generator_config type_configs[6]{}; type_configs[0] = ConfigProvider::template device_config(is_dynamic); @@ -520,7 +521,7 @@ __device__ constexpr unsigned int get_least_common_grid_size(const bool is_dynam /// @tparam ConfigProvider Provider of the kernel launch configs. /// @tparam T The generated value type to load the config for. template -__device__ constexpr bool is_single_tile_config(const bool is_dynamic) +__host__ __device__ constexpr bool is_single_tile_config(const bool is_dynamic) { const auto config = ConfigProvider::template device_config(is_dynamic); const unsigned int grid_size = config.blocks * config.threads; @@ -538,7 +539,7 @@ __device__ constexpr bool is_single_tile_config(const bool is_dynamic) /// @param is_dynamic Whether the current kernel uses dynamic ordering or not. 
/// @returns The number of threads per block for the current config. template -__device__ constexpr unsigned int get_block_size(const bool is_dynamic) +__host__ __device__ constexpr unsigned int get_block_size(const bool is_dynamic) { return ConfigProvider::template device_config(is_dynamic).threads; } @@ -548,6 +549,6 @@ __device__ constexpr unsigned int get_block_size(const bool is_dynamic) template class GeneratorTemplate> constexpr inline rocrand_rng_type gen_template_type_v = GeneratorTemplate::type(); -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host #endif // ROCRAND_RNG_CONFIG_TYPES_H_ diff --git a/library/src/rng/device_engines.hpp b/library/src/rng/device_engines.hpp index 85993c77..30ed89c7 100644 --- a/library/src/rng/device_engines.hpp +++ b/library/src/rng/device_engines.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,15 +21,11 @@ #ifndef ROCRAND_RNG_DEVICE_ENGINES_H_ #define ROCRAND_RNG_DEVICE_ENGINES_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ __host__ -#endif +#include -#define ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE -#define ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE -#define ROCRAND_DETAIL_MRG32K3A_BM_NOT_IN_STATE -#define ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE +#include -#include +ROCRAND_PRAGMA_MESSAGE("Internal device_engines.hpp header has been deprecated. 
Please include the " + "necessary headers directly.") #endif // ROCRAND_RNG_DEVICE_ENGINES_H_ diff --git a/library/src/rng/distribution/device_distributions.hpp b/library/src/rng/distribution/device_distributions.hpp index 72be73b3..c3d973ea 100644 --- a/library/src/rng/distribution/device_distributions.hpp +++ b/library/src/rng/distribution/device_distributions.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,6 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_DEVICE_DISTRIBUTIONS_H_ #define ROCRAND_RNG_DISTRIBUTION_DEVICE_DISTRIBUTIONS_H_ -#ifndef FQUALIFIERS -#define FQUALIFIERS __forceinline__ __device__ __host__ -#endif - #include "../device_engines.hpp" #include @@ -32,4 +28,9 @@ #include #include +#include + +ROCRAND_PRAGMA_MESSAGE("Internal device_distributions.hpp header has been deprecated. Please " + "include the necessary engine- or distribution-headers directly.") + #endif // ROCRAND_RNG_DISTRIBUTION_DEVICE_DISTRIBUTIONS_H_ diff --git a/library/src/rng/distribution/discrete.hpp b/library/src/rng/distribution/discrete.hpp index 49621b62..a8f3333b 100644 --- a/library/src/rng/distribution/discrete.hpp +++ b/library/src/rng/distribution/discrete.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,14 +21,14 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_DISCRETE_H_ #define ROCRAND_RNG_DISTRIBUTION_DISCRETE_H_ -#include -#include -#include +#include "../common.hpp" #include +#include -#include "../common.hpp" -#include "device_distributions.hpp" +#include +#include +#include // Alias method // @@ -38,23 +38,26 @@ // Vose M. D. // A Linear Algorithm For Generating Random Numbers With a Given Distribution, 1991 -enum rocrand_discrete_method +namespace rocrand_impl::host { - ROCRAND_DISCRETE_METHOD_ALIAS = 1, - ROCRAND_DISCRETE_METHOD_CDF = 2, - ROCRAND_DISCRETE_METHOD_UNIVERSAL = ROCRAND_DISCRETE_METHOD_ALIAS | ROCRAND_DISCRETE_METHOD_CDF + +enum discrete_method +{ + DISCRETE_METHOD_ALIAS = 1, + DISCRETE_METHOD_CDF = 2, + DISCRETE_METHOD_UNIVERSAL = DISCRETE_METHOD_ALIAS | DISCRETE_METHOD_CDF }; -template -class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_st +template +class discrete_distribution_base : public rocrand_discrete_distribution_st { public: static constexpr unsigned int input_width = 1; static constexpr unsigned int output_width = 1; - // rocrand_discrete_distribution_st is a struct - rocrand_discrete_distribution_base() // cppcheck-suppress uninitDerivedMemberVar + // rocrand_discrete_distribution_st is a struct + discrete_distribution_base() // cppcheck-suppress uninitDerivedMemberVar { size = 0; offset = 0; @@ -63,18 +66,15 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ cdf = NULL; } - rocrand_discrete_distribution_base(const double * probabilities, - unsigned int size, - unsigned int offset) - : rocrand_discrete_distribution_base() + discrete_distribution_base(const double* probabilities, unsigned int size, unsigned int offset) + : discrete_distribution_base() { std::vector p(probabilities, probabilities + size); init(p, 
size, offset); } - __host__ __device__ - ~rocrand_discrete_distribution_base() { } + __host__ __device__ ~discrete_distribution_base() {} void deallocate() { @@ -119,7 +119,7 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ template __forceinline__ __host__ __device__ unsigned int operator()(T x) const { - if ((Method & ROCRAND_DISCRETE_METHOD_ALIAS) != 0) + if((Method & DISCRETE_METHOD_ALIAS) != 0) { return rocrand_device::detail::discrete_alias(x, *this); } @@ -147,11 +147,11 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ deallocate(); allocate(); normalize(p); - if ((Method & ROCRAND_DISCRETE_METHOD_ALIAS) != 0) + if((Method & DISCRETE_METHOD_ALIAS) != 0) { create_alias_table(p); } - if ((Method & ROCRAND_DISCRETE_METHOD_CDF) != 0) + if((Method & DISCRETE_METHOD_CDF) != 0) { create_cdf(p); } @@ -161,12 +161,12 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ { if (IsHostSide) { - if ((Method & ROCRAND_DISCRETE_METHOD_ALIAS) != 0) + if((Method & DISCRETE_METHOD_ALIAS) != 0) { probability = new double[size]; alias = new unsigned int[size]; } - if ((Method & ROCRAND_DISCRETE_METHOD_CDF) != 0) + if((Method & DISCRETE_METHOD_CDF) != 0) { cdf = new double[size]; } @@ -174,7 +174,7 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ else { hipError_t error; - if ((Method & ROCRAND_DISCRETE_METHOD_ALIAS) != 0) + if((Method & DISCRETE_METHOD_ALIAS) != 0) { error = hipMalloc(&probability, sizeof(double) * size); if (error != hipSuccess) @@ -187,7 +187,7 @@ class rocrand_discrete_distribution_base : public rocrand_discrete_distribution_ throw ROCRAND_STATUS_ALLOCATION_FAILED; } } - if ((Method & ROCRAND_DISCRETE_METHOD_CDF) != 0) + if((Method & DISCRETE_METHOD_CDF) != 0) { error = hipMalloc(&cdf, sizeof(double) * size); if (error != hipSuccess) @@ -308,4 +308,6 @@ class rocrand_discrete_distribution_base : public 
rocrand_discrete_distribution_ } }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_DISTRIBUTION_DISCRETE_H_ diff --git a/library/src/rng/distribution/log_normal.hpp b/library/src/rng/distribution/log_normal.hpp index e9385960..2f9dc711 100644 --- a/library/src/rng/distribution/log_normal.hpp +++ b/library/src/rng/distribution/log_normal.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,11 +21,19 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_LOG_NORMAL_H_ #define ROCRAND_RNG_DISTRIBUTION_LOG_NORMAL_H_ -#include +#include "../common.hpp" + +#include +#include +#include +#include + #include -#include "device_distributions.hpp" -#include "rocrand/rocrand.h" +#include + +namespace rocrand_impl::host +{ inline constexpr unsigned int log_normal_distribution_max_input_width_default = 4; @@ -376,4 +384,6 @@ struct sobol_log_normal_distribution<__half> } }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_DISTRIBUTION_LOG_NORMAL_H_ diff --git a/library/src/rng/distribution/normal.hpp b/library/src/rng/distribution/normal.hpp index 53f96c65..6465ef43 100644 --- a/library/src/rng/distribution/normal.hpp +++ b/library/src/rng/distribution/normal.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,11 +21,17 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_NORMAL_H_ #define ROCRAND_RNG_DISTRIBUTION_NORMAL_H_ -#include +#include "../common.hpp" + +#include +#include + #include -#include "device_distributions.hpp" -#include "rocrand/rocrand.h" +#include + +namespace rocrand_impl::host +{ inline constexpr unsigned int normal_distribution_max_input_width_default = 4; @@ -376,4 +382,6 @@ struct sobol_normal_distribution<__half> } }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_DISTRIBUTION_NORMAL_H_ diff --git a/library/src/rng/distribution/poisson.hpp b/library/src/rng/distribution/poisson.hpp index 416d827d..764a4219 100644 --- a/library/src/rng/distribution/poisson.hpp +++ b/library/src/rng/distribution/poisson.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,26 +21,27 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_POISSON_H_ #define ROCRAND_RNG_DISTRIBUTION_POISSON_H_ -#include -#include -#include +#include "discrete.hpp" #include +#include -#include "discrete.hpp" +#include +#include +#include -template -class rocrand_poisson_distribution : public rocrand_discrete_distribution_base +namespace rocrand_impl::host { -public: - typedef rocrand_discrete_distribution_base base; +template +class poisson_distribution : public discrete_distribution_base +{ +public: + typedef discrete_distribution_base base; - rocrand_poisson_distribution() - : base() { } + poisson_distribution() : base() {} - explicit rocrand_poisson_distribution(double lambda) - : rocrand_poisson_distribution() + explicit poisson_distribution(double lambda) : poisson_distribution() { set_lambda(lambda); } @@ -109,12 +110,11 @@ class rocrand_poisson_distribution : public rocrand_discrete_distribution_base +template class poisson_distribution_manager { public: - - rocrand_poisson_distribution dis; + poisson_distribution dis; poisson_distribution_manager() = default; @@ -123,7 +123,7 @@ class poisson_distribution_manager poisson_distribution_manager(poisson_distribution_manager&& other) : dis(other.dis), lambda(other.lambda) { - // For now, we didn't make rocrand_poisson_distribution move-only + // For now, we didn't make poisson_distribution move-only // We copied the pointers of dis. Prevent deallocation by the destructor of other other.dis = {}; } @@ -135,7 +135,7 @@ class poisson_distribution_manager dis = other.dis; lambda = other.lambda; - // For now, we didn't make rocrand_poisson_distribution move-only + // For now, we didn't make poisson_distribution move-only // We copied the pointers of dis. 
Prevent deallocation by the destructor of other other.dis = {}; @@ -166,8 +166,7 @@ class poisson_distribution_manager template struct mrg_engine_poisson_distribution { - using distribution_type - = rocrand_poisson_distribution; + using distribution_type = poisson_distribution; static constexpr unsigned int input_width = 1; static constexpr unsigned int output_width = 1; @@ -193,9 +192,11 @@ struct mrg_engine_poisson_distribution struct mrg_poisson_distribution : mrg_engine_poisson_distribution { - explicit mrg_poisson_distribution(rocrand_poisson_distribution dis) + explicit mrg_poisson_distribution(poisson_distribution dis) : mrg_engine_poisson_distribution(dis) {} }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_DISTRIBUTION_POISSON_H_ diff --git a/library/src/rng/distribution/uniform.hpp b/library/src/rng/distribution/uniform.hpp index cdf45263..ae6850d4 100644 --- a/library/src/rng/distribution/uniform.hpp +++ b/library/src/rng/distribution/uniform.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,14 +21,19 @@ #ifndef ROCRAND_RNG_DISTRIBUTION_UNIFORM_H_ #define ROCRAND_RNG_DISTRIBUTION_UNIFORM_H_ -#include -#include +#include "../common.hpp" -#include "device_distributions.hpp" +#include + +#include +#include // Universal +namespace rocrand_impl::host +{ + template struct uniform_distribution; @@ -428,4 +433,6 @@ struct sobol_uniform_distribution<__half> } }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_DISTRIBUTION_UNIFORM_H_ diff --git a/library/src/rng/generator_type.hpp b/library/src/rng/generator_type.hpp index 4a2f1862..0363730d 100644 --- a/library/src/rng/generator_type.hpp +++ b/library/src/rng/generator_type.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -72,13 +72,16 @@ struct rocrand_generator_base_type // clang-format on }; +namespace rocrand_impl::host +{ + /// This wrapper provides support for the different types of generator functions, /// while calling into a generic function in the actual generator implementation. /// This saves us to write the same code for the distributions every time. template -struct rocrand_generator_type : rocrand_generator_base_type +struct generator_type : rocrand_generator_base_type { - rocrand_generator_type() : m_generator() {} + generator_type() : m_generator() {} rocrand_rng_type type() const override final { @@ -229,15 +232,13 @@ struct rocrand_generator_type : rocrand_generator_base_type /// \brief This type provides some default implementations for the methods /// that are required by the `Generator` parameter of `rocrand_generator`. 
/// It can be used, but it is not required. It only exists as utility. -struct rocrand_generator_impl_base +struct generator_impl_base { - rocrand_generator_impl_base(rocrand_ordering order, - unsigned long long offset, - hipStream_t stream) + generator_impl_base(rocrand_ordering order, unsigned long long offset, hipStream_t stream) : m_order(order), m_offset(offset), m_stream(stream) {} - virtual ~rocrand_generator_impl_base() = default; + virtual ~generator_impl_base() = default; virtual void reset() = 0; @@ -289,4 +290,6 @@ struct rocrand_generator_impl_base hipStream_t m_stream; }; +} // namespace rocrand_impl::host + #endif // ROCRAND_RNG_GENERATOR_TYPE_H_ diff --git a/library/src/rng/generator_type_lfsr113.cpp b/library/src/rng/generator_type_lfsr113.cpp index cbe2ee99..25e3f1e5 100644 --- a/library/src/rng/generator_type_lfsr113.cpp +++ b/library/src/rng/generator_type_lfsr113.cpp @@ -22,5 +22,11 @@ #include "lfsr113.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_mrg31k3p.cpp b/library/src/rng/generator_type_mrg31k3p.cpp index b050abcb..24cb1968 100644 --- a/library/src/rng/generator_type_mrg31k3p.cpp +++ b/library/src/rng/generator_type_mrg31k3p.cpp @@ -22,5 +22,11 @@ #include "mrg.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_mrg32k3a.cpp b/library/src/rng/generator_type_mrg32k3a.cpp index 79781b9b..ac5cddf0 100644 --- a/library/src/rng/generator_type_mrg32k3a.cpp +++ b/library/src/rng/generator_type_mrg32k3a.cpp @@ -22,5 +22,11 @@ #include "mrg.hpp" 
-template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_mt19937.cpp b/library/src/rng/generator_type_mt19937.cpp index c5a30f4a..f48105da 100644 --- a/library/src/rng/generator_type_mt19937.cpp +++ b/library/src/rng/generator_type_mt19937.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,4 +22,9 @@ #include "mt19937.hpp" -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_mtgp32.cpp b/library/src/rng/generator_type_mtgp32.cpp index edf1da60..dbaf89fd 100644 --- a/library/src/rng/generator_type_mtgp32.cpp +++ b/library/src/rng/generator_type_mtgp32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,4 +22,11 @@ #include "mtgp32.hpp" -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_philox4x32_10.cpp b/library/src/rng/generator_type_philox4x32_10.cpp index 248fad9b..20885b13 100644 --- a/library/src/rng/generator_type_philox4x32_10.cpp +++ b/library/src/rng/generator_type_philox4x32_10.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,5 +22,11 @@ #include "philox4x32_10.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_scrambled_sobol32.cpp b/library/src/rng/generator_type_scrambled_sobol32.cpp index 6012a6d0..ff93fa8b 100644 --- a/library/src/rng/generator_type_scrambled_sobol32.cpp +++ b/library/src/rng/generator_type_scrambled_sobol32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,5 +22,11 @@ #include "sobol.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_scrambled_sobol64.cpp b/library/src/rng/generator_type_scrambled_sobol64.cpp index 998a72dd..f3f289bb 100644 --- a/library/src/rng/generator_type_scrambled_sobol64.cpp +++ b/library/src/rng/generator_type_scrambled_sobol64.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,5 +22,11 @@ #include "sobol.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_sobol32.cpp b/library/src/rng/generator_type_sobol32.cpp index 9630c430..62adc1d7 100644 --- a/library/src/rng/generator_type_sobol32.cpp +++ b/library/src/rng/generator_type_sobol32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,5 +22,11 @@ #include "sobol.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_sobol64.cpp b/library/src/rng/generator_type_sobol64.cpp index f6037c82..071cf473 100644 --- a/library/src/rng/generator_type_sobol64.cpp +++ b/library/src/rng/generator_type_sobol64.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,5 +22,11 @@ #include "sobol.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct rocrand_impl::host::generator_type; +template struct rocrand_impl::host::generator_type>; +template struct rocrand_impl::host::generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_threefry2x32_20.cpp b/library/src/rng/generator_type_threefry2x32_20.cpp index 7284cdd4..7d170cdf 100644 --- a/library/src/rng/generator_type_threefry2x32_20.cpp +++ b/library/src/rng/generator_type_threefry2x32_20.cpp @@ -22,5 +22,11 @@ #include "threefry.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git 
a/library/src/rng/generator_type_threefry2x64_20.cpp b/library/src/rng/generator_type_threefry2x64_20.cpp index 95a8ec19..b8a0b48a 100644 --- a/library/src/rng/generator_type_threefry2x64_20.cpp +++ b/library/src/rng/generator_type_threefry2x64_20.cpp @@ -22,5 +22,11 @@ #include "threefry.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_threefry4x32_20.cpp b/library/src/rng/generator_type_threefry4x32_20.cpp index 886ee2aa..7a717484 100644 --- a/library/src/rng/generator_type_threefry4x32_20.cpp +++ b/library/src/rng/generator_type_threefry4x32_20.cpp @@ -22,5 +22,11 @@ #include "threefry.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_threefry4x64_20.cpp b/library/src/rng/generator_type_threefry4x64_20.cpp index f7ff6392..354d2eb0 100644 --- a/library/src/rng/generator_type_threefry4x64_20.cpp +++ b/library/src/rng/generator_type_threefry4x64_20.cpp @@ -22,5 +22,11 @@ #include "threefry.hpp" -template struct rocrand_generator_type; -template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_type_xorwow.cpp b/library/src/rng/generator_type_xorwow.cpp index d6f56a28..6434702b 100644 --- a/library/src/rng/generator_type_xorwow.cpp +++ b/library/src/rng/generator_type_xorwow.cpp @@ -22,5 +22,11 @@ #include "xorwow.hpp" -template struct rocrand_generator_type; -template struct 
rocrand_generator_type; +namespace rocrand_impl::host +{ + +template struct generator_type; +template struct generator_type>; +template struct generator_type>; + +} // namespace rocrand_impl::host diff --git a/library/src/rng/generator_types.hpp b/library/src/rng/generator_types.hpp index 73b9ae63..9a09c029 100644 --- a/library/src/rng/generator_types.hpp +++ b/library/src/rng/generator_types.hpp @@ -32,33 +32,53 @@ #include "threefry.hpp" #include "xorwow.hpp" -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; -extern template struct rocrand_generator_type; +namespace rocrand_impl::host +{ + +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct 
generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; +extern template struct generator_type; +extern template struct generator_type>; +extern template struct generator_type>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_GENERATOR_TYPES_H_ diff --git a/library/src/rng/lfsr113.hpp b/library/src/rng/lfsr113.hpp index 46c73dd6..41c5e58a 100644 --- a/library/src/rng/lfsr113.hpp +++ b/library/src/rng/lfsr113.hpp @@ -21,21 +21,22 @@ #ifndef ROCRAND_RNG_LFSR113_H_ #define ROCRAND_RNG_LFSR113_H_ -#include -#include - -#include - #include 
"config/lfsr113_config.hpp" #include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" -namespace rocrand_host::detail +#include +#include + +#include + +#include + +namespace rocrand_impl::host { typedef ::rocrand_device::lfsr113_engine lfsr113_device_engine; @@ -45,13 +46,17 @@ __host__ __device__ inline void init_lfsr113_engines(dim3 block_idx, dim3 /*grid_dim*/, dim3 block_dim, lfsr113_device_engine* engines, + const unsigned int start_engine_id, const unsigned int engines_size, - const uint4 seeds) + const uint4 seeds, + const unsigned int offset) { const unsigned int engine_id = block_idx.x * block_dim.x + thread_idx.x; if(engine_id < engines_size) { - engines[engine_id] = lfsr113_device_engine(seeds, engine_id); + engines[engine_id] = lfsr113_device_engine(seeds, + engine_id, + offset + (engine_id < start_engine_id ? 1 : 0)); } } @@ -74,10 +79,10 @@ __host__ __device__ void generate_lfsr113(dim3 block_idx, using vec_type = aligned_vec_type; - const unsigned int id = block_idx.x * BlockSize + thread_idx.x; - const unsigned int stride = grid_dim.x * BlockSize; + const unsigned int id = block_idx.x * BlockSize + thread_idx.x; + const unsigned int num_engines = grid_dim.x * BlockSize; - const unsigned int engine_id = (id + start_engine_id) & (stride - 1); + const unsigned int engine_id = (id + start_engine_id) & (num_engines - 1); lfsr113_device_engine engine = engines[engine_id]; unsigned int input[input_width]; @@ -109,7 +114,7 @@ __host__ __device__ void generate_lfsr113(dim3 block_idx, __builtin_amdgcn_s_waitcnt(/*vmcnt*/ 0 | (/*exp_cnt*/ 0x7 << 4) | (/*lgkmcnt*/ 0xf << 8)); #endif vec_data[index] = *reinterpret_cast(output); - index += stride; + index += num_engines; } if(output_width > 1 && index == vec_n) @@ -151,32 +156,31 @@ __host__ __device__ void generate_lfsr113(dim3 block_idx, } } + // Save engine with its state engines[engine_id] = engine; } -} 
// namespace rocrand_host::detail - template -class rocrand_lfsr113_template : public rocrand_generator_impl_base +class lfsr113_generator_template : public generator_impl_base { public: using system_type = System; - using base_type = rocrand_generator_impl_base; - using engine_type = ::rocrand_host::detail::lfsr113_device_engine; - - rocrand_lfsr113_template(uint4 seeds = {ROCRAND_LFSR113_DEFAULT_SEED_X, - ROCRAND_LFSR113_DEFAULT_SEED_Y, - ROCRAND_LFSR113_DEFAULT_SEED_Z, - ROCRAND_LFSR113_DEFAULT_SEED_W}, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + using base_type = generator_impl_base; + using engine_type = lfsr113_device_engine; + + lfsr113_generator_template(uint4 seeds = {ROCRAND_LFSR113_DEFAULT_SEED_X, + ROCRAND_LFSR113_DEFAULT_SEED_Y, + ROCRAND_LFSR113_DEFAULT_SEED_Z, + ROCRAND_LFSR113_DEFAULT_SEED_W}, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seeds) {} - rocrand_lfsr113_template(const rocrand_lfsr113_template&) = delete; + lfsr113_generator_template(const lfsr113_generator_template&) = delete; - rocrand_lfsr113_template(rocrand_lfsr113_template&& other) + lfsr113_generator_template(lfsr113_generator_template&& other) : base_type(other) , m_engines_initialized(other.m_engines_initialized) , m_engines(other.m_engines) @@ -189,9 +193,9 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base other.m_engines = nullptr; } - rocrand_lfsr113_template& operator=(const rocrand_lfsr113_template&) = delete; + lfsr113_generator_template& operator=(const lfsr113_generator_template&) = delete; - rocrand_lfsr113_template& operator=(rocrand_lfsr113_template&& other) + lfsr113_generator_template& operator=(lfsr113_generator_template&& other) { *static_cast(this) = std::move(other); m_engines_initialized = other.m_engines_initialized; @@ -207,7 +211,7 @@ class 
rocrand_lfsr113_template : public rocrand_generator_impl_base return *this; } - ~rocrand_lfsr113_template() + ~lfsr113_generator_template() { if(m_engines != nullptr) { @@ -272,15 +276,12 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base return m_seed; } - rocrand_status set_offset(unsigned long long offset) - { - (void)offset; - // Can't set offset for LFSR113 - return ROCRAND_STATUS_TYPE_ERROR; - } - rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -305,16 +306,13 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base } hipError_t error - = rocrand_host::detail::get_least_common_grid_size(m_stream, - m_order, - m_engines_size); + = get_least_common_grid_size(m_stream, m_order, m_engines_size); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; } - // offset is always 0 - m_start_engine_id = 0; + m_start_engine_id = m_offset % m_engines_size; if(m_engines != nullptr) { @@ -329,16 +327,17 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base constexpr unsigned int init_threads = 256; const unsigned int init_blocks = (m_engines_size + init_threads - 1) / init_threads; - status = system_type::template launch< - rocrand_host::detail::init_lfsr113_engines, - rocrand_host::detail::static_block_size_config_provider>( + status = system_type::template launch>( dim3(init_blocks), dim3(init_threads), 0, m_stream, m_engines, + m_start_engine_id, m_engines_size, - m_seed); + m_seed, + m_offset / m_engines_size); if(status != ROCRAND_STATUS_SUCCESS) { return status; @@ -357,18 +356,17 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = 
ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) return ROCRAND_STATUS_INTERNAL_ERROR; - status = rocrand_host::detail::dynamic_dispatch( + status = dynamic_dispatch( m_order, [&, this](auto is_dynamic) { return system_type::template launch< - rocrand_host::detail:: - generate_lfsr113, + generate_lfsr113, ConfigProvider, T, is_dynamic>(dim3(config.blocks), @@ -454,15 +452,22 @@ class rocrand_lfsr113_template : public rocrand_generator_impl_base unsigned int m_engines_size = 0; uint4 m_seed; - poisson_distribution_manager<> m_poisson; + // For caching of Poisson for consecutive generations with the same lambda + poisson_distribution_manager m_poisson; + + // m_seed from base_type + // m_offset from base_type }; -using rocrand_lfsr113 = rocrand_lfsr113_template< - rocrand_system_device, - rocrand_host::detail::default_config_provider>; +using lfsr113_generator + = lfsr113_generator_template>; + +template +using lfsr113_generator_host + = lfsr113_generator_template, + default_config_provider>; -using rocrand_lfsr113_host = rocrand_lfsr113_template< - rocrand_system_host, - rocrand_host::detail::default_config_provider>; +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_LFSR113_H_ diff --git a/library/src/rng/mrg.hpp b/library/src/rng/mrg.hpp index 101bbb4c..0e792509 100644 --- a/library/src/rng/mrg.hpp +++ b/library/src/rng/mrg.hpp @@ -26,16 +26,19 @@ #include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" +#include +#include +#include + #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -152,20 +155,18 @@ __host__ __device__ void generate_mrg(dim3 block_idx, engines[engine_id] = engine; } -} // end namespace rocrand_host::detail - template -class rocrand_mrg_template : public rocrand_generator_impl_base +class mrg_generator_template : public generator_impl_base { public: - 
using base_type = rocrand_generator_impl_base; + using base_type = generator_impl_base; using engine_type = Engine; using system_type = System; - rocrand_mrg_template(unsigned long long seed = 0, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + mrg_generator_template(unsigned long long seed = 0, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seed) { if(m_seed == 0) @@ -174,9 +175,9 @@ class rocrand_mrg_template : public rocrand_generator_impl_base } } - rocrand_mrg_template(const rocrand_mrg_template&) = delete; + mrg_generator_template(const mrg_generator_template&) = delete; - rocrand_mrg_template(rocrand_mrg_template&& other) + mrg_generator_template(mrg_generator_template&& other) : base_type(other) , m_engines_initialized(std::exchange(other.m_engines_initialized, false)) , m_engines(std::exchange(other.m_engines, nullptr)) @@ -186,9 +187,9 @@ class rocrand_mrg_template : public rocrand_generator_impl_base , m_poisson(std::move(other.m_poisson)) {} - rocrand_mrg_template& operator=(const rocrand_mrg_template&) = delete; + mrg_generator_template& operator=(const mrg_generator_template&) = delete; - rocrand_mrg_template& operator=(rocrand_mrg_template&& other) + mrg_generator_template& operator=(mrg_generator_template&& other) { *static_cast(this) = other; m_engines_initialized = std::exchange(other.m_engines_initialized, false); @@ -201,7 +202,7 @@ class rocrand_mrg_template : public rocrand_generator_impl_base return *this; } - ~rocrand_mrg_template() + ~mrg_generator_template() { if(m_engines != nullptr) { @@ -243,6 +244,10 @@ class rocrand_mrg_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr 
std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -267,9 +272,7 @@ class rocrand_mrg_template : public rocrand_generator_impl_base } hipError_t error - = rocrand_host::detail::get_least_common_grid_size(m_stream, - m_order, - m_engines_size); + = get_least_common_grid_size(m_stream, m_order, m_engines_size); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; @@ -290,9 +293,8 @@ class rocrand_mrg_template : public rocrand_generator_impl_base constexpr unsigned int init_threads = ROCRAND_DEFAULT_MAX_BLOCK_SIZE; const unsigned int init_blocks = (m_engines_size + init_threads - 1) / init_threads; - status = system_type::template launch< - rocrand_host::detail::init_engines_mrg, - rocrand_host::detail::static_block_size_config_provider>( + status = system_type::template launch, + static_block_size_config_provider>( dim3(init_blocks), dim3(init_threads), 0, @@ -320,20 +322,19 @@ class rocrand_mrg_template : public rocrand_generator_impl_base return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; } - status = rocrand_host::detail::dynamic_dispatch( + status = dynamic_dispatch( m_order, [&, this](auto is_dynamic) { return system_type::template launch< - rocrand_host::detail:: - generate_mrg, + generate_mrg, ConfigProvider, T, is_dynamic>(dim3(config.blocks), @@ -437,31 +438,34 @@ class rocrand_mrg_template : public rocrand_generator_impl_base unsigned long long m_seed; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager - m_poisson; + poisson_distribution_manager m_poisson; // m_seed from base_type // m_offset from base_type }; -using rocrand_mrg31k3p = rocrand_mrg_template< - rocrand_system_device, - rocrand_device::mrg31k3p_engine, - 
rocrand_host::detail::default_config_provider>; - -using rocrand_mrg31k3p_host = rocrand_mrg_template< - rocrand_system_host, - rocrand_device::mrg31k3p_engine, - rocrand_host::detail::static_default_config_provider_t>; - -using rocrand_mrg32k3a = rocrand_mrg_template< - rocrand_system_device, - rocrand_device::mrg32k3a_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_mrg32k3a_host = rocrand_mrg_template< - rocrand_system_host, - rocrand_device::mrg32k3a_engine, - rocrand_host::detail::default_config_provider>; +using mrg31k3p_generator + = mrg_generator_template>; + +template +using mrg31k3p_generator_host + = mrg_generator_template, + rocrand_device::mrg31k3p_engine, + static_default_config_provider_t>; + +using mrg32k3a_generator + = mrg_generator_template>; + +template +using mrg32k3a_generator_host + = mrg_generator_template, + rocrand_device::mrg32k3a_engine, + default_config_provider>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_MRG_H_ diff --git a/library/src/rng/mt19937.hpp b/library/src/rng/mt19937.hpp index 7b86004e..c2a691bb 100644 --- a/library/src/rng/mt19937.hpp +++ b/library/src/rng/mt19937.hpp @@ -67,11 +67,11 @@ #include -namespace rocrand_host::detail +namespace rocrand_impl::host { /// Computes i % n, i must be in range [0, 2 * n) -MT_FQUALIFIERS unsigned int wrap_n(unsigned int i) +__forceinline__ __device__ unsigned int wrap_n(unsigned int i) { return i - (i < mt19937_constants::n ? 
0 : mt19937_constants::n); } @@ -452,14 +452,12 @@ ROCRAND_KERNEL __launch_bounds__((get_block_size( accessor.save(thread_id, engine); } -} // end namespace rocrand_host::detail - template -class rocrand_mt19937_template : public rocrand_generator_impl_base +class mt19937_generator_template : public generator_impl_base { public: - using base_type = rocrand_generator_impl_base; - using octo_engine_type = ::rocrand_host::detail::mt19937_octo_engine; + using base_type = generator_impl_base; + using octo_engine_type = mt19937_octo_engine; static constexpr inline unsigned int threads_per_generator = octo_engine_type::threads_per_generator; @@ -467,24 +465,23 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base /// Number of threads per block for jump_ahead_kernel. Can be tweaked for performance. static constexpr inline unsigned int jump_ahead_thread_count = 128; - rocrand_mt19937_template(unsigned long long seed = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + mt19937_generator_template(unsigned long long seed = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, 0, stream), m_seed(seed) { // Allocate device random number engines auto error = hipMalloc(&m_engines, - m_generator_count * rocrand_host::detail::mt19937_constants::n - * sizeof(unsigned int)); + m_generator_count * mt19937_constants::n * sizeof(unsigned int)); if(error != hipSuccess) { throw ROCRAND_STATUS_ALLOCATION_FAILED; } } - rocrand_mt19937_template(const rocrand_mt19937_template&) = delete; + mt19937_generator_template(const mt19937_generator_template&) = delete; - rocrand_mt19937_template(rocrand_mt19937_template&& other) + mt19937_generator_template(mt19937_generator_template&& other) : base_type(other) , m_engines_initialized(std::exchange(other.m_engines_initialized, false)) , m_engines(std::exchange(other.m_engines, nullptr)) @@ -495,9 +492,9 @@ class rocrand_mt19937_template : 
public rocrand_generator_impl_base , m_generator_count(other.m_generator_count) {} - rocrand_mt19937_template& operator=(const rocrand_mt19937_template&) = delete; + mt19937_generator_template& operator=(const mt19937_generator_template&) = delete; - rocrand_mt19937_template& operator=(rocrand_mt19937_template&& other) + mt19937_generator_template& operator=(mt19937_generator_template&& other) { *static_cast(this) = other; @@ -512,7 +509,7 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base return *this; } - ~rocrand_mt19937_template() + ~mt19937_generator_template() { if(m_engines != nullptr) { @@ -575,7 +572,7 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base // TODO: make a version for generators that don't support per-type specialization of the configs and use that one // For now: just use the void config, assuming that all configs are the same - rocrand_host::detail::generator_config config; + generator_config config; hipError_t err = ConfigProvider::template host_config(m_stream, m_order, config); if(err != hipSuccess) { @@ -589,8 +586,7 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base } // Allocate device random number engines err = hipMalloc(reinterpret_cast(&m_engines), - m_generator_count * rocrand_host::detail::mt19937_constants::n - * sizeof(unsigned int)); + m_generator_count * mt19937_constants::n * sizeof(unsigned int)); if(err != hipSuccess) { return ROCRAND_STATUS_ALLOCATION_FAILED; @@ -598,8 +594,7 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base unsigned int* d_engines{}; err = hipMalloc(&d_engines, - m_generator_count * rocrand_host::detail::mt19937_constants::n - * sizeof(unsigned int)); + m_generator_count * mt19937_constants::n * sizeof(unsigned int)); if(err != hipSuccess) { return ROCRAND_STATUS_ALLOCATION_FAILED; @@ -624,14 +619,13 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base return ROCRAND_STATUS_INTERNAL_ERROR; } - 
rocrand_host::detail::dynamic_dispatch( + dynamic_dispatch( m_order, [&, this](auto is_dynamic) { hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_host::detail::jump_ahead_kernel), + HIP_KERNEL_NAME( + jump_ahead_kernel), dim3(m_generator_count), dim3(jump_ahead_thread_count), 0, @@ -657,20 +651,18 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base } // This kernel is not actually tuned for ordering, but config is needed for device-side compile time check of the generator count - rocrand_host::detail::dynamic_dispatch( - m_order, - [&, this](auto is_dynamic) - { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - rocrand_host::detail::init_engines_kernel), - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - d_engines); - }); + dynamic_dispatch(m_order, + [&, this](auto is_dynamic) + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_engines_kernel), + dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + d_engines); + }); err = hipStreamSynchronize(m_stream); if(err != hipSuccess) @@ -704,11 +696,11 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base constexpr unsigned int input_width = Distribution::input_width; constexpr unsigned int output_width = Distribution::output_width; constexpr unsigned int inputs_per_state - = (rocrand_host::detail::mt19937_constants::n / threads_per_generator) / input_width; + = (mt19937_constants::n / threads_per_generator) / input_width; const unsigned int stride = threads_per_generator * m_generator_count; const unsigned int full_stride = stride * inputs_per_state; - rocrand_host::detail::generator_config config; + generator_config config; hipError_t err = ConfigProvider::template host_config(m_stream, m_order, config); if(err != hipSuccess) { @@ -755,13 +747,12 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base // Engines have enough values, generated by the previous generate_long_kernel call. 
// This kernel does not load and store engines but loads values directly from global // memory. - rocrand_host::detail::dynamic_dispatch( + dynamic_dispatch( m_order, [&, this](auto is_dynamic) { hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_host::detail::generate_short_kernel), + HIP_KERNEL_NAME(generate_short_kernel), dim3(config.blocks), dim3(config.threads), 0, @@ -780,13 +771,12 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base else { // There are not enough generated values or no values at all - rocrand_host::detail::dynamic_dispatch( + dynamic_dispatch( m_order, [&, this](auto is_dynamic) { hipLaunchKernelGGL( - HIP_KERNEL_NAME( - rocrand_host::detail::generate_long_kernel), + HIP_KERNEL_NAME(generate_long_kernel), dim3(config.blocks), dim3(config.threads), 0, @@ -885,7 +875,9 @@ class rocrand_mt19937_template : public rocrand_generator_impl_base unsigned int m_generator_count = 0; }; -using rocrand_mt19937 = rocrand_mt19937_template< - rocrand_host::detail::default_config_provider>; +using mt19937_generator + = mt19937_generator_template>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_MT19937_H_ diff --git a/library/src/rng/mt19937_octo_engine.hpp b/library/src/rng/mt19937_octo_engine.hpp index 9ef2c660..b0d71477 100644 --- a/library/src/rng/mt19937_octo_engine.hpp +++ b/library/src/rng/mt19937_octo_engine.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -51,13 +51,11 @@ #ifndef ROCRAND_RNG_MT19937_OCTO_ENGINE_H_ #define ROCRAND_RNG_MT19937_OCTO_ENGINE_H_ -#define MT_FQUALIFIERS __forceinline__ __device__ - #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { namespace mt19937_constants { @@ -138,7 +136,7 @@ struct mt19937_octo_engine static constexpr inline unsigned int i568 = 1 + items_per_thread * 10; /// Initialize the octo engine from the engine it shares with seven other threads. - MT_FQUALIFIERS void gather(const unsigned int engine[mt19937_constants::n]) + __forceinline__ __device__ void gather(const unsigned int engine[mt19937_constants::n]) { constexpr unsigned int off_cnt = 11; /// Used to map the \p mt19937_octo_state.mt indices to \p mt19937_state.mt indices. @@ -167,24 +165,24 @@ struct mt19937_octo_engine } /// Returns \p val from thread tid mod 8. - static MT_FQUALIFIERS unsigned int shuffle(unsigned int val, unsigned int tid) + static __forceinline__ __device__ unsigned int shuffle(unsigned int val, unsigned int tid) { return __shfl(val, tid, 8); } /// For thread i, returns \p val from thread (i + 1) mod 8 - static MT_FQUALIFIERS unsigned int shuffle_down(unsigned int val) + static __forceinline__ __device__ unsigned int shuffle_down(unsigned int val) { return __shfl_down(val, 1, 8); } /// For thread i, returns \p val from thread (i - 1) mod 8 - static MT_FQUALIFIERS unsigned int shuffle_up(unsigned int val) + static __forceinline__ __device__ unsigned int shuffle_up(unsigned int val) { return __shfl_up(val, 1, 8); } /// Calculates value of index \p i using values i, (i + 1) % n, and (i + m) % n. 
- static MT_FQUALIFIERS unsigned int + static __forceinline__ __device__ unsigned int comp(unsigned int mt_i, unsigned int mt_i_1, unsigned int mt_i_m) { const unsigned int y @@ -200,10 +198,10 @@ struct mt19937_octo_engine /// \p idx_m is the local address of m: i + ipt * tid + m. /// \p last_dep_tid_7 is the value of i + ipt * (tid + 1), which is /// required as it is the only value not owned by thread pid. - MT_FQUALIFIERS void comp_vector(unsigned int tid, - unsigned int idx_i, - unsigned int idx_m, - unsigned int last_dep_tid_7) + __forceinline__ __device__ void comp_vector(unsigned int tid, + unsigned int idx_i, + unsigned int idx_m, + unsigned int last_dep_tid_7) { // communicate the dependency for the last value unsigned int last_dep = shuffle_down(m_state.mt[idx_i]); @@ -222,7 +220,7 @@ struct mt19937_octo_engine } /// Eights threads collaborate in computing the n next values. - MT_FQUALIFIERS void gen_next_n() + __forceinline__ __device__ void gen_next_n() { const unsigned int tid = threadIdx.x & 7U; @@ -370,13 +368,13 @@ struct mt19937_octo_engine } /// Return \p i state value without tempering - MT_FQUALIFIERS unsigned int get(unsigned int i) const + __forceinline__ __device__ unsigned int get(unsigned int i) const { return m_state.mt[i]; } /// Perform tempering on y - static MT_FQUALIFIERS unsigned int temper(unsigned int y) + static __forceinline__ __device__ unsigned int temper(unsigned int y) { constexpr unsigned int TEMPERING_MASK_B = 0x9D2C5680U; constexpr unsigned int TEMPERING_MASK_C = 0xEFC60000U; @@ -396,18 +394,19 @@ struct mt19937_octo_engine template struct mt19937_octo_engine_accessor { - MT_FQUALIFIERS explicit mt19937_octo_engine_accessor(unsigned int* _engines) : engines(_engines) + __forceinline__ __device__ explicit mt19937_octo_engine_accessor(unsigned int* _engines) + : engines(_engines) {} /// Load one value \p i of the octo engine \p engine_id from global memory with coalesced /// access - MT_FQUALIFIERS unsigned int 
load_value(unsigned int engine_id, unsigned int i) const + __forceinline__ __device__ unsigned int load_value(unsigned int engine_id, unsigned int i) const { return engines[i * stride + engine_id]; } /// Load the octo engine from global memory with coalesced access - MT_FQUALIFIERS mt19937_octo_engine load(unsigned int engine_id) const + __forceinline__ __device__ mt19937_octo_engine load(unsigned int engine_id) const { mt19937_octo_engine engine; #pragma unroll @@ -419,7 +418,8 @@ struct mt19937_octo_engine_accessor } /// Save the octo engine to global memory with coalesced access - MT_FQUALIFIERS void save(unsigned int engine_id, const mt19937_octo_engine& engine) const + __forceinline__ __device__ void save(unsigned int engine_id, + const mt19937_octo_engine& engine) const { #pragma unroll for(unsigned int i = 0; i < mt19937_constants::n / threads_per_generator; i++) @@ -435,6 +435,6 @@ struct mt19937_octo_engine_accessor unsigned int* engines; }; -} // end namespace rocrand_host::detail +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_MT19937_OCTO_ENGINE_H_ diff --git a/library/src/rng/mtgp32.hpp b/library/src/rng/mtgp32.hpp index dc08db24..d16b3d6d 100644 --- a/library/src/rng/mtgp32.hpp +++ b/library/src/rng/mtgp32.hpp @@ -55,49 +55,210 @@ #ifndef ROCRAND_RNG_MTGP32_H_ #define ROCRAND_RNG_MTGP32_H_ -#include -#include - -// common.hpp MUST be included prior to the device engines -// to correctly define FQUALIFIERS #include "common.hpp" - -#include -#include - #include "config/config_defaults.hpp" #include "config/mtgp32_config.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" +#include "system.hpp" -namespace rocrand_host::detail +#include +#include +#include + +#include + +#include + +namespace rocrand_impl::host { -typedef ::rocrand_device::mtgp32_engine mtgp32_device_engine; +struct mtgp32_device_engine : ::rocrand_device::mtgp32_engine +{ + // suppress warning about no 
initialization for __shared__ variables + __host__ __device__ mtgp32_device_engine(){}; + + __host__ __device__ unsigned int next() + { +#ifdef __HIP_DEVICE_COMPILE__ + // all threads in block produce one value and advance the state by that many values + return ::rocrand_device::mtgp32_engine::next(); +#else + // produce one value and advance the state by one value + const unsigned int o = next_thread(0); + m_state.offset = (++m_state.offset) & MTGP_MASK; + return o; +#endif + } +}; + +template +__host__ void generate(unsigned int (&input)[BlockSize][Distribution::input_width], + T (&output)[BlockSize][Distribution::output_width], + Distribution& distribution, + mtgp32_device_engine& engine) +{ + for(unsigned int i = 0; i < Distribution::input_width; i++) + { + for(unsigned int j = 0; j < BlockSize; j++) + { + input[j][i] = engine.next(); + } + } + for(unsigned int j = 0; j < BlockSize; j++) + { + distribution(input[j], output[j]); + } +} + +template +__device__ void generate(unsigned int (&input)[Distribution::input_width], + T (&output)[Distribution::output_width], + Distribution& distribution, + mtgp32_device_engine& engine) +{ + for(unsigned int i = 0; i < Distribution::input_width; i++) + { + input[i] = engine.next(); + } + distribution(input, output); +} + +template +__host__ void save_vec_n(vec_type* vec_data, T (&output)[BlockSize][output_width], size_t index) +{ + for(unsigned int j = 0; j < BlockSize; j++) + { + vec_data[index + j] = *reinterpret_cast(output[j]); + } +} + +template +__device__ void save_vec_n(vec_type* vec_data, T (&output)[output_width], size_t index) +{ + vec_data[index] = *reinterpret_cast(output); +} + +template +__host__ void + save_n(vec_type* vec_data, T (&output)[BlockSize][output_width], size_t index, size_t vec_n) +{ + for(unsigned int j = 0; j < BlockSize; j++) + { + if(index + j < vec_n) + { + vec_data[index + j] = *reinterpret_cast(output[j]); + } + } +} + +template +__device__ void save_n(vec_type* vec_data, T 
(&output)[output_width], size_t index, size_t vec_n) +{ + if(index < vec_n) + { + vec_data[index] = *reinterpret_cast(output); + } +} + +template +__host__ __device__ void save_head_tail_impl(T (&output)[output_width], + size_t index, + T* data, + size_t n, + size_t head_size, + size_t tail_size, + size_t vec_n_up) +{ + if(index == vec_n_up) + { + for(unsigned int o = 0; o < output_width; o++) + { + if(o < head_size) + { + data[o] = output[o]; + } + } + } + + if(index == vec_n_up + 1) + { + for(unsigned int o = 0; o < output_width; o++) + { + if(o < tail_size) + { + data[n - tail_size + o] = output[o]; + } + } + } +} + +template +__host__ void save_head_tail(T (&output)[BlockSize][output_width], + size_t index, + T* data, + size_t n, + size_t head_size, + size_t tail_size, + size_t vec_n_up) +{ + for(unsigned int j = 0; j < BlockSize; j++) + { + save_head_tail_impl(output[j], index + j, data, n, head_size, tail_size, vec_n_up); + } +} + +template +__device__ void save_head_tail(T (&output)[output_width], + size_t index, + T* data, + size_t n, + size_t head_size, + size_t tail_size, + size_t vec_n_up) +{ + save_head_tail_impl(output, index, data, n, head_size, tail_size, vec_n_up); +} template -ROCRAND_KERNEL - __launch_bounds__((get_block_size(IsDynamic))) void generate_kernel( - mtgp32_device_engine* engines, T* data, const size_t n, Distribution distribution) +__host__ __device__ void generate_mtgp(dim3 block_idx, + dim3 thread_idx, + dim3 grid_dim, + dim3 /*block_dim*/, + mtgp32_device_engine* engines, + T* data, + const size_t n, + Distribution distribution) { + static_assert(is_single_tile_config(IsDynamic), + "This kernel should only be used with single tile configs"); constexpr unsigned int BlockSize = get_block_size(IsDynamic); constexpr unsigned int input_width = Distribution::input_width; constexpr unsigned int output_width = Distribution::output_width; using vec_type = aligned_vec_type; - const unsigned int engine_id = blockIdx.x; - const unsigned int 
stride = gridDim.x * BlockSize; - size_t index = blockIdx.x * BlockSize + threadIdx.x; + const unsigned int engine_id = block_idx.x; + const unsigned int stride = grid_dim.x * BlockSize; + size_t index = block_idx.x * BlockSize + thread_idx.x; - // Load device engine - __shared__ mtgp32_device_engine engine; +// Load device engine +#ifdef __HIP_DEVICE_COMPILE__ + __shared__ +#endif + mtgp32_device_engine engine; engine.copy(&engines[engine_id]); +#ifdef __HIP_DEVICE_COMPILE__ unsigned int input[input_width]; T output[output_width]; +#else + // Due to the lock-step-like behavior of the device generator, the first value of a distribution + // for thread i is i, the next value is i + BlockSize, etc. Hence, all values must be cached for the host generator. + unsigned int input[BlockSize][input_width]; + T output[BlockSize][output_width]; +#endif const uintptr_t uintptr = reinterpret_cast(data); const size_t misalignment = (output_width - uintptr / sizeof(T) % output_width) % output_width; @@ -111,91 +272,49 @@ ROCRAND_KERNEL const size_t vec_n_up = remainder_value == 0 ? 
vec_n_down : (vec_n_down + BlockSize); vec_type* vec_data = reinterpret_cast(data + misalignment); + // Generate and store all aligned vector multiples while(index < vec_n_down) { - for(unsigned int i = 0; i < input_width; i++) - { - input[i] = engine(); - } - distribution(input, output); - - vec_data[index] = *reinterpret_cast(output); - // Next position + generate(input, output, distribution, engine); + save_vec_n(vec_data, output, index); index += stride; } + // Generate and store all aligned vector multiples for which not all threads participate in storing if(index < vec_n_up) { - for(unsigned int i = 0; i < input_width; i++) - { - input[i] = engine(); - } - distribution(input, output); - - // All threads generate (hence call __syncthreads) but not all write - if(index < vec_n) - { - vec_data[index] = *reinterpret_cast(output); - } - // Next position + generate(input, output, distribution, engine); + save_n(vec_data, output, index, vec_n); index += stride; } - - // Check if we need to save head and tail. 
+ // Generate and store the remaining T that are not aligned to vec_type if(output_width > 1 && (head_size > 0 || tail_size > 0)) { - for(unsigned int i = 0; i < input_width; i++) - { - input[i] = engine(); - } - distribution(input, output); - - // If data is not aligned by sizeof(vec_type) - if(index == vec_n_up) - { - for(unsigned int o = 0; o < output_width; o++) - { - if(o < head_size) - { - data[o] = output[o]; - } - } - } - - if(index == vec_n_up + 1) - { - for(unsigned int o = 0; o < output_width; o++) - { - if(o < tail_size) - { - data[n - tail_size + o] = output[o]; - } - } - } + generate(input, output, distribution, engine); + save_head_tail(output, index, data, n, head_size, tail_size, vec_n_up); } // Save engine with its state engines[engine_id].copy(&engine); } -} // end namespace rocrand_host::detail - -template -class rocrand_mtgp32_template : public rocrand_generator_impl_base +template +class mtgp32_generator_template : public generator_impl_base { public: - using base_type = rocrand_generator_impl_base; - using engine_type = ::rocrand_host::detail::mtgp32_device_engine; - - rocrand_mtgp32_template(unsigned long long seed = 0, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + using base_type = generator_impl_base; + using engine_type = mtgp32_device_engine; + using system_type = System; + + mtgp32_generator_template(unsigned long long seed = 0, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seed) {} - rocrand_mtgp32_template(const rocrand_mtgp32_template&) = delete; + mtgp32_generator_template(const mtgp32_generator_template&) = delete; - rocrand_mtgp32_template(rocrand_mtgp32_template&& other) + mtgp32_generator_template(mtgp32_generator_template&& other) : base_type(other) , m_engines_initialized(other.m_engines_initialized) , m_engines(other.m_engines) @@ -207,9 
+326,9 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base other.m_engines = nullptr; } - rocrand_mtgp32_template& operator=(const rocrand_mtgp32_template&) = delete; + mtgp32_generator_template& operator=(const mtgp32_generator_template&) = delete; - rocrand_mtgp32_template& operator=(rocrand_mtgp32_template&& other) + mtgp32_generator_template& operator=(mtgp32_generator_template&& other) { *static_cast(this) = other; m_engines_initialized = other.m_engines_initialized; @@ -224,11 +343,12 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base return *this; } - ~rocrand_mtgp32_template() + ~mtgp32_generator_template() { if(m_engines != nullptr) { - ROCRAND_HIP_FATAL_ASSERT(hipFree(m_engines)); + system_type::free(m_engines); + m_engines = nullptr; } } @@ -263,6 +383,10 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -286,7 +410,7 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base return ROCRAND_STATUS_SUCCESS; } - rocrand_host::detail::generator_config config; + generator_config config; // Assuming that the config is the same for every type. 
hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); @@ -301,16 +425,16 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base return ROCRAND_STATUS_ALLOCATION_FAILED; } - error = hipMalloc(&m_engines, sizeof(engine_type) * m_engines_size); - if(error != hipSuccess) + rocrand_status status = system_type::alloc(&m_engines, m_engines_size); + if(status != ROCRAND_STATUS_SUCCESS) { - return ROCRAND_STATUS_ALLOCATION_FAILED; + return status; } - rocrand_status status = rocrand_make_state_mtgp32(m_engines, - mtgp32dc_params_fast_11213, - m_engines_size, - m_seed); + status = rocrand_make_state_mtgp32(m_engines, + mtgp32dc_params_fast_11213, + m_engines_size, + m_seed); if(status != ROCRAND_STATUS_SUCCESS) { return ROCRAND_STATUS_ALLOCATION_FAILED; @@ -331,34 +455,38 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; } - rocrand_host::detail::dynamic_dispatch( - m_order, - [&, this](auto is_dynamic) - { - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - rocrand_host::detail::generate_kernel), - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - data, - data_size, - distribution); - }); + // The host generator uses a block of size one to emulate a device generator that uses a shared memory state + const dim3 threads + = std::is_same_v ? 
config.threads : dim3(1); + status + = dynamic_dispatch(m_order, + [&, this](auto is_dynamic) + { + return system_type::template launch< + generate_mtgp, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(threads), + 0, + m_stream, + m_engines, + data, + data_size, + distribution); + }); // Check kernel status - if(hipGetLastError() != hipSuccess) + if(status != ROCRAND_STATUS_SUCCESS) { - return ROCRAND_STATUS_LAUNCH_FAILURE; + return status; } return ROCRAND_STATUS_SUCCESS; @@ -424,13 +552,20 @@ class rocrand_mtgp32_template : public rocrand_generator_impl_base unsigned long long m_seed; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager<> m_poisson; + poisson_distribution_manager m_poisson; // m_seed from base_type // m_offset from base_type }; -using rocrand_mtgp32 = rocrand_mtgp32_template< - rocrand_host::detail::default_config_provider>; +using mtgp32_generator + = mtgp32_generator_template>; +template +using mtgp32_generator_host + = mtgp32_generator_template, + default_config_provider>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_MTGP32_H_ diff --git a/library/src/rng/philox4x32_10.hpp b/library/src/rng/philox4x32_10.hpp index 89684328..c2aa578d 100644 --- a/library/src/rng/philox4x32_10.hpp +++ b/library/src/rng/philox4x32_10.hpp @@ -57,18 +57,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" #include +#include #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { struct philox4x32_10_device_engine : public ::rocrand_device::philox4x32_10_engine { @@ -212,20 +212,18 @@ __host__ __device__ void generate_philox(dim3 block_idx, } } -} // end namespace rocrand_host::detail - template -class rocrand_philox4x32_10_template : public rocrand_generator_impl_base +class philox4x32_10_generator_template : public generator_impl_base { public: - using base_type = rocrand_generator_impl_base; - using engine_type = ::rocrand_host::detail::philox4x32_10_device_engine; + using base_type = generator_impl_base; + using engine_type = philox4x32_10_device_engine; using system_type = System; - rocrand_philox4x32_10_template(unsigned long long seed = 0, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + philox4x32_10_generator_template(unsigned long long seed = 0, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seed) {} @@ -253,6 +251,10 @@ class rocrand_philox4x32_10_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -288,29 +290,28 @@ class rocrand_philox4x32_10_template : public rocrand_generator_impl_base { return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) { return 
ROCRAND_STATUS_INTERNAL_ERROR; } - status = rocrand_host::detail::dynamic_dispatch( + status = dynamic_dispatch( m_order, [&, this](auto is_dynamic) { - return system_type::template launch< - rocrand_host::detail::generate_philox, - ConfigProvider, - T, - is_dynamic>(dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engine, - data, - data_size, - distribution); + return system_type::template launch, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engine, + data, + data_size, + distribution); }); if(status != ROCRAND_STATUS_SUCCESS) { @@ -386,19 +387,21 @@ class rocrand_philox4x32_10_template : public rocrand_generator_impl_base unsigned long long m_seed; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager - m_poisson; + poisson_distribution_manager m_poisson; // m_seed from base_type // m_offset from base_type }; -using rocrand_philox4x32_10 = rocrand_philox4x32_10_template< - rocrand_system_device, - rocrand_host::detail::default_config_provider>; +using philox4x32_10_generator + = philox4x32_10_generator_template>; + +template +using philox4x32_10_generator_host + = philox4x32_10_generator_template, + default_config_provider>; -using rocrand_philox4x32_10_host = rocrand_philox4x32_10_template< - rocrand_system_host, - rocrand_host::detail::default_config_provider>; +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_PHILOX4X32_10_H_ diff --git a/library/src/rng/sobol.hpp b/library/src/rng/sobol.hpp index 985cc861..290d5546 100644 --- a/library/src/rng/sobol.hpp +++ b/library/src/rng/sobol.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,12 +23,15 @@ #include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" -#include +#include +#include +#include +#include + #include #include #include @@ -36,6 +39,8 @@ #include #include +#include + #include #include @@ -43,7 +48,7 @@ #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { template{}(); + system::syncthreads{}(); return shared_vectors; } else @@ -419,24 +424,20 @@ class sobol_constant_accessor } }; -} // end namespace rocrand_host::detail - template -class rocrand_sobol_template : public rocrand_generator_impl_base +class sobol_generator_template : public generator_impl_base { public: static constexpr inline bool is_scrambled = Scrambled; using system_type = System; - using base_type = rocrand_generator_impl_base; - using engine_type - = ::rocrand_host::detail::sobol_device_engine_t; + using base_type = generator_impl_base; + using engine_type = sobol_device_engine_t; using constant_type = std::conditional_t; - using constant_accessor - = rocrand_host::detail::sobol_constant_accessor; + using constant_accessor = sobol_constant_accessor; - rocrand_sobol_template(unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_QUASI_DEFAULT, - hipStream_t stream = 0) + sobol_generator_template(unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_QUASI_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream) { rocrand_status status = get_constants().get_direction_vectors(&m_direction_vectors); @@ -494,7 +495,7 @@ class rocrand_sobol_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { - if(!rocrand_host::detail::is_ordering_quasi(order)) + if(!is_ordering_quasi(order)) 
{ return ROCRAND_STATUS_OUT_OF_RANGE; } @@ -573,26 +574,24 @@ class rocrand_sobol_template : public rocrand_generator_impl_base const uint32_t blocks_x = next_power2((blocks + m_dimensions - 1) / m_dimensions); const uint32_t blocks_y = m_dimensions; - using block_size_provider - = rocrand_host::detail::static_block_size_config_provider; - - status - = system_type::template launch, - block_size_provider>(dim3(blocks_x, blocks_y), - dim3(threads), - shared_mem_bytes, - m_stream, - data, - size, - m_direction_vectors, - m_scramble_constants, - m_current_offset, - distribution); + using block_size_provider = static_block_size_config_provider; + + status = system_type::template launch, + block_size_provider>(dim3(blocks_x, blocks_y), + dim3(threads), + shared_mem_bytes, + m_stream, + data, + size, + m_direction_vectors, + m_scramble_constants, + m_current_offset, + distribution); // Check kernel status if(status != ROCRAND_STATUS_SUCCESS) { @@ -657,7 +656,7 @@ class rocrand_sobol_template : public rocrand_generator_impl_base const constant_type* m_scramble_constants = nullptr; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager m_poisson; + poisson_distribution_manager m_poisson; // m_offset from base_type @@ -672,13 +671,23 @@ class rocrand_sobol_template : public rocrand_generator_impl_base } }; -using rocrand_sobol32 = rocrand_sobol_template; -using rocrand_sobol64 = rocrand_sobol_template; -using rocrand_scrambled_sobol32 = rocrand_sobol_template; -using rocrand_scrambled_sobol64 = rocrand_sobol_template; -using rocrand_sobol32_host = rocrand_sobol_template; -using rocrand_sobol64_host = rocrand_sobol_template; -using rocrand_scrambled_sobol32_host = rocrand_sobol_template; -using rocrand_scrambled_sobol64_host = rocrand_sobol_template; +using sobol32_generator = sobol_generator_template; +using sobol64_generator = sobol_generator_template; +using scrambled_sobol32_generator = sobol_generator_template; +using 
scrambled_sobol64_generator = sobol_generator_template; +template +using sobol32_generator_host + = sobol_generator_template, false, false>; +template +using sobol64_generator_host + = sobol_generator_template, true, false>; +template +using scrambled_sobol32_generator_host + = sobol_generator_template, false, true>; +template +using scrambled_sobol64_generator_host + = sobol_generator_template, true, true>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_SOBOL_H_ diff --git a/library/src/rng/system.hpp b/library/src/rng/system.hpp index c3b95018..424d0f0d 100644 --- a/library/src/rng/system.hpp +++ b/library/src/rng/system.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -46,7 +46,13 @@ #include -struct rocrand_system_host +namespace rocrand_impl::system +{ + +/// \tparam UseHostFunc If true, launching will enqueue the kernel in the stream. Otherwise, +/// execute the kernel synchronously. +template +struct host_system { static constexpr bool is_device() { @@ -90,15 +96,15 @@ struct rocrand_system_host } template, + typename ConfigProvider + = host::static_block_size_config_provider, typename T = unsigned int, bool IsDynamic = false, typename... Args> - static rocrand_status launch(dim3 num_blocks, - dim3 num_threads, - unsigned int shared_bytes, - hipStream_t stream, + static rocrand_status launch(dim3 num_blocks, + dim3 num_threads, + unsigned int shared_bytes, + [[maybe_unused]] hipStream_t stream, Args... 
args) { (void)IsDynamic; // Not relevant on host launches @@ -144,15 +150,22 @@ struct rocrand_system_host auto* kernel_args = new KernelArgsType{num_blocks, num_threads, std::tuple(args...)}; - hipError_t status = hipLaunchHostFunc(stream, kernel_callback, kernel_args); + if constexpr(UseHostFunc) + { + hipError_t status = hipLaunchHostFunc(stream, kernel_callback, kernel_args); - if(status != hipSuccess) + if(status != hipSuccess) + { + // At this point, if the callback has not been invoked, there will be a memory + // leak. It is unclear whether hipLaunchHostFunc can return an error after the + // callback has already been invoked, but in such case there would be a double + // free (crash) instead of a memory leak, so we will just leak it. + return ROCRAND_STATUS_LAUNCH_FAILURE; + } + } + else { - // At this point, if the callback has not been invoked, there will be a memory - // leak. It is unclear whether hipLaunchHostFunc can return an error after the - // callback has already been invoked, but in such case there would be a double - // free (crash) instead of a memory leak, so we will just leak it. - return ROCRAND_STATUS_LAUNCH_FAILURE; + kernel_callback(kernel_args); } return ROCRAND_STATUS_SUCCESS; @@ -163,15 +176,15 @@ namespace detail { template -__global__ __launch_bounds__((rocrand_host::detail::get_block_size( - IsDynamic))) void kernel_wrapper(Args... args) +__global__ __launch_bounds__( + (host::get_block_size(IsDynamic))) void kernel_wrapper(Args... args) { Kernel(blockIdx, threadIdx, gridDim, blockDim, args...); } } // namespace detail -struct rocrand_system_device +struct device_system { static constexpr bool is_device() { @@ -196,8 +209,8 @@ struct rocrand_system_device } template, + typename ConfigProvider + = host::static_block_size_config_provider, typename T = unsigned int, bool IsDynamic = false, typename... 
Args> @@ -235,4 +248,6 @@ struct syncthreads void operator()() {} }; +} // namespace rocrand_impl::system + #endif diff --git a/library/src/rng/threefry.hpp b/library/src/rng/threefry.hpp index 58272574..8a0ea7b1 100644 --- a/library/src/rng/threefry.hpp +++ b/library/src/rng/threefry.hpp @@ -28,19 +28,22 @@ #include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" #include "utils/cpp_utils.hpp" #include +#include +#include +#include +#include #include #include -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -194,21 +197,19 @@ __host__ __device__ void generate_threefry(dim3 block_idx, } } -} // end namespace rocrand_host::detail - template -class rocrand_threefry_template : public rocrand_generator_impl_base +class threefry_generator_template : public generator_impl_base { public: - using base_type = rocrand_generator_impl_base; + using base_type = generator_impl_base; using engine_type = Engine; using scalar_type = typename engine_type::scalar_type; using system_type = System; - rocrand_threefry_template(unsigned long long seed = 0, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + threefry_generator_template(unsigned long long seed = 0, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seed) {} @@ -257,6 +258,10 @@ class rocrand_threefry_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -300,7 +305,7 @@ class rocrand_threefry_template : public rocrand_generator_impl_base 
return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) @@ -308,23 +313,22 @@ class rocrand_threefry_template : public rocrand_generator_impl_base return ROCRAND_STATUS_INTERNAL_ERROR; } - status = rocrand_host::detail::dynamic_dispatch( - m_order, - [&, this](auto is_dynamic) - { - return system_type::template launch< - rocrand_host::detail::generate_threefry, - ConfigProvider, - T, - is_dynamic>(dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engine, - data, - data_size, - distribution); - }); + status = dynamic_dispatch(m_order, + [&, this](auto is_dynamic) + { + return system_type::template launch< + generate_threefry, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engine, + data, + data_size, + distribution); + }); // Check kernel status if(status != ROCRAND_STATUS_SUCCESS) @@ -398,8 +402,7 @@ class rocrand_threefry_template : public rocrand_generator_impl_base unsigned long long m_seed; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager - m_poisson; + poisson_distribution_manager m_poisson; // m_seed from base_type // m_offset from base_type @@ -415,44 +418,50 @@ constexpr inline unsigned int log_normal_distribution_max_input_width = 2; -using rocrand_threefry2x32_20 = rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry2x32_20_host = rocrand_threefry_template< - rocrand_system_host, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry2x64_20 = rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - 
-using rocrand_threefry2x64_20_host = rocrand_threefry_template< - rocrand_system_host, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry4x32_20 = rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry4x32_20_host = rocrand_threefry_template< - rocrand_system_host, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry4x64_20 = rocrand_threefry_template< - rocrand_system_device, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; - -using rocrand_threefry4x64_20_host = rocrand_threefry_template< - rocrand_system_host, - rocrand_host::detail::threefry_device_engine, - rocrand_host::detail::default_config_provider>; +using threefry2x32_20_generator + = threefry_generator_template, + default_config_provider>; + +template +using threefry2x32_20_generator_host + = threefry_generator_template, + threefry_device_engine, + default_config_provider>; + +using threefry2x64_20_generator + = threefry_generator_template, + default_config_provider>; + +template +using threefry2x64_20_generator_host + = threefry_generator_template, + threefry_device_engine, + default_config_provider>; + +using threefry4x32_20_generator + = threefry_generator_template, + default_config_provider>; + +template +using threefry4x32_20_generator_host + = threefry_generator_template, + threefry_device_engine, + default_config_provider>; + +using threefry4x64_20_generator + = threefry_generator_template, + default_config_provider>; + +template +using threefry4x64_20_generator_host + = threefry_generator_template, + threefry_device_engine, + default_config_provider>; + +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_THREEFRY2X32_20_H_ diff --git a/library/src/rng/utils/cpp_utils.hpp 
b/library/src/rng/utils/cpp_utils.hpp index 4d3e6a07..341cb9f1 100644 --- a/library/src/rng/utils/cpp_utils.hpp +++ b/library/src/rng/utils/cpp_utils.hpp @@ -37,7 +37,7 @@ #include #include -namespace cpp_utils +namespace rocrand_impl::cpp_utils { /// \brief Invoke a function on each element of a \c std::tuple separately. @@ -249,6 +249,6 @@ struct vec_wrapper template __host__ __device__ vec_wrapper(V) -> vec_wrapper; -} // end namespace cpp_utils +} // end namespace rocrand_impl::cpp_utils #endif // ROCRAND_RNG_CPP_UTILS_HPP_ diff --git a/library/src/rng/utils/threedim_iterator.hpp b/library/src/rng/utils/threedim_iterator.hpp index 517dec46..c86fb096 100644 --- a/library/src/rng/utils/threedim_iterator.hpp +++ b/library/src/rng/utils/threedim_iterator.hpp @@ -28,7 +28,7 @@ #include -namespace cpp_utils +namespace rocrand_impl::cpp_utils { /// \brief A random access iterator that converts linear indices to three-dimensional indexing. @@ -180,6 +180,6 @@ class threedim_iterator size_t m_index; }; -} // end namespace cpp_utils +} // end namespace rocrand_impl::cpp_utils #endif // ROCRAND_RNG_UTILS_THREEDIM_ITERATOR_ diff --git a/library/src/rng/utils/unreachable.hpp b/library/src/rng/utils/unreachable.hpp index 7fac5d83..c5dced06 100644 --- a/library/src/rng/utils/unreachable.hpp +++ b/library/src/rng/utils/unreachable.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -45,12 +45,15 @@ #define ROCRAND_BUILTIN_TRAP std::abort(); #endif +namespace rocrand_impl +{ + [[noreturn]] #if defined(__CUDACC__) || defined(__HIP__) __host__ __device__ #endif inline static void - rocrand_unreachable_internal(const char* msg, const char* file, unsigned line) + unreachable_internal(const char* msg, const char* file, unsigned line) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) (void)msg; @@ -67,10 +70,12 @@ __host__ __device__ #endif } +} // namespace rocrand_impl + #if !defined(NDEBUG) - #define ROCRAND_UNREACHABLE(msg) rocrand_unreachable_internal(msg, __FILE__, __LINE__) + #define ROCRAND_UNREACHABLE(msg) ::rocrand_impl::unreachable_internal(msg, __FILE__, __LINE__) #elif !defined(ROCRAND_BUILTIN_UNREACHABLE) - #define ROCRAND_UNREACHABLE(msg) rocrand_unreachable_internal(msg, __FILE__, __LINE__) + #define ROCRAND_UNREACHABLE(msg) ::rocrand_impl::unreachable_internal(msg, __FILE__, __LINE__) #else #define ROCRAND_UNREACHABLE(msg) \ do \ diff --git a/library/src/rng/xorwow.hpp b/library/src/rng/xorwow.hpp index 228d2a5b..4113c9bd 100644 --- a/library/src/rng/xorwow.hpp +++ b/library/src/rng/xorwow.hpp @@ -21,21 +21,22 @@ #ifndef ROCRAND_RNG_XORWOW_H_ #define ROCRAND_RNG_XORWOW_H_ -#include -#include - -#include - #include "config/xorwow_config.hpp" #include "common.hpp" #include "config_types.hpp" -#include "device_engines.hpp" #include "distributions.hpp" #include "generator_type.hpp" #include "system.hpp" -namespace rocrand_host::detail +#include +#include + +#include + +#include + +namespace rocrand_impl::host { typedef ::rocrand_device::xorwow_engine xorwow_device_engine; @@ -161,26 +162,24 @@ __host__ __device__ void generate_xorwow(dim3 block_idx, engines[engine_id] = engine; } -} // end namespace rocrand_host::detail - template -class 
rocrand_xorwow_template : public rocrand_generator_impl_base +class xorwow_generator_template : public generator_impl_base { public: - using base_type = rocrand_generator_impl_base; - using engine_type = ::rocrand_host::detail::xorwow_device_engine; + using base_type = generator_impl_base; + using engine_type = xorwow_device_engine; using system_type = System; - rocrand_xorwow_template(unsigned long long seed = 0, - unsigned long long offset = 0, - rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, - hipStream_t stream = 0) + xorwow_generator_template(unsigned long long seed = 0, + unsigned long long offset = 0, + rocrand_ordering order = ROCRAND_ORDERING_PSEUDO_DEFAULT, + hipStream_t stream = 0) : base_type(order, offset, stream), m_seed(seed) {} - rocrand_xorwow_template(const rocrand_xorwow_template&) = delete; + xorwow_generator_template(const xorwow_generator_template&) = delete; - rocrand_xorwow_template(rocrand_xorwow_template&& other) + xorwow_generator_template(xorwow_generator_template&& other) : base_type(other) , m_engines_initialized(other.m_engines_initialized) , m_engines(other.m_engines) @@ -193,9 +192,9 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base other.m_engines = nullptr; } - rocrand_xorwow_template& operator=(const rocrand_xorwow_template&) = delete; + xorwow_generator_template& operator=(const xorwow_generator_template&) = delete; - rocrand_xorwow_template& operator=(rocrand_xorwow_template&& other) + xorwow_generator_template& operator=(xorwow_generator_template&& other) { *static_cast(this) = other; m_engines_initialized = other.m_engines_initialized; @@ -211,7 +210,7 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base return *this; } - ~rocrand_xorwow_template() + ~xorwow_generator_template() { if(m_engines != nullptr) { @@ -244,6 +243,10 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base rocrand_status set_order(rocrand_ordering order) { + if(!system_type::is_device() && 
order == ROCRAND_ORDERING_PSEUDO_DYNAMIC) + { + return ROCRAND_STATUS_OUT_OF_RANGE; + } static constexpr std::array supported_orderings{ ROCRAND_ORDERING_PSEUDO_DEFAULT, ROCRAND_ORDERING_PSEUDO_DYNAMIC, @@ -269,9 +272,7 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base } hipError_t error - = rocrand_host::detail::get_least_common_grid_size(m_stream, - m_order, - m_engines_size); + = get_least_common_grid_size(m_stream, m_order, m_engines_size); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; @@ -292,9 +293,8 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base constexpr unsigned int init_threads = ROCRAND_DEFAULT_MAX_BLOCK_SIZE; const unsigned int init_blocks = (m_engines_size + init_threads - 1) / init_threads; - status = system_type::template launch< - rocrand_host::detail::init_xorwow_engines, - rocrand_host::detail::static_block_size_config_provider>( + status = system_type::template launch>( dim3(init_blocks), dim3(init_threads), 0, @@ -323,32 +323,32 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base return status; } - rocrand_host::detail::generator_config config; + generator_config config; const hipError_t error = ConfigProvider::template host_config(m_stream, m_order, config); if(error != hipSuccess) { return ROCRAND_STATUS_INTERNAL_ERROR; } - status = rocrand_host::detail::dynamic_dispatch( - m_order, - [&, this](auto is_dynamic) - { - return system_type::template launch< - rocrand_host::detail:: - generate_xorwow, - ConfigProvider, - T, - is_dynamic>(dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - m_start_engine_id, - data, - data_size, - distribution); - }); + status + = dynamic_dispatch(m_order, + [&, this](auto is_dynamic) + { + return system_type::template launch< + + generate_xorwow, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + m_start_engine_id, + data, + data_size, + 
distribution); + }); // Check kernel status if(status != ROCRAND_STATUS_SUCCESS) @@ -427,19 +427,21 @@ class rocrand_xorwow_template : public rocrand_generator_impl_base unsigned long long m_seed; // For caching of Poisson for consecutive generations with the same lambda - poisson_distribution_manager - m_poisson; + poisson_distribution_manager m_poisson; // m_seed from base_type // m_offset from base_type }; -using rocrand_xorwow = rocrand_xorwow_template< - rocrand_system_device, - rocrand_host::detail::default_config_provider>; +using xorwow_generator + = xorwow_generator_template>; + +template +using xorwow_generator_host + = xorwow_generator_template, + default_config_provider>; -using rocrand_xorwow_host = rocrand_xorwow_template< - rocrand_system_host, - rocrand_host::detail::default_config_provider>; +} // namespace rocrand_impl::host #endif // ROCRAND_RNG_XORWOW_H_ diff --git a/library/src/rocrand.cpp b/library/src/rocrand.cpp index 1e360353..f3b81437 100644 --- a/library/src/rocrand.cpp +++ b/library/src/rocrand.cpp @@ -26,65 +26,60 @@ #include #include -#if defined(__cplusplus) -extern "C" { -#endif /* __cplusplus */ - -rocrand_status ROCRANDAPI rocrand_create_generator(rocrand_generator* generator, - rocrand_rng_type rng_type) +template +rocrand_status create_generator_host(rocrand_generator* generator, rocrand_rng_type rng_type) { + using namespace rocrand_impl::host; try { // clang-format off switch(rng_type) { + case ROCRAND_RNG_PSEUDO_LFSR113: + *generator = new generator_type>(); + break; case ROCRAND_RNG_PSEUDO_PHILOX4_32_10: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; case ROCRAND_RNG_PSEUDO_MRG31K3P: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; case ROCRAND_RNG_PSEUDO_MRG32K3A: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; - case ROCRAND_RNG_PSEUDO_DEFAULT: - case ROCRAND_RNG_PSEUDO_XORWOW: - *generator = new 
rocrand_generator_type(); + case ROCRAND_RNG_PSEUDO_THREEFRY2_32_20: + *generator = new generator_type>(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY2_64_20: + *generator = new generator_type>(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY4_32_20: + *generator = new generator_type>(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY4_64_20: + *generator = new generator_type>(); break; case ROCRAND_RNG_QUASI_DEFAULT: case ROCRAND_RNG_QUASI_SOBOL32: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; case ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; case ROCRAND_RNG_QUASI_SOBOL64: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; case ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64: - *generator = new rocrand_generator_type(); + *generator = new generator_type>(); break; - case ROCRAND_RNG_PSEUDO_MTGP32: - *generator = new rocrand_generator_type(); + case ROCRAND_RNG_PSEUDO_DEFAULT: + case ROCRAND_RNG_PSEUDO_XORWOW: + *generator = new generator_type>(); break; - case ROCRAND_RNG_PSEUDO_LFSR113: - *generator = new rocrand_generator_type(); + case ROCRAND_RNG_PSEUDO_MTGP32: + *generator = new generator_type>(); break; case ROCRAND_RNG_PSEUDO_MT19937: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY2_32_20: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY2_64_20: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY4_32_20: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY4_64_20: - *generator = new rocrand_generator_type(); - break; default: return ROCRAND_STATUS_TYPE_ERROR; } @@ -101,57 +96,74 @@ rocrand_status ROCRANDAPI rocrand_create_generator(rocrand_generator* generator, return ROCRAND_STATUS_SUCCESS; } -rocrand_status ROCRANDAPI 
rocrand_create_generator_host(rocrand_generator* generator, - rocrand_rng_type rng_type) +rocrand_status create_generator_host(rocrand_generator* generator, + rocrand_rng_type rng_type, + bool use_host_func) +{ + return use_host_func ? create_generator_host(generator, rng_type) + : create_generator_host(generator, rng_type); +} + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +rocrand_status ROCRANDAPI rocrand_create_generator(rocrand_generator* generator, + rocrand_rng_type rng_type) { + using namespace rocrand_impl::host; try { // clang-format off switch(rng_type) { - case ROCRAND_RNG_PSEUDO_LFSR113: - *generator = new rocrand_generator_type(); - break; case ROCRAND_RNG_PSEUDO_PHILOX4_32_10: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_PSEUDO_MRG31K3P: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_PSEUDO_MRG32K3A: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY2_32_20: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY2_64_20: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; - case ROCRAND_RNG_PSEUDO_THREEFRY4_32_20: - *generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_THREEFRY4_64_20: - *generator = new rocrand_generator_type(); + case ROCRAND_RNG_PSEUDO_DEFAULT: + case ROCRAND_RNG_PSEUDO_XORWOW: + *generator = new generator_type(); break; case ROCRAND_RNG_QUASI_DEFAULT: case ROCRAND_RNG_QUASI_SOBOL32: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_QUASI_SOBOL64: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64: - 
*generator = new rocrand_generator_type(); - break; - case ROCRAND_RNG_PSEUDO_DEFAULT: - case ROCRAND_RNG_PSEUDO_XORWOW: - *generator = new rocrand_generator_type(); + *generator = new generator_type(); break; case ROCRAND_RNG_PSEUDO_MTGP32: + *generator = new generator_type(); + break; + case ROCRAND_RNG_PSEUDO_LFSR113: + *generator = new generator_type(); + break; case ROCRAND_RNG_PSEUDO_MT19937: + *generator = new generator_type(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY2_32_20: + *generator = new generator_type(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY2_64_20: + *generator = new generator_type(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY4_32_20: + *generator = new generator_type(); + break; + case ROCRAND_RNG_PSEUDO_THREEFRY4_64_20: + *generator = new generator_type(); + break; default: return ROCRAND_STATUS_TYPE_ERROR; } @@ -168,6 +180,18 @@ rocrand_status ROCRANDAPI rocrand_create_generator_host(rocrand_generator* gener return ROCRAND_STATUS_SUCCESS; } +rocrand_status ROCRANDAPI rocrand_create_generator_host(rocrand_generator* generator, + rocrand_rng_type rng_type) +{ + return create_generator_host(generator, rng_type, true); +} + +rocrand_status ROCRANDAPI rocrand_create_generator_host_blocking(rocrand_generator* generator, + rocrand_rng_type rng_type) +{ + return create_generator_host(generator, rng_type, false); +} + rocrand_status ROCRANDAPI rocrand_destroy_generator(rocrand_generator generator) { try @@ -435,6 +459,7 @@ rocrand_status ROCRANDAPI rocrand_get_version(int* version) rocrand_status ROCRANDAPI rocrand_create_poisson_distribution( double lambda, rocrand_discrete_distribution* discrete_distribution) { + using namespace rocrand_impl::host; if(discrete_distribution == NULL) { return ROCRAND_STATUS_OUT_OF_RANGE; @@ -444,10 +469,10 @@ rocrand_status ROCRANDAPI rocrand_create_poisson_distribution( return ROCRAND_STATUS_OUT_OF_RANGE; } - rocrand_poisson_distribution h_dis; + poisson_distribution h_dis; try { - h_dis = 
rocrand_poisson_distribution(lambda); + h_dis = poisson_distribution(lambda); } catch(const std::exception& e) { @@ -482,6 +507,7 @@ rocrand_status ROCRANDAPI unsigned int offset, rocrand_discrete_distribution* discrete_distribution) { + using namespace rocrand_impl::host; if(discrete_distribution == NULL) { return ROCRAND_STATUS_OUT_OF_RANGE; @@ -491,12 +517,10 @@ rocrand_status ROCRANDAPI return ROCRAND_STATUS_OUT_OF_RANGE; } - rocrand_discrete_distribution_base h_dis; + discrete_distribution_base h_dis; try { - h_dis = rocrand_discrete_distribution_base(probabilities, - size, - offset); + h_dis = discrete_distribution_base(probabilities, size, offset); } catch(const std::exception& e) { @@ -528,12 +552,13 @@ rocrand_status ROCRANDAPI rocrand_status ROCRANDAPI rocrand_destroy_discrete_distribution(rocrand_discrete_distribution discrete_distribution) { + using namespace rocrand_impl::host; if(discrete_distribution == NULL) { return ROCRAND_STATUS_OUT_OF_RANGE; } - rocrand_discrete_distribution_base h_dis; + discrete_distribution_base h_dis; hipError_t error; error = hipMemcpy(&h_dis, diff --git a/python/rocrand/tests/rocrand_test.py b/python/rocrand/tests/rocrand_test.py index 5fd675b9..98de39b3 100644 --- a/python/rocrand/tests/rocrand_test.py +++ b/python/rocrand/tests/rocrand_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -59,6 +59,7 @@ def test_ctor(self): PRNG(self.rngtype, seed=2345678, offset=7654) make_test(TestCtorPRNG, "DEFAULT", rngtype=PRNG.DEFAULT) +make_test(TestCtorPRNG, "LFSR113", rngtype=PRNG.LFSR113) make_test(TestCtorPRNG, "XORWOW", rngtype=PRNG.XORWOW) make_test(TestCtorPRNG, "MRG31K3P", rngtype=PRNG.MRG31K3P) make_test(TestCtorPRNG, "MRG32K3A", rngtype=PRNG.MRG32K3A) @@ -80,17 +81,6 @@ def test_ctor(self): make_test(TestCtorPRNGMT, "MTGP32", rngtype=PRNG.MTGP32) make_test(TestCtorPRNGMT, "MT19937", rngtype=PRNG.MT19937) -class TestCtorPRNGLFSR113(TestRNGBase): - rngtype = PRNG.LFSR113 - - def test_ctor(self): - PRNG(self.rngtype) - PRNG(self.rngtype, seed=123456) - with self.assertRaises(RocRandError): - PRNG(self.rngtype, offset=987654) - with self.assertRaises(RocRandError): - PRNG(self.rngtype, seed=2345678, offset=7654) - class TestCtorQRNG(TestRNGBase): def test_ctor(self): QRNG(self.rngtype) @@ -127,6 +117,7 @@ def test_offset(self): self.assertEqual(self.rng.offset, 2323423) make_test(TestParamsPRNG, "DEFAULT", rngtype=PRNG.DEFAULT) +make_test(TestParamsPRNG, "LFSR113", rngtype=PRNG.LFSR113) make_test(TestParamsPRNG, "XORWOW", rngtype=PRNG.XORWOW) make_test(TestParamsPRNG, "MRG31K3P", rngtype=PRNG.MRG31K3P) make_test(TestParamsPRNG, "MRG32K3A", rngtype=PRNG.MRG32K3A) @@ -159,28 +150,6 @@ def test_offset(self): make_test(TestParamsPRNGMT, "MTGP32", rngtype=PRNG.MTGP32) make_test(TestParamsPRNGMT, "MT19937", rngtype=PRNG.MT19937) -class TestParamsPRNGLFSR113(TestRNGBase): - rngtype = PRNG.LFSR113 - - def setUp(self): - super(TestParamsPRNGLFSR113, self).setUp() - self.rng = PRNG(self.rngtype) - - def tearDown(self): - del self.rng - - def test_seed(self): - self.assertIsNone(self.rng.seed) - self.rng.seed = 0 - self.assertEqual(self.rng.seed, 0) - self.rng.seed = 54654634456365 - 
self.assertEqual(self.rng.seed, 54654634456365) - - def test_offset(self): - self.assertEqual(self.rng.offset, 0) - with self.assertRaises(RocRandError): - self.rng.offset = 2323423 - class TestParamsQRNG(TestRNGBase): def setUp(self): super(TestParamsQRNG, self).setUp() diff --git a/scripts/config-tuning/templates/config_defaults_template b/scripts/config-tuning/templates/config_defaults_template index d20b45cf..30c741c0 100644 --- a/scripts/config-tuning/templates/config_defaults_template +++ b/scripts/config-tuning/templates/config_defaults_template @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,7 @@ * This file is automatically generated by `/scripts/config-tuning/generate_config_defaults.py`. */ -namespace rocrand_host::detail +namespace rocrand_impl::host { {% for generator_enum, config in configs.items() %} template @@ -37,7 +37,7 @@ struct generator_config_defaults<{{ generator_enum }}, T> static constexpr inline unsigned int blocks = {{config['grid_size']}}; }; {% endfor %} -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host #endif // ROCRAND_RNG_CONFIG_CONFIG_DEFAULTS_HPP_ diff --git a/scripts/config-tuning/templates/config_template b/scripts/config-tuning/templates/config_template index b5b051f9..fc9422c6 100644 --- a/scripts/config-tuning/templates/config_template +++ b/scripts/config-tuning/templates/config_template @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. 
*/ -namespace rocrand_host::detail +namespace rocrand_impl::host { {% set generator_id %}ROCRAND_RNG_{{ generator_type|upper }}_{{ generator|upper }}{% endset %} @@ -60,6 +60,6 @@ struct generator_config_selector<{{ generator_id }}, T> } }; -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host #endif // ROCRAND_RNG_CONFIG_{{ generator|upper }}_HPP_ diff --git a/test/cpp_wrapper/CMakeLists.txt b/test/cpp_wrapper/CMakeLists.txt new file mode 100644 index 00000000..9aee5b32 --- /dev/null +++ b/test/cpp_wrapper/CMakeLists.txt @@ -0,0 +1,77 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# Package test +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) + +project(rocrand_package_test CXX) + +# CMake modules +list(APPEND CMAKE_PREFIX_PATH $ENV{ROCM_PATH} $ENV{ROCM_PATH}/hip) +list(APPEND CMAKE_MODULE_PATH + $ENV{ROCM_PATH}/lib/cmake/hip + ${HIP_PATH}/cmake $ENV{ROCM_PATH}/hip/cmake # FindHIP.cmake +) + +set(CMAKE_CXX_COMPILER g++) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +include(cmake/Dependencies.cmake) + +# Find rocRAND +find_package(rocrand REQUIRED CONFIG HINTS ${rocrand_DIR} PATHS "$ENV{ROCM_PATH}/rocrand") + +# Get sources +file(GLOB rocrand_pkg_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../test_rocrand*.cpp) +# Exclude kernel tests +list(FILTER rocrand_pkg_TEST_SRCS EXCLUDE REGEX ".*test_rocrand_kernel.*\\.cpp$") + +# Enable testing (ctest) +enable_testing() + +function(add_relative_test test_name test_target) + get_target_property(EXE_PATH ${test_target} RUNTIME_OUTPUT_DIRECTORY) + if(EXE_PATH STREQUAL "EXE_PATH-NOTFOUND") + set(EXE_PATH ".") + endif() + get_filename_component(EXE_PATH "${EXE_PATH}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}") + get_target_property(EXE_NAME ${test_target} RUNTIME_OUTPUT_NAME) + if(EXE_NAME STREQUAL "EXE_NAME-NOTFOUND") + get_target_property(EXE_NAME ${test_target} OUTPUT_NAME) + if(EXE_NAME STREQUAL "EXE_NAME-NOTFOUND") + set(EXE_NAME "${test_target}") + endif() + endif() + file(RELATIVE_PATH rel_path "${CMAKE_CURRENT_BINARY_DIR}" "${EXE_PATH}/${EXE_NAME}") + add_test(NAME "${test_name}" COMMAND "./${rel_path}") +endfunction() + +# Build +foreach(test_src ${rocrand_pkg_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") + target_link_libraries(${test_name} PRIVATE roc::rocrand GTest::gtest GTest::gtest_main) + target_link_libraries(${test_name} PRIVATE hip::host) + add_relative_test(${test_name} ${test_name}) +endforeach() diff --git a/test/cpp_wrapper/cmake/Dependencies.cmake 
b/test/cpp_wrapper/cmake/Dependencies.cmake new file mode 100644 index 00000000..1bc9e6ff --- /dev/null +++ b/test/cpp_wrapper/cmake/Dependencies.cmake @@ -0,0 +1,76 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +cmake_minimum_required(VERSION 3.16) + +# find_package() uses upper-case <PACKAGENAME>_ROOT variables. +# although we use GTEST_ROOT for our purposes, it is actually even beneficial for +# find_package() to look for it there (that's where we are going to put it anyway) +if(POLICY CMP0144) + cmake_policy(SET CMP0144 NEW) +endif() + +# Dependencies + +# HIP dependency is handled earlier in the project cmake file +# when VerifyCompiler.cmake is included. 
+ +# For downloading, building, and installing required dependencies +include(../../cmake/DownloadProject.cmake) + +# NOTE: Google Test has created a mess with legacy FindGTest.cmake and newer GTestConfig.cmake +# +# FindGTest.cmake defines: GTest::GTest, GTest::Main, GTEST_FOUND +# +# GTestConfig.cmake defines: GTest::gtest, GTest::gtest_main, GTest::gmock, GTest::gmock_main +# +# NOTE2: Finding GTest in MODULE mode, one cannot invoke find_package in CONFIG mode, because targets +# will be duplicately defined. +if(NOT DEPENDENCIES_FORCE_DOWNLOAD) + # Google Test (https://github.com/google/googletest) + find_package(GTest QUIET) +endif() + +if(NOT TARGET GTest::GTest AND NOT TARGET GTest::gtest) + message(STATUS "GTest not found or force download GTest on. Downloading and building GTest.") + set(GTEST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/gtest CACHE PATH "") + if(DEFINED CMAKE_CXX_COMPILER) + set(CXX_COMPILER_OPTION "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + endif() + if(DEFINED CMAKE_C_COMPILER) + set(C_COMPILER_OPTION "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}") + endif() + download_project( + PROJ googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.11.0 + INSTALL_DIR ${GTEST_ROOT} + CMAKE_ARGS -DBUILD_GTEST=ON -DINSTALL_GTEST=ON -Dgtest_force_shared_crt=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX= ${CXX_COMPILER_OPTION} ${C_COMPILER_OPTION} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + LOG_DOWNLOAD TRUE + LOG_CONFIGURE TRUE + LOG_BUILD TRUE + LOG_INSTALL TRUE + BUILD_PROJECT TRUE + UPDATE_DISCONNECTED TRUE # Never update automatically from the remote repository + ) + find_package(GTest CONFIG REQUIRED PATHS ${GTEST_ROOT} NO_DEFAULT_PATH) +endif() diff --git a/test/internal/test_config_dispatch.cpp b/test/internal/test_config_dispatch.cpp index 511293db..1d1cda08 100644 --- a/test/internal/test_config_dispatch.cpp +++ b/test/internal/test_config_dispatch.cpp @@ -23,15 +23,15 @@ #include "test_common.hpp" #include 
-__global__ void write_target_arch(rocrand_host::detail::target_arch* dest_arch) +__global__ void write_target_arch(rocrand_impl::host::target_arch* dest_arch) { - constexpr auto arch = rocrand_host::detail::get_device_arch(); + constexpr auto arch = rocrand_impl::host::get_device_arch(); *dest_arch = arch; } static constexpr rocrand_rng_type dummy_rng_type = rocrand_rng_type(0); -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -55,10 +55,10 @@ struct generator_config_defaults static constexpr inline unsigned int blocks = 7; }; -} // end namespace rocrand_host::detail +} // namespace rocrand_impl::host template::template device_config(true) .threads> __global__ __launch_bounds__(BlockSize) void write_config(unsigned int* block_size, @@ -75,26 +75,26 @@ TEST(rocrand_config_dispatch_tests, host_matches_device) { const hipStream_t stream = 0; - rocrand_host::detail::target_arch host_arch; - HIP_CHECK(rocrand_host::detail::get_device_arch(stream, host_arch)); + rocrand_impl::host::target_arch host_arch; + HIP_CHECK(rocrand_impl::host::get_device_arch(stream, host_arch)); - rocrand_host::detail::target_arch* device_arch_ptr; + rocrand_impl::host::target_arch* device_arch_ptr; HIP_CHECK(hipMallocHelper(&device_arch_ptr, sizeof(*device_arch_ptr))); hipLaunchKernelGGL(write_target_arch, dim3(1), dim3(1), 0, stream, device_arch_ptr); HIP_CHECK(hipGetLastError()); - rocrand_host::detail::target_arch device_arch; + rocrand_impl::host::target_arch device_arch; HIP_CHECK(hipMemcpy(&device_arch, device_arch_ptr, sizeof(device_arch), hipMemcpyDeviceToHost)); - ASSERT_NE(host_arch, rocrand_host::detail::target_arch::invalid); + ASSERT_NE(host_arch, rocrand_impl::host::target_arch::invalid); ASSERT_EQ(host_arch, device_arch); } TEST(rocrand_config_dispatch_tests, parse_common_architectures) { - using rocrand_host::detail::parse_gcn_arch; - using rocrand_host::detail::target_arch; + using rocrand_impl::host::parse_gcn_arch; + using 
rocrand_impl::host::target_arch; ASSERT_EQ(parse_gcn_arch(""), target_arch::unknown); ASSERT_EQ(parse_gcn_arch("not a gfx arch"), target_arch::unknown); @@ -117,8 +117,8 @@ TEST(rocrand_config_dispatch_tests, get_config_on_host_and_device) HIP_CHECK(hipMallocHelper(&d_block_size, sizeof(*d_block_size))); HIP_CHECK(hipMallocHelper(&d_grid_size, sizeof(*d_grid_size))); - rocrand_host::detail::generator_config config{}; - const hipError_t error = rocrand_host::detail::get_generator_config( + rocrand_impl::host::generator_config config{}; + const hipError_t error = rocrand_impl::host::get_generator_config( stream, ROCRAND_ORDERING_PSEUDO_DEFAULT, config); @@ -147,7 +147,7 @@ TEST(rocrand_config_dispatch_tests, get_config_on_host_and_device) #ifdef USE_DEVICE_DISPATCH TEST(rocrand_config_dispatch_tests, device_id_from_stream) { - using rocrand_host::detail::get_device_from_stream; + using rocrand_impl::host::get_device_from_stream; hipDevice_t device_id; HIP_CHECK(hipGetDevice(&device_id)); @@ -175,17 +175,17 @@ template __global__ void least_common_grid_size_kernel(unsigned int* least_common_grid_size, rocrand_ordering order) { - *least_common_grid_size = rocrand_host::detail::get_least_common_grid_size( - rocrand_host::detail::is_ordering_dynamic(order)); + *least_common_grid_size = rocrand_impl::host::get_least_common_grid_size( + rocrand_impl::host::is_ordering_dynamic(order)); } TEST(rocrand_config_dispatch_tests, default_config_provider) { - using config_provider = rocrand_host::detail::default_config_provider; + using config_provider = rocrand_impl::host::default_config_provider; static constexpr hipStream_t default_stream = 0; static constexpr rocrand_ordering ordering = ROCRAND_ORDERING_PSEUDO_DEFAULT; - rocrand_host::detail::generator_config config{}; + rocrand_impl::host::generator_config config{}; ASSERT_EQ(config_provider::host_config(default_stream, ordering, config), hipSuccess); ASSERT_EQ(config.blocks, 1); @@ -198,9 +198,9 @@ 
TEST(rocrand_config_dispatch_tests, default_config_provider) unsigned int least_common_grid_size{}; ASSERT_EQ( - rocrand_host::detail::get_least_common_grid_size(default_stream, - ordering, - least_common_grid_size), + rocrand_impl::host::get_least_common_grid_size(default_stream, + ordering, + least_common_grid_size), hipSuccess); ASSERT_EQ(least_common_grid_size, 512 * 2 * 7); @@ -233,7 +233,7 @@ __global__ void config_selector_kernel(unsigned int* output) } } -namespace rocrand_host::detail +namespace rocrand_impl::host { template<> @@ -252,7 +252,7 @@ struct generator_config_selector } }; -} // namespace rocrand_host::detail +} // namespace rocrand_impl::host TEST(rocrand_config_dispatch_tests, config_selection) { @@ -260,9 +260,9 @@ TEST(rocrand_config_dispatch_tests, config_selection) constexpr std::size_t size = 2; HIP_CHECK(hipMallocHelper(&d_output, size * sizeof(*d_output))); - using config_provider_t = rocrand_host::detail::default_config_provider; + using config_provider_t = rocrand_impl::host::default_config_provider; config_provider_t config_provider{}; - rocrand_host::detail::generator_config config{}; + rocrand_impl::host::generator_config config{}; static constexpr hipStream_t default_stream = 0; static constexpr rocrand_ordering ordering = ROCRAND_ORDERING_PSEUDO_DYNAMIC; diff --git a/test/internal/test_cpp_utils.cpp b/test/internal/test_cpp_utils.cpp index c797d77f..08741abf 100644 --- a/test/internal/test_cpp_utils.cpp +++ b/test/internal/test_cpp_utils.cpp @@ -35,6 +35,8 @@ #include +using namespace rocrand_impl; + TEST(rocrand_cpp_utils_tests, visit_tuple) { std::tuple t{1, std::size_t(2), (unsigned short)(3)}; diff --git a/test/internal/test_log_normal_distribution.cpp b/test/internal/test_log_normal_distribution.cpp index eeee0060..efdf0f41 100644 --- a/test/internal/test_log_normal_distribution.cpp +++ b/test/internal/test_log_normal_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. 
All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,8 @@ #include +using namespace rocrand_impl::host; + TEST(log_normal_distribution_tests, float_test) { std::random_device rd; diff --git a/test/internal/test_normal_distribution.cpp b/test/internal/test_normal_distribution.cpp index 71c6d663..b1226fd6 100644 --- a/test/internal/test_normal_distribution.cpp +++ b/test/internal/test_normal_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,8 @@ #include +using namespace rocrand_impl::host; + TEST(normal_distribution_tests, float_test) { std::random_device rd; diff --git a/test/internal/test_poisson_distribution.cpp b/test/internal/test_poisson_distribution.cpp index ab87fd32..968247bf 100644 --- a/test/internal/test_poisson_distribution.cpp +++ b/test/internal/test_poisson_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,8 @@ #include +using namespace rocrand_impl::host; + template double get_mean(std::vector values) { @@ -59,7 +61,7 @@ TEST_P(poisson_distribution_tests, mean_var) std::random_device rd; std::mt19937 gen(rd()); - rocrand_poisson_distribution dis; + poisson_distribution dis; dis.set_lambda(lambda); const size_t samples_count = static_cast(std::max(2.0, sqrt(lambda))) * 100000; @@ -88,7 +90,7 @@ TEST_P(poisson_distribution_tests, histogram_compare) std::mt19937 gen(rd()); std::poisson_distribution host_dis(lambda); - rocrand_poisson_distribution dis; + poisson_distribution dis; dis.set_lambda(lambda); const size_t samples_count = static_cast(std::max(2.0, sqrt(lambda))) * 100000; diff --git a/test/internal/test_rocrand_generator_type.cpp b/test/internal/test_rocrand_generator_type.cpp index a10e9cbe..7bf5fe0b 100644 --- a/test/internal/test_rocrand_generator_type.cpp +++ b/test/internal/test_rocrand_generator_type.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -31,9 +31,11 @@ #define HIP_CHECK(state) ASSERT_EQ(state, hipSuccess) -struct dummy_generator : rocrand_generator_impl_base +using namespace rocrand_impl::host; + +struct dummy_generator : generator_impl_base { - dummy_generator() : rocrand_generator_impl_base(ROCRAND_ORDERING_PSEUDO_DEFAULT, 0, 0) {} + dummy_generator() : generator_impl_base(ROCRAND_ORDERING_PSEUDO_DEFAULT, 0, 0) {} static constexpr rocrand_rng_type type() { @@ -115,7 +117,7 @@ TEST(rocrand_generator_type_tests, rocrand_generator) rocrand_generator g = NULL; EXPECT_EQ(g, static_cast(0)); - g = new rocrand_generator_type; + g = new generator_type; auto gg = static_cast(g); EXPECT_NE(gg, static_cast(0)); EXPECT_EQ(gg->get_offset(), 0ULL); @@ -130,7 +132,7 @@ TEST(rocrand_generator_type_tests, rocrand_generator) TEST(rocrand_generator_type_tests, set_stream_test) { - rocrand_generator_type g; + generator_type g; EXPECT_EQ(g.get_stream(), (hipStream_t)(0)); hipStream_t stream; HIP_CHECK(hipStreamCreate(&stream)); @@ -143,7 +145,7 @@ TEST(rocrand_generator_type_tests, set_stream_test) TEST(rocrand_generator_type_tests, generate_test) { - rocrand_generator_type g; + generator_type g; std::vector output(123); g.generate_short(output.data(), output.size()); diff --git a/test/internal/test_rocrand_lfsr113_prng.cpp b/test/internal/test_rocrand_lfsr113_prng.cpp index 3775085a..e839994c 100644 --- a/test/internal/test_rocrand_lfsr113_prng.cpp +++ b/test/internal/test_rocrand_lfsr113_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,25 +18,59 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include - -#include +#include "test_common.hpp" +#include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" +#include + +#include -struct rocrand_lfsr113_prng_tests : public testing::TestWithParam +#include + +using rocrand_impl::host::lfsr113_generator; + +// Generator API tests +using lfsr113_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using lfsr113_generator_prng_offset_tests_types = ::testing::Types< + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(lfsr113_generator, + generator_prng_tests, + lfsr113_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(lfsr113_generator, + generator_prng_continuity_tests, + lfsr113_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(lfsr113_generator, + generator_prng_offset_tests, + lfsr113_generator_prng_offset_tests_types); + +// lfsr113-specific generator API tests +template +struct lfsr113_generator_prng_tests : public testing::Test { - rocrand_lfsr113 get_generator() const + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const { - rocrand_lfsr113 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) { throw std::runtime_error("Could not set ordering for generator"); } @@ -44,219 +78,9 @@ struct rocrand_lfsr113_prng_tests : public 
testing::TestWithParam(0.01f * size)); - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_lfsr113_prng_tests, same_seed_test) -{ - const uint4 seeds = {0, 2, 4, 6}; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_lfsr113 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed_uint4(seeds); - g1.set_seed_uint4(seeds); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seeds should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_lfsr113_prng_tests, different_seed_test) +TYPED_TEST(lfsr113_generator_prng_tests, different_seed_test) { const unsigned long long seed0 = 5ULL; const unsigned long long seed1 = 10ULL; @@ -267,7 +91,7 @@ TEST_P(rocrand_lfsr113_prng_tests, different_seed_test) HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); // Generators - rocrand_lfsr113 g0 = get_generator(), g1 = get_generator(); + auto g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); // Set different seeds g0.set_seed(seed0); 
g1.set_seed(seed1); @@ -308,7 +132,7 @@ TEST_P(rocrand_lfsr113_prng_tests, different_seed_test) // Checks if generators with the same seed and in the same state generate // the same numbers -TEST_P(rocrand_lfsr113_prng_tests, different_seed_uint4_test) +TYPED_TEST(lfsr113_generator_prng_tests, different_seed_uint4_test) { const uint4 seeds0[] = { { 0, 2, 4, 6}, @@ -332,7 +156,7 @@ TEST_P(rocrand_lfsr113_prng_tests, different_seed_uint4_test) HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); // Generators - rocrand_lfsr113 g0 = get_generator(), g1 = get_generator(); + auto g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); // Set different seeds g0.set_seed_uint4(seed0); g1.set_seed_uint4(seed1); @@ -376,3 +200,74 @@ TEST_P(rocrand_lfsr113_prng_tests, different_seed_uint4_test) HIP_CHECK(hipFree(data)); } } + +// Engine API tests +struct lfsr113_engine_api_tests : public lfsr113_generator::engine_type +{}; + +TEST(lfsr113_engine_api_tests, discard_test) +{ + using generator_t = lfsr113_generator; + using engine_t = typename generator_t::engine_type; + + const uint4 seed = {1234567U, 12345678U, 123456789U, 1234567890U}; + engine_t engine1(seed, 0, 678U); + engine_t engine2(seed, 0, 677U); + + // Check next() function + (void)engine2.next(); + + EXPECT_EQ(engine1(), engine2()); + + // Check discard() function + (void)engine1.next(); + engine2.discard(); + + EXPECT_EQ(engine1(), engine2()); + + // Check discard(offset) function + const unsigned int offsets[] + = {1U, 4U, 37U, 583U, 7452U, 21032U, 35678U, 66778U, 10313475U, 82120230U}; + + for(auto offset : offsets) + { + for(unsigned int i = 0; i < offset; i++) + { + (void)engine1.next(); + } + engine2.discard(offset); + + EXPECT_EQ(engine1(), engine2()); + } +} + +TEST(lfsr113_engine_api_tests, discard_sequence_test) +{ + using generator_t = lfsr113_generator; + using engine_t = typename generator_t::engine_type; + + const uint4 seed = {1234567U, 12345678U, 123456789U, 1234567890U}; + 
engine_t engine1(seed, 0, 444U); + engine_t engine2(seed, 123U, 444U); + + engine1.discard_subsequence(123U); + + EXPECT_EQ(engine1(), engine2()); + + engine1.discard(5356446450U); + engine1.discard_subsequence(123U); + engine1.discard(30000000006U); + + engine2.discard_subsequence(3U); + engine2.discard(35356446456U); + engine2.discard_subsequence(120U); + + EXPECT_EQ(engine1(), engine2()); + + engine1.discard_subsequence(3456000U); + engine1.discard_subsequence(1000005U); + + engine2.discard_subsequence(4456005U); + + EXPECT_EQ(engine1(), engine2()); +} diff --git a/test/internal/test_rocrand_mrg_prng.cpp b/test/internal/test_rocrand_mrg_prng.cpp index 6adcf2c3..2249b45a 100644 --- a/test/internal/test_rocrand_mrg_prng.cpp +++ b/test/internal/test_rocrand_mrg_prng.cpp @@ -18,19 +18,76 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include +#include "test_common.hpp" +#include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" +#include + +#include + +#include + +using rocrand_impl::host::mrg31k3p_generator; +using rocrand_impl::host::mrg32k3a_generator; + +// Generator API tests +using mrg_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params, + generator_prng_tests_params, + generator_prng_tests_params>; + +using mrg_generator_prng_offset_tests_types = ::testing::Types< + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_mrg, generator_prng_tests, mrg_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_mrg, + 
generator_prng_continuity_tests, + mrg_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_mrg, + generator_prng_offset_tests, + mrg_generator_prng_offset_tests_types); + +// mrg-specific generator API tests +template +struct mrg_generator_prng_tests : public ::testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } +}; + +TYPED_TEST_SUITE(mrg_generator_prng_tests, mrg_generator_prng_tests_types); using rocrand_device::detail::mad_u64_u32; @@ -49,7 +106,7 @@ __global__ __launch_bounds__(1) void mad_u64_u32_kernel(const unsigned int* r[7] = mad_u64_u32(23, 45, 67ULL); } -TEST(rocrand_mrg_prng_tests, mad_u64_u32_test) +TEST(mrg_generator_prng_tests, mad_u64_u32_test) { const size_t size = 8; @@ -106,126 +163,19 @@ TEST(rocrand_mrg_prng_tests, mad_u64_u32_test) HIP_CHECK(hipFree(r)); } -template -struct rocrand_mrg_prng_tests : public ::testing::Test -{ - using generator_t = typename Params::generator_t; - static inline constexpr rocrand_ordering ordering = Params::ordering; - - auto get_generator() const - { - generator_t g; - if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -template -struct rocrand_mrg_prng_tests_params -{ - using generator_t = GeneratorType; - static inline constexpr rocrand_ordering ordering = Ordering; -}; - -using rocrand_mrg_prng_test_types = ::testing::Types< - rocrand_mrg_prng_tests_params, - rocrand_mrg_prng_tests_params, - rocrand_mrg_prng_tests_params, - rocrand_mrg_prng_tests_params>; - -TYPED_TEST_SUITE(rocrand_mrg_prng_tests, rocrand_mrg_prng_test_types); - -TYPED_TEST(rocrand_mrg_prng_tests, uniform_uint_test) -{ - const size_t size = 
1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mrg_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mrg_prng_tests, uniform_double_test) -{ - const size_t size = 1313; - double* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(double) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - double host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(double) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const double mean = sum / size; - ASSERT_NEAR(mean, 0.5, 0.05); - - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mrg_prng_tests, uniform_float_range_test) +template +void uniform_floating_point_range_test(rocrand_ordering ordering) { const size_t 
size = 1 << 26; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + Generator g; + g.set_order(ordering); + ROCRAND_CHECK(g.generate_uniform(data, size)); - float* host_data = new float[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + T* host_data = new T[size]; + HIP_CHECK(hipMemcpy(host_data, data, sizeof(*host_data) * size, hipMemcpyDeviceToHost)); for(size_t i = 0; i < size; i++) { @@ -237,242 +187,34 @@ TYPED_TEST(rocrand_mrg_prng_tests, uniform_float_range_test) delete[] host_data; } -TYPED_TEST(rocrand_mrg_prng_tests, uniform_double_range_test) -{ - const size_t size = 1 << 26; - double* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(double) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - double* host_data = new double[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(double) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0); - ASSERT_LE(host_data[i], 1.0); - } - - HIP_CHECK(hipFree(data)); - delete[] host_data; -} - -TYPED_TEST(rocrand_mrg_prng_tests, normal_float_test) +TYPED_TEST(mrg_generator_prng_tests, uniform_float_range_test) { - const size_t size = 1314; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate_normal(data, size, 2.0f, 5.0f)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - float mean = 0.0f; - for(size_t i = 0; i < size; i++) - { - mean 
+= host_data[i]; - } - mean = mean / size; - - float std = 0.0f; - for(size_t i = 0; i < size; i++) - { - std += std::pow(host_data[i] - mean, 2); - } - std = sqrt(std / size); - - EXPECT_NEAR(2.0f, mean, 0.4f); // 20% - EXPECT_NEAR(5.0f, std, 1.0f); // 20% - - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mrg_prng_tests, poisson_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - auto g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate_poisson(data, size, 5.5)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double mean = 0.0; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i]; - } - mean = mean / size; - - double var = 0.0; - for(size_t i = 0; i < size; i++) - { - double x = host_data[i] - mean; - var += x * x; - } - var = var / size; - - EXPECT_NEAR(mean, 5.5, std::max(1.0, 5.5 * 1e-2)); - EXPECT_NEAR(var, 5.5, std::max(1.0, 5.5 * 1e-2)); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TYPED_TEST(rocrand_mrg_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - auto g0 = TestFixture::get_generator(); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - 
HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TYPED_TEST(rocrand_mrg_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - auto g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; - HIP_CHECK(hipFree(data)); + uniform_floating_point_range_test(ordering); } -// Checks if generators with the same seed and in the same state generate -// the same numbers 
-TYPED_TEST(rocrand_mrg_prng_tests, different_seed_test) +TYPED_TEST(mrg_generator_prng_tests, uniform_double_range_test) { - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - auto g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; - HIP_CHECK(hipFree(data)); + uniform_floating_point_range_test(ordering); } -template -struct rocrand_mrg_prng_engine_tests : public ::testing::Test +// Engine API tests +template +struct mrg_prng_engine_tests : public ::testing::Test { - using mrg_type = GeneratorType; + using mrg_type = Generator; }; -using rocrand_mrg_prng_engine_tests_types = ::testing::Types; +using mrg_prng_engine_tests_types = ::testing::Types; -TYPED_TEST_SUITE(rocrand_mrg_prng_engine_tests, rocrand_mrg_prng_engine_tests_types); +TYPED_TEST_SUITE(mrg_prng_engine_tests, mrg_prng_engine_tests_types); -TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_test) +TYPED_TEST(mrg_prng_engine_tests, discard_test) { typedef typename TestFixture::mrg_type mrg_type; const unsigned long long seed = 12345ULL; @@ -506,7 +248,7 @@ TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_test) } } -TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_sequence_test) +TYPED_TEST(mrg_prng_engine_tests, discard_sequence_test) { typedef typename TestFixture::mrg_type mrg_type; const unsigned long long seed = 23456ULL; @@ -533,7 +275,7 @@ TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_sequence_test) EXPECT_EQ(engine1(), engine2()); } -TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_subsequence_test) +TYPED_TEST(mrg_prng_engine_tests, discard_subsequence_test) { typedef typename TestFixture::mrg_type mrg_type; const unsigned long long seed = 23456ULL; @@ -561,222 +303,3 @@ TYPED_TEST(rocrand_mrg_prng_engine_tests, discard_subsequence_test) EXPECT_EQ(engine1(), engine2()); } - -template -struct rocrand_mrg_prng_offset_params -{ - using output_t = T; - using generator_t = GeneratorType; - static constexpr inline rocrand_ordering ordering = Ordering; -}; - -template -struct rocrand_mrg_prng_offset : public ::testing::Test -{ - using params = Params; - - auto 
get_generator() const - { - typename params::generator_t g; - if(g.set_order(params::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -using rocrand_mrg_prng_offset_types = ::testing::Types< - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params, - rocrand_mrg_prng_offset_params>; - -TYPED_TEST_SUITE(rocrand_mrg_prng_offset, rocrand_mrg_prng_offset_types); - -TYPED_TEST(rocrand_mrg_prng_offset, offsets_test) -{ - using Params = typename TestFixture::params; - using T = typename Params::output_t; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 11, 112233}; - - for(const auto offset : offsets) - { - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - auto g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - auto g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - GeneratorType g0; - g0.set_order(ordering); - GeneratorType g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_uniform_uint_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, unsigned int* data, size_t s) - { g.generate(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_uniform_char_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, unsigned char* 
data, size_t s) - { g.generate(data, s); }, - ordering, - 4); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_uniform_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, float* data, size_t s) - { g.generate_uniform(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_uniform_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, double* data, size_t s) - { g.generate_uniform(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_normal_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, float* data, size_t s) - { g.generate_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_normal_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_log_normal_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, float* data, size_t s) - { g.generate_log_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mrg_prng_tests, continuity_log_normal_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, double* data, size_t s) - { g.generate_log_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - 
-TYPED_TEST(rocrand_mrg_prng_tests, continuity_poisson_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - continuity_test([](generator_t& g, unsigned int* data, size_t s) - { g.generate_poisson(data, s, 100.0); }, - ordering); -} diff --git a/test/internal/test_rocrand_mt19937_prng.cpp b/test/internal/test_rocrand_mt19937_prng.cpp index ee0642ed..c0d08fd8 100644 --- a/test/internal/test_rocrand_mt19937_prng.cpp +++ b/test/internal/test_rocrand_mt19937_prng.cpp @@ -20,21 +20,42 @@ #include "test_common.hpp" #include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" +#include #include -#include #include -#include +#include -#include #include -#include + +#include +#include +#include #include +#include +#include +#include #include +using namespace rocrand_impl::host; + +// Generator API tests +using mt19937_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(mt19937_generator, + generator_prng_tests, + mt19937_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(mt19937_generator, + generator_prng_continuity_tests, + mt19937_generator_prng_tests_types); + +// mt19937-specific generator API tests template -struct rocrand_mt19937_prng_tests : public ::testing::Test +struct mt19937_generator_prng_tests : public ::testing::Test { using generator_t = typename Params::generator_t; static inline constexpr rocrand_ordering ordering = Params::ordering; @@ -50,282 +71,479 @@ struct rocrand_mt19937_prng_tests : public ::testing::Test } }; -template -struct rocrand_mt19937_prng_tests_params -{ - using generator_t = GeneratorType; - static inline constexpr rocrand_ordering ordering = Ordering; -}; - -using rocrand_mt19937_prng_test_types = ::testing::Types< - rocrand_mt19937_prng_tests_params>; +TYPED_TEST_SUITE(mt19937_generator_prng_tests, mt19937_generator_prng_tests_types); -TYPED_TEST_SUITE(rocrand_mt19937_prng_tests, 
rocrand_mt19937_prng_test_types); - -TYPED_TEST(rocrand_mt19937_prng_tests, uniform_uint_test) +// Check that that heads and tails are generated correctly for misaligned pointers or sizes. +template +void head_and_tail_test(GenerateFunc generate_func, rocrand_ordering ordering, unsigned int divisor) { - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + generator_config config; + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - rocrand_mt19937 g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + const unsigned int generator_count + = config.threads * config.blocks / mt19937_octo_engine::threads_per_generator; - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + const size_t stride = mt19937_constants::n * generator_count * divisor; - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); + // Large sizes are used for triggering all code paths in the kernels. 
+ std::vector + sizes{stride, 1, stride * 2 + 45651, 5, stride * 3 + 123, 6, 45, stride - 12}; - HIP_CHECK(hipFree(data)); -} + const size_t max_size = *std::max_element(sizes.cbegin(), sizes.cend()); + const size_t canary_size = 16; + const size_t max_size_with_canary = max_size + canary_size * 2; -TYPED_TEST(rocrand_mt19937_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); + const T canary = std::numeric_limits::max(); - rocrand_mt19937 g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + Generator g; + g.set_order(ordering); - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + std::vector host_data(max_size_with_canary); + T* data; + HIP_CHECK(hipMalloc(&data, sizeof(T) * max_size_with_canary)); - double sum = 0; - for(size_t i = 0; i < size; i++) + for(size_t offset : {0, 1, 2, 3}) { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, normal_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); + for(size_t s : sizes) + { + const size_t s_with_canary = s + canary_size * 2; + for(size_t i = 0; i < s_with_canary; i++) + { + host_data[i] = canary; + } + HIP_CHECK( + hipMemcpy(data, host_data.data(), sizeof(T) * s_with_canary, hipMemcpyDefault)); - rocrand_mt19937 g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate_normal(data, size, 2.0f, 5.0f)); - HIP_CHECK(hipDeviceSynchronize()); + generate_func(g, data + canary_size + offset, s); - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - 
HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK( + hipMemcpy(host_data.data(), data, sizeof(T) * s_with_canary, hipMemcpyDefault)); - float mean = 0.0f; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i]; - } - mean /= size; + // Check that the generator does not write more values than needed for head and tail + // (so canary areas, or memory before and after data passed to generate(), are intact) + for(size_t i = 0; i < canary_size + offset; i++) + { + ASSERT_EQ(host_data[i], canary); + } + for(size_t i = s_with_canary - (canary_size - offset); i < s_with_canary; i++) + { + ASSERT_EQ(host_data[i], canary); + } - float std = 0.0f; - for(size_t i = 0; i < size; i++) - { - std += std::pow(host_data[i] - mean, 2); + // Check if head and tail are generated (canary value, used as an initial value, + // can not be generated because it is not in the range of the distribution) + size_t incorrect = 0; + for(size_t i = canary_size + offset; i < s_with_canary - (canary_size - offset); i++) + { + if(host_data[i] == canary) + { + incorrect++; + } + } + ASSERT_EQ(incorrect, 0); + } } - std = sqrt(std / size); - - EXPECT_NEAR(2.0f, mean, 0.4f); // 20% - EXPECT_NEAR(5.0f, std, 1.0f); // 20% - HIP_CHECK(hipFree(data)); } -TYPED_TEST(rocrand_mt19937_prng_tests, poisson_test) +TYPED_TEST(mt19937_generator_prng_tests, head_and_tail_normal_float_test) { - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - rocrand_mt19937 g = TestFixture::get_generator(); - ROCRAND_CHECK(g.generate_poisson(data, size, 5.5)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double mean = 0.0; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i]; - } - mean /= size; + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = 
TestFixture::ordering; + using ConfigProvider = default_config_provider; + head_and_tail_test( + [](mt19937_generator& g, float* data, size_t s) { g.generate_normal(data, s, 0.0f, 1.0f); }, + ordering, + 2); +} - double var = 0.0; - for(size_t i = 0; i < size; i++) - { - double x = host_data[i] - mean; - var += x * x; - } - var /= size; +TYPED_TEST(mt19937_generator_prng_tests, head_and_tail_normal_double_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + head_and_tail_test( + [](mt19937_generator& g, double* data, size_t s) { g.generate_normal(data, s, 0.0, 1.0); }, + ordering, + 2); +} - EXPECT_NEAR(mean, 5.5, std::max(1.0, 5.5 * 1e-2)); - EXPECT_NEAR(var, 5.5, std::max(1.0, 5.5 * 1e-2)); +TYPED_TEST(mt19937_generator_prng_tests, head_and_tail_log_normal_float_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + head_and_tail_test( + [](mt19937_generator& g, float* data, size_t s) + { g.generate_log_normal(data, s, 0.0f, 1.0f); }, + ordering, + 2); +} - HIP_CHECK(hipFree(data)); +TYPED_TEST(mt19937_generator_prng_tests, head_and_tail_log_normal_double_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + head_and_tail_test( + [](mt19937_generator& g, double* data, size_t s) + { g.generate_log_normal(data, s, 0.0, 1.0); }, + ordering, + 2); } -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TYPED_TEST(rocrand_mt19937_prng_tests, state_progress_test) +// Check if changing distribution sets m_start_input correctly +template +void change_distribution_test(GenerateFunc0 generate_func0, + GenerateFunc1 
generate_func1, + size_t size0, + size_t start1, + rocrand_ordering ordering) { - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + SCOPED_TRACE(testing::Message() << "size0 = " << size0 << " start1 = " << start1); - // Generator - rocrand_mt19937 g0 = TestFixture::get_generator(); + generator_config config; + // Configs for mt19937 are independent of type, so just use T0 + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + const size_t size1 = config.threads * config.blocks * 3; - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + T0* data0; + T1* data10; + T1* data11; + HIP_CHECK(hipMalloc(&data0, sizeof(T0) * size0)); + HIP_CHECK(hipMalloc(&data10, sizeof(T1) * size1)); + HIP_CHECK(hipMalloc(&data11, sizeof(T1) * (start1 + size1))); - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + Generator g0; + // Generate the first distribution + generate_func0(g0, data0, size0); + // Change distribution to the second + generate_func1(g0, data10, size1); - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + Generator g1; + // Generate the second distribution considering that first `start1` values correspond to + // `size0` values of the first distribution and some discarded values + generate_func1(g1, data11, start1 + size1); + + std::vector host_data10(size1); + std::vector host_data11(size1); + HIP_CHECK(hipMemcpy(host_data10.data(), data10, sizeof(T1) * size1, hipMemcpyDefault)); + // Ignore `start1` values + HIP_CHECK(hipMemcpy(host_data11.data(), data11 + 
start1, sizeof(T1) * size1, hipMemcpyDefault)); - size_t same = 0; - for(size_t i = 0; i < size; i++) + for(size_t i = 0; i < size1; i++) { - if(host_data1[i] == host_data2[i]) - same++; + ASSERT_EQ(host_data10[i], host_data11[i]); } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); + HIP_CHECK(hipFree(data0)); + HIP_CHECK(hipFree(data10)); + HIP_CHECK(hipFree(data11)); } -// Checks if generators with the same seed and in the same state -// generate the same numbers -TYPED_TEST(rocrand_mt19937_prng_tests, same_seed_test) +TYPED_TEST(mt19937_generator_prng_tests, change_distribution0_test) { - const unsigned long long seed = 5ULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_mt19937 g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + generator_config config; + // Configs for mt19937 are independent, just use void + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + const size_t s = 
config.threads * config.blocks; - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) + // Larger type (normal float) to smaller type (uniform uint) + std::vector> test_cases{ + { (s + 4) * 2, s * 4}, + {(s * 2 + s - 10) * 2, s * 6}, + { (s * 3) * 2, s * 6}, + { (s * 4) * 2, s * 8}, + }; + for(auto test_case : test_cases) { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); + change_distribution_test( + [](mt19937_generator& g, float* data, size_t s) + { g.generate_normal(data, s, 0.0f, 1.0f); }, + [](mt19937_generator& g, unsigned int* data, size_t s) { g.generate(data, s); }, + test_case.first, + test_case.second, + ordering); } - - HIP_CHECK(hipFree(data)); } -// Checks if generators with the same seed and in the same state generate -// the same numbers -TYPED_TEST(rocrand_mt19937_prng_tests, different_seed_test) +TYPED_TEST(mt19937_generator_prng_tests, change_distribution1_test) { - const unsigned long long seed0 = 5ULL; - const unsigned long long seed1 = 10ULL; + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + generator_config config; + // Configs for mt19937 are independent, just use void + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_mt19937 g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); + const size_t s = config.threads * config.blocks; - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * 
size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + // Smaller type (uniform float) to larger type (normal double) + std::vector> test_cases{ + {s * 2 + 100, (s * 1) * 2}, + { s * 4 + 10, (s * 2) * 2}, + { s * 2, (s * 1) * 2}, + { s * 8, (s * 2) * 2}, + { s * 77, (s * 19) * 2} + }; + for(auto test_case : test_cases) + { + change_distribution_test( + [](mt19937_generator& g, float* data, size_t s) { g.generate_uniform(data, s); }, + [](mt19937_generator& g, double* data, size_t s) + { g.generate_normal(data, s, 0.0, 1.0); }, + test_case.first, + test_case.second, + ordering); + } +} - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); +TYPED_TEST(mt19937_generator_prng_tests, change_distribution2_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + generator_config config; + // Configs for mt19937 are independent, just use void + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); + const size_t s = config.threads * config.blocks; - size_t same = 0; - for(size_t i = 0; i < size; i++) + // Smaller type (uniform double) to larger type (normal double) + std::vector> test_cases{ + {s * 2 + 400, (s * 2) * 2}, + { s * 5 + 10, (s * 3) * 2}, + { s * 3, (s * 2) * 2}, + { s * 4, (s * 2) * 2}, + }; + for(auto test_case : test_cases) { - if(g1_host_data[i] == g0_host_data[i]) - same++; + change_distribution_test( + [](mt19937_generator& g, double* data, size_t s) { g.generate_uniform(data, s); }, + [](mt19937_generator& g, double* data, size_t s) + { g.generate_normal(data, s, 0.0, 1.0); }, + test_case.first, + test_case.second, + ordering); } - // It may happen that numbers are the 
same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - - HIP_CHECK(hipFree(data)); } -using mt19937_octo_engine = rocrand_host::detail::mt19937_octo_engine; - -/// Initialize the octo engines for both generators. Skip \p subsequence_size for the first generator. -__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_engines_kernel( - mt19937_octo_engine* octo_engines, const unsigned int* engines, unsigned int subsequence_size) +TYPED_TEST(mt19937_generator_prng_tests, change_distribution3_test) { - constexpr unsigned int n = rocrand_host::detail::mt19937_constants::n; - const unsigned int thread_id = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int engine_id = thread_id / mt19937_octo_engine::threads_per_generator; - mt19937_octo_engine engine = octo_engines[thread_id]; - engine.gather(&engines[engine_id * n]); + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + using ConfigProvider = default_config_provider; + generator_config config; + // Configs for mt19937 are independent, just use void + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); + + const size_t s = config.threads * config.blocks; + + // Larger type (normal double) to smaller type (uniform ushort) + std::vector> test_cases{ + { 100 * 2, s * 8}, + {(s + 10) * 2, s * 16}, + { (s * 2) * 2, s * 16}, + { (s * 3) * 2, s * 24}, + }; + for(auto test_case : test_cases) + { + change_distribution_test( + [](mt19937_generator& g, double* data, size_t s) + { g.generate_normal(data, s, 0.0, 1.0); }, + [](mt19937_generator& g, unsigned short* data, size_t s) { g.generate(data, s); }, + test_case.first, + test_case.second, + ordering); + } +} + +// mt19937-specific generator API continuity tests +template +struct mt19937_generator_prng_continuity_tests : public ::testing::Test +{ + using generator_t = typename Params::generator_t; + static inline 
constexpr rocrand_ordering ordering = Params::ordering; +}; + +TYPED_TEST_SUITE(mt19937_generator_prng_continuity_tests, mt19937_generator_prng_tests_types); + +// Check that subsequent generations of different sizes produce one +// sequence without gaps, no matter how many values are generated per call. +template::value, bool> = true> +void continuity_test(GenerateFunc generate_func, + rocrand_ordering ordering, + unsigned int divisor = 1) +{ + using ConfigProvider = default_config_provider; + + generator_config config; + HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); + + const unsigned int generator_count + = config.threads * config.blocks / mt19937_octo_engine::threads_per_generator; + + const size_t stride = mt19937_constants::n * generator_count * divisor; + + // Large sizes are used for triggering all code paths in the kernels (generating of middle, + // start and end sequences). + std::vector sizes0{stride, + 2, + stride, + 100, + 1, + 24783, + stride / 2, + 3 * stride + 704400, + 2, + stride + 776543, + 44176}; + std::vector sizes1{2 * stride, + 1024, + 55, + 65536, + stride / 2, + stride + 623456, + 3 * stride - 300000, + 1048576, + 111331}; + + // Round by the distribution's granularity (2 for normals, 2 for short and half, 4 for uchar). + // Sizes not divisible by the granularity or pointers not aligned by it work but without strict + // continuity. 
+ if(divisor > 1) + { + for(size_t& s : sizes0) + s = (s + divisor - 1) & ~static_cast(divisor - 1); + for(size_t& s : sizes1) + s = (s + divisor - 1) & ~static_cast(divisor - 1); + } + + const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); + const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); + const size_t size2 = std::min(size0, size1); + + mt19937_generator g0; + g0.set_order(ordering); + mt19937_generator g1; + g1.set_order(ordering); + mt19937_generator g2; + g2.set_order(ordering); + + std::vector host_data0(size0); + std::vector host_data1(size1); + std::vector host_data2(size2); + + size_t current0 = 0; + for(size_t s : sizes0) + { + T* data0; + HIP_CHECK(hipMalloc(&data0, sizeof(T) * s)); + HIP_CHECK(hipMemset(data0, -1, sizeof(T) * s)); + generate_func(g0, data0, s); + HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); + current0 += s; + HIP_CHECK(hipFree(data0)); + } + size_t current1 = 0; + for(size_t s : sizes1) + { + T* data1; + HIP_CHECK(hipMalloc(&data1, sizeof(T) * s)); + HIP_CHECK(hipMemset(data1, -1, sizeof(T) * s)); + generate_func(g1, data1, s); + HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); + current1 += s; + HIP_CHECK(hipFree(data1)); + } + T* data2; + HIP_CHECK(hipMalloc(&data2, sizeof(T) * size2)); + HIP_CHECK(hipMemset(data2, -1, sizeof(T) * size2)); + generate_func(g2, data2, size2); + HIP_CHECK(hipMemcpy(host_data2.data(), data2, sizeof(T) * size2, hipMemcpyDefault)); + HIP_CHECK(hipFree(data2)); + + size_t incorrect = 0; + for(size_t i = 0; i < size2; i++) + { + if constexpr(std::is_same::value) + { + if(__half2float(host_data0[i]) != __half2float(host_data1[i]) + || __half2float(host_data0[i]) != __half2float(host_data2[i])) + { + incorrect++; + } + } + else + { + if(host_data0[i] != host_data1[i] || host_data0[i] != host_data2[i]) + { + incorrect++; + } + } + } + 
ASSERT_EQ(incorrect, 0); +} + +TYPED_TEST(mt19937_generator_prng_continuity_tests, continuity_uniform_short_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + typedef unsigned short output_t; + + continuity_test([](generator_t& g, output_t* data, size_t s) + { g.generate_uniform(data, s); }, + ordering, + 2); +} + +TYPED_TEST(mt19937_generator_prng_continuity_tests, continuity_uniform_half_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + typedef __half output_t; + + continuity_test([](generator_t& g, output_t* data, size_t s) + { g.generate_uniform(data, s); }, + ordering, + 2); +} + +// Engine API tests +template +struct mt19937_generator_engine_tests : public ::testing::Test +{ + using generator_t = Generator; +}; + +using mt19937_generator_engine_tests_types = ::testing::Types; + +TYPED_TEST_SUITE(mt19937_generator_engine_tests, mt19937_generator_engine_tests_types); + +/// Initialize the octo engines for both generators. Skip \p subsequence_size for the first generator. 
+__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_engines_kernel( + mt19937_octo_engine* octo_engines, const unsigned int* engines, unsigned int subsequence_size) +{ + constexpr unsigned int n = mt19937_constants::n; + const unsigned int thread_id = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int engine_id = thread_id / mt19937_octo_engine::threads_per_generator; + mt19937_octo_engine engine = octo_engines[thread_id]; + engine.gather(&engines[engine_id * n]); engine.gen_next_n(); if(engine_id == 0) @@ -346,7 +564,7 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_kerne unsigned int elements_per_generator, unsigned int subsequence_size) { - constexpr unsigned int n = rocrand_host::detail::mt19937_constants::n; + constexpr unsigned int n = mt19937_constants::n; constexpr unsigned int threads_per_generator = mt19937_octo_engine::threads_per_generator; const unsigned int local_thread_id = threadIdx.x & 7U; const unsigned int thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -372,101 +590,102 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_kerne engines[thread_id] = engine; } -TYPED_TEST(rocrand_mt19937_prng_tests, subsequence_test) +TYPED_TEST(mt19937_generator_engine_tests, subsequence_test) { - using octo_engine_type = mt19937_octo_engine; - constexpr unsigned int threads_per_generator = mt19937_octo_engine::threads_per_generator; - constexpr unsigned long long int seed = 1ULL; - constexpr unsigned int subsequence_size = 552552U; + using generator_t = typename TestFixture::generator_t; + using octo_engine_type = mt19937_octo_engine; + constexpr unsigned int threads_per_generator = mt19937_octo_engine::threads_per_generator; + constexpr unsigned long long int seed = 1ULL; + constexpr unsigned int subsequence_size = 552552U; // The size of the subsequence must be a multiple of threads per generator // otherwise the non-jumping generator cannot align due to generating // 
threads_per_generator per call static_assert(subsequence_size % threads_per_generator == 0, "size of subsequence must be multiple of eight"); constexpr unsigned int generator_count = 2U; - constexpr unsigned int state_size = rocrand_host::detail::mt19937_constants::n; + constexpr unsigned int state_size = mt19937_constants::n; // Constants to skip subsequence_size states. // Generated with tools/mt19937_precomputed_generator.cpp // clang-format off static constexpr unsigned int jump[mt19937_p_size] = { - 16836620U, 1241597017U, 2267910466U, 3327083746U, 1938175615U, 2308764182U, 3494318232U, 233458728U, - 3431172561U, 4034658304U, 540715081U, 2223083524U, 1290021546U, 4615588U, 2755265028U, 1487667568U, - 817793568U, 688998677U, 4177128730U, 1327584593U, 3083575336U, 3130198036U, 1730866499U, 236199859U, - 1319739361U, 1227620352U, 2030748367U, 338426818U, 3401904732U, 2068477099U, 9314332U, 1394U, - 1631647232U, 3360850049U, 3947386387U, 169910306U, 3403351184U, 2759828497U, 3936398567U, 3981649994U, - 3545643632U, 1211945956U, 4214442729U, 1516073261U, 1973528206U, 3127605291U, 1657881179U, 2639065177U, - 1695629247U, 1483473424U, 1922960899U, 147373172U, 2671913376U, 3824567940U, 719742235U, 3137653860U, - 464968244U, 1223024043U, 130661U, 1203785820U, 4020201862U, 2505398286U, 3526255407U, 419792716U, - 523023476U, 371258051U, 3673049403U, 1836542343U, 2302995407U, 89010016U, 345597150U, 215916134U, - 302835354U, 1549276007U, 78962U, 2610524389U, 3144872708U, 1810683989U, 3751221315U, 1590451824U, - 3344450054U, 700934502U, 110016935U, 2156795150U, 3785730224U, 2631375440U, 1974637886U, 3292329605U, - 3142139957U, 3701811334U, 1549078486U, 129980226U, 1391930951U, 2556241742U, 2185354446U, 887051003U, - 3413484806U, 1342283353U, 1424278535U, 2917569624U, 3429457066U, 924053705U, 4113066678U, 3805305864U, - 3627143398U, 4011722511U, 479136705U, 2075096001U, 1721089795U, 3074239461U, 4254620365U, 3246832812U, - 2600113446U, 2754943943U, 3388450324U, 1677024071U, 
2500806419U, 158791876U, 3463832935U, 2458673960U, - 2747463520U, 3548197763U, 3182431084U, 17380539U, 1557533732U, 2107756592U, 46491733U, 1796341916U, - 1240657450U, 3670925904U, 546492343U, 4211712370U, 3978286571U, 2301647531U, 4277260054U, 1195041504U, - 3554107626U, 3536404767U, 3391935859U, 2250749215U, 882048618U, 2827024245U, 569173904U, 2149235115U, - 60945640U, 3833593866U, 956451456U, 1021006574U, 4236771596U, 2265058560U, 2215696731U, 778465250U, - 1751590318U, 741942625U, 3458004120U, 248619343U, 3843017115U, 839840654U, 1828044965U, 2617355055U, - 3779651646U, 525835946U, 537395281U, 992151708U, 1321781591U, 478234930U, 3884143138U, 2864985020U, - 2015099986U, 2965894308U, 1428387075U, 1310499846U, 1109267475U, 1075643877U, 2258267789U, 1069867669U, - 4205025922U, 2634970836U, 3132427367U, 2853906496U, 2678425777U, 4276765991U, 2575927964U, 2671947668U, - 1121017226U, 2080611588U, 2727225830U, 503316657U, 4042256386U, 1112199335U, 464744163U, 3572201075U, - 636055961U, 3899908992U, 892046540U, 896896758U, 393156791U, 49352486U, 3161122923U, 2585141935U, - 4006516250U, 2805665689U, 1866241881U, 462229762U, 1563106787U, 551098178U, 2128817785U, 3135100052U, - 4260031810U, 2726884032U, 892574702U, 3308689842U, 2326295075U, 3099353849U, 1166714571U, 1734201378U, - 3476395591U, 4061325047U, 2522521477U, 357030804U, 3726519752U, 2630348914U, 2394970464U, 539386556U, - 500917831U, 2573533705U, 3575402563U, 1930818072U, 2100596270U, 1470081741U, 1740257674U, 3408964191U, - 3883874908U, 3501867562U, 3979810829U, 491029434U, 3873155198U, 767047573U, 3512288254U, 3973630301U, - 918919925U, 505569179U, 2622866775U, 476301621U, 1653555785U, 588982683U, 2892810634U, 1481364624U, - 2900983412U, 1237365144U, 654858224U, 3888079105U, 880084185U, 2319840370U, 4149829702U, 2598788559U, - 1795697197U, 1141358839U, 1488115545U, 2023012969U, 1700767613U, 318307034U, 3741324886U, 2723508372U, - 2877869759U, 1089131774U, 3746529157U, 2032861327U, 1916908398U, 3979971761U, 
1104333434U, 164965263U, - 2861111U, 4061197830U, 1227752574U, 1557686937U, 1372189256U, 1320514631U, 3006463383U, 995374202U, - 2475599921U, 2062684198U, 273818196U, 230102535U, 846172554U, 549643817U, 1291845833U, 313868405U, - 1684607830U, 3007524804U, 2696481972U, 1496449755U, 2336358181U, 2907909674U, 2046381710U, 1616425720U, - 2784933411U, 3488376037U, 4237079453U, 239405390U, 2260619063U, 4248573305U, 2538035035U, 3162243525U, - 1688422919U, 4214141536U, 2822423904U, 3616596572U, 1124198315U, 561546231U, 1450818809U, 1395937417U, - 2624134660U, 1180897824U, 1879143185U, 2716824526U, 269138413U, 147307449U, 2713808648U, 1397164069U, - 3082484694U, 3996518249U, 3822948465U, 2600900044U, 2073322101U, 3405864806U, 1465988883U, 2263895854U, - 1193632090U, 878563614U, 3633274523U, 832840620U, 1918686864U, 52700353U, 2164193894U, 4099319060U, - 1271654821U, 2934121786U, 4003740838U, 2025639926U, 1964764541U, 3711204924U, 1811665390U, 2651395047U, - 2574986913U, 2946806898U, 3374950428U, 407088658U, 3098475549U, 3678329103U, 1442862364U, 1149148015U, - 3829525455U, 1129287955U, 1691241488U, 2948237333U, 4111137958U, 3121299510U, 3228891983U, 1422674833U, - 1046218249U, 4255146817U, 1789888035U, 2790533305U, 2766564283U, 882036014U, 3138447493U, 1705216849U, - 4086442903U, 2084466724U, 1452448031U, 2518232572U, 2826320536U, 1155360986U, 1652202635U, 3192309572U, - 4278103747U, 4207611316U, 3471787642U, 3949425339U, 2428040116U, 3916643455U, 1047874548U, 3522678507U, - 679503438U, 3367670533U, 3305388393U, 1545534614U, 365979387U, 3813719383U, 2293866348U, 2311446870U, - 1294235417U, 1239874267U, 3149790803U, 3459617970U, 3553443070U, 4294547149U, 677815218U, 2480790846U, - 42062043U, 2099057395U, 608270600U, 555980767U, 3040155855U, 4220769542U, 442527978U, 4264098298U, - 748043208U, 1510266257U, 605080615U, 1870700985U, 2815631381U, 3546827936U, 389630240U, 530771376U, - 1023017605U, 570366154U, 943073743U, 2345589488U, 3331719336U, 3393668839U, 2125550217U, 
3316487643U, - 2926149375U, 1563725876U, 2234336736U, 2691239566U, 4276736289U, 1534407008U, 634560074U, 537405234U, - 1007405586U, 3435919387U, 3526571791U, 1251978681U, 2423612524U, 4240584245U, 3077336530U, 1394628396U, - 667872456U, 3838386423U, 1264339781U, 2977905273U, 2493098225U, 1408656392U, 458665826U, 1193671488U, - 3192651130U, 3408345436U, 3416261569U, 2099306740U, 1457701667U, 1798196661U, 4078467644U, 1823475953U, - 3683947560U, 3421638713U, 1333246224U, 2253616524U, 3468957120U, 2028012393U, 3950669141U, 3379074497U, - 264072228U, 1478066089U, 3810691100U, 105177718U, 3239667112U, 548511418U, 462182385U, 1880566371U, - 3449712347U, 3945393228U, 1722247886U, 834860904U, 2023403204U, 1764955460U, 2314861647U, 2541662287U, - 174235623U, 1025135151U, 4183459969U, 140612992U, 2179351739U, 838789589U, 2321249467U, 3580648575U, - 1937895840U, 4082767776U, 4208618097U, 1150513792U, 3159007105U, 957333854U, 3121611318U, 1468699888U, - 2600870933U, 1887140383U, 1158880479U, 2021223243U, 4216226924U, 3362953576U, 148281745U, 743454457U, - 2055203028U, 1388596477U, 3430662102U, 14508704U, 3293390847U, 868061153U, 2803772691U, 167322815U, - 702986243U, 970691543U, 3211870732U, 132211178U, 1878239493U, 493828035U, 578005384U, 2083193988U, - 618003366U, 3516838252U, 1628942083U, 771893287U, 1783055259U, 2664147198U, 1123182254U, 3428230259U, - 4246995520U, 540075809U, 3006764017U, 2048903824U, 4018761289U, 1959828143U, 1058168427U, 2139631077U, - 2641577125U, 1819365340U, 391416789U, 408543984U, 2374873865U, 2638340220U, 2279187081U, 3509490132U, - 109546995U, 2943006029U, 301017297U, 2159298247U, 1201337642U, 3955051167U, 1131485785U, 4026925695U, - 2288659668U, 3259615238U, 610986470U, 1262822694U, 3447317355U, 3261746329U, 900784498U, 4163475604U, - 3571695718U, 95546624U, 765597843U, 1239045105U, 1375372467U, 2689038155U, 246401506U, 1717907899U, - 2072005013U, 2562942296U, 1328060883U, 3345146601U, 2369611890U, 1541866911U, 3503192374U, 1705689374U, - 
444830279U, 308855830U, 1960063476U, 1642058452U, 1044063781U, 239034752U, 2929660102U, 2476585518U, - 3525477572U, 4104693897U, 2573076031U, 4190865194U, 2395897238U, 2400843904U, 1695065775U, 4178846862U, - 826627422U, 914883664U, 2172966192U, 375087119U, 1284236820U, 2458751356U, 2286795808U, 648305751U, - 2336236161U, 3238612623U, 3320228067U, 769191U, 430179840U, 2186883080U, 1430612668U, 973149413U, - 1121709821U, 90179392U, 411379749U, 552994832U, 10U, 4016980240U, 2433283182U, 1182819972U, + 16836620U, 1241597017U, 2267910466U, 3327083746U, 1938175615U, 2308764182U, 3494318232U, 233458728U, + 3431172561U, 4034658304U, 540715081U, 2223083524U, 1290021546U, 4615588U, 2755265028U, 1487667568U, + 817793568U, 688998677U, 4177128730U, 1327584593U, 3083575336U, 3130198036U, 1730866499U, 236199859U, + 1319739361U, 1227620352U, 2030748367U, 338426818U, 3401904732U, 2068477099U, 9314332U, 1394U, + 1631647232U, 3360850049U, 3947386387U, 169910306U, 3403351184U, 2759828497U, 3936398567U, 3981649994U, + 3545643632U, 1211945956U, 4214442729U, 1516073261U, 1973528206U, 3127605291U, 1657881179U, 2639065177U, + 1695629247U, 1483473424U, 1922960899U, 147373172U, 2671913376U, 3824567940U, 719742235U, 3137653860U, + 464968244U, 1223024043U, 130661U, 1203785820U, 4020201862U, 2505398286U, 3526255407U, 419792716U, + 523023476U, 371258051U, 3673049403U, 1836542343U, 2302995407U, 89010016U, 345597150U, 215916134U, + 302835354U, 1549276007U, 78962U, 2610524389U, 3144872708U, 1810683989U, 3751221315U, 1590451824U, + 3344450054U, 700934502U, 110016935U, 2156795150U, 3785730224U, 2631375440U, 1974637886U, 3292329605U, + 3142139957U, 3701811334U, 1549078486U, 129980226U, 1391930951U, 2556241742U, 2185354446U, 887051003U, + 3413484806U, 1342283353U, 1424278535U, 2917569624U, 3429457066U, 924053705U, 4113066678U, 3805305864U, + 3627143398U, 4011722511U, 479136705U, 2075096001U, 1721089795U, 3074239461U, 4254620365U, 3246832812U, + 2600113446U, 2754943943U, 3388450324U, 1677024071U, 
2500806419U, 158791876U, 3463832935U, 2458673960U, + 2747463520U, 3548197763U, 3182431084U, 17380539U, 1557533732U, 2107756592U, 46491733U, 1796341916U, + 1240657450U, 3670925904U, 546492343U, 4211712370U, 3978286571U, 2301647531U, 4277260054U, 1195041504U, + 3554107626U, 3536404767U, 3391935859U, 2250749215U, 882048618U, 2827024245U, 569173904U, 2149235115U, + 60945640U, 3833593866U, 956451456U, 1021006574U, 4236771596U, 2265058560U, 2215696731U, 778465250U, + 1751590318U, 741942625U, 3458004120U, 248619343U, 3843017115U, 839840654U, 1828044965U, 2617355055U, + 3779651646U, 525835946U, 537395281U, 992151708U, 1321781591U, 478234930U, 3884143138U, 2864985020U, + 2015099986U, 2965894308U, 1428387075U, 1310499846U, 1109267475U, 1075643877U, 2258267789U, 1069867669U, + 4205025922U, 2634970836U, 3132427367U, 2853906496U, 2678425777U, 4276765991U, 2575927964U, 2671947668U, + 1121017226U, 2080611588U, 2727225830U, 503316657U, 4042256386U, 1112199335U, 464744163U, 3572201075U, + 636055961U, 3899908992U, 892046540U, 896896758U, 393156791U, 49352486U, 3161122923U, 2585141935U, + 4006516250U, 2805665689U, 1866241881U, 462229762U, 1563106787U, 551098178U, 2128817785U, 3135100052U, + 4260031810U, 2726884032U, 892574702U, 3308689842U, 2326295075U, 3099353849U, 1166714571U, 1734201378U, + 3476395591U, 4061325047U, 2522521477U, 357030804U, 3726519752U, 2630348914U, 2394970464U, 539386556U, + 500917831U, 2573533705U, 3575402563U, 1930818072U, 2100596270U, 1470081741U, 1740257674U, 3408964191U, + 3883874908U, 3501867562U, 3979810829U, 491029434U, 3873155198U, 767047573U, 3512288254U, 3973630301U, + 918919925U, 505569179U, 2622866775U, 476301621U, 1653555785U, 588982683U, 2892810634U, 1481364624U, + 2900983412U, 1237365144U, 654858224U, 3888079105U, 880084185U, 2319840370U, 4149829702U, 2598788559U, + 1795697197U, 1141358839U, 1488115545U, 2023012969U, 1700767613U, 318307034U, 3741324886U, 2723508372U, + 2877869759U, 1089131774U, 3746529157U, 2032861327U, 1916908398U, 3979971761U, 
1104333434U, 164965263U, + 2861111U, 4061197830U, 1227752574U, 1557686937U, 1372189256U, 1320514631U, 3006463383U, 995374202U, + 2475599921U, 2062684198U, 273818196U, 230102535U, 846172554U, 549643817U, 1291845833U, 313868405U, + 1684607830U, 3007524804U, 2696481972U, 1496449755U, 2336358181U, 2907909674U, 2046381710U, 1616425720U, + 2784933411U, 3488376037U, 4237079453U, 239405390U, 2260619063U, 4248573305U, 2538035035U, 3162243525U, + 1688422919U, 4214141536U, 2822423904U, 3616596572U, 1124198315U, 561546231U, 1450818809U, 1395937417U, + 2624134660U, 1180897824U, 1879143185U, 2716824526U, 269138413U, 147307449U, 2713808648U, 1397164069U, + 3082484694U, 3996518249U, 3822948465U, 2600900044U, 2073322101U, 3405864806U, 1465988883U, 2263895854U, + 1193632090U, 878563614U, 3633274523U, 832840620U, 1918686864U, 52700353U, 2164193894U, 4099319060U, + 1271654821U, 2934121786U, 4003740838U, 2025639926U, 1964764541U, 3711204924U, 1811665390U, 2651395047U, + 2574986913U, 2946806898U, 3374950428U, 407088658U, 3098475549U, 3678329103U, 1442862364U, 1149148015U, + 3829525455U, 1129287955U, 1691241488U, 2948237333U, 4111137958U, 3121299510U, 3228891983U, 1422674833U, + 1046218249U, 4255146817U, 1789888035U, 2790533305U, 2766564283U, 882036014U, 3138447493U, 1705216849U, + 4086442903U, 2084466724U, 1452448031U, 2518232572U, 2826320536U, 1155360986U, 1652202635U, 3192309572U, + 4278103747U, 4207611316U, 3471787642U, 3949425339U, 2428040116U, 3916643455U, 1047874548U, 3522678507U, + 679503438U, 3367670533U, 3305388393U, 1545534614U, 365979387U, 3813719383U, 2293866348U, 2311446870U, + 1294235417U, 1239874267U, 3149790803U, 3459617970U, 3553443070U, 4294547149U, 677815218U, 2480790846U, + 42062043U, 2099057395U, 608270600U, 555980767U, 3040155855U, 4220769542U, 442527978U, 4264098298U, + 748043208U, 1510266257U, 605080615U, 1870700985U, 2815631381U, 3546827936U, 389630240U, 530771376U, + 1023017605U, 570366154U, 943073743U, 2345589488U, 3331719336U, 3393668839U, 2125550217U, 
3316487643U, + 2926149375U, 1563725876U, 2234336736U, 2691239566U, 4276736289U, 1534407008U, 634560074U, 537405234U, + 1007405586U, 3435919387U, 3526571791U, 1251978681U, 2423612524U, 4240584245U, 3077336530U, 1394628396U, + 667872456U, 3838386423U, 1264339781U, 2977905273U, 2493098225U, 1408656392U, 458665826U, 1193671488U, + 3192651130U, 3408345436U, 3416261569U, 2099306740U, 1457701667U, 1798196661U, 4078467644U, 1823475953U, + 3683947560U, 3421638713U, 1333246224U, 2253616524U, 3468957120U, 2028012393U, 3950669141U, 3379074497U, + 264072228U, 1478066089U, 3810691100U, 105177718U, 3239667112U, 548511418U, 462182385U, 1880566371U, + 3449712347U, 3945393228U, 1722247886U, 834860904U, 2023403204U, 1764955460U, 2314861647U, 2541662287U, + 174235623U, 1025135151U, 4183459969U, 140612992U, 2179351739U, 838789589U, 2321249467U, 3580648575U, + 1937895840U, 4082767776U, 4208618097U, 1150513792U, 3159007105U, 957333854U, 3121611318U, 1468699888U, + 2600870933U, 1887140383U, 1158880479U, 2021223243U, 4216226924U, 3362953576U, 148281745U, 743454457U, + 2055203028U, 1388596477U, 3430662102U, 14508704U, 3293390847U, 868061153U, 2803772691U, 167322815U, + 702986243U, 970691543U, 3211870732U, 132211178U, 1878239493U, 493828035U, 578005384U, 2083193988U, + 618003366U, 3516838252U, 1628942083U, 771893287U, 1783055259U, 2664147198U, 1123182254U, 3428230259U, + 4246995520U, 540075809U, 3006764017U, 2048903824U, 4018761289U, 1959828143U, 1058168427U, 2139631077U, + 2641577125U, 1819365340U, 391416789U, 408543984U, 2374873865U, 2638340220U, 2279187081U, 3509490132U, + 109546995U, 2943006029U, 301017297U, 2159298247U, 1201337642U, 3955051167U, 1131485785U, 4026925695U, + 2288659668U, 3259615238U, 610986470U, 1262822694U, 3447317355U, 3261746329U, 900784498U, 4163475604U, + 3571695718U, 95546624U, 765597843U, 1239045105U, 1375372467U, 2689038155U, 246401506U, 1717907899U, + 2072005013U, 2562942296U, 1328060883U, 3345146601U, 2369611890U, 1541866911U, 3503192374U, 1705689374U, + 
444830279U, 308855830U, 1960063476U, 1642058452U, 1044063781U, 239034752U, 2929660102U, 2476585518U, + 3525477572U, 4104693897U, 2573076031U, 4190865194U, 2395897238U, 2400843904U, 1695065775U, 4178846862U, + 826627422U, 914883664U, 2172966192U, 375087119U, 1284236820U, 2458751356U, 2286795808U, 648305751U, + 2336236161U, 3238612623U, 3320228067U, 769191U, 430179840U, 2186883080U, 1430612668U, 973149413U, + 1121709821U, 90179392U, 411379749U, 552994832U, 10U, 4016980240U, 2433283182U, 1182819972U, 2993305185U, 1410353515U, 2105574608U, 38722U, 1668746496U, 2299044730U, 4019202397U, 0U }; // clang-format on @@ -478,33 +697,33 @@ TYPED_TEST(rocrand_mt19937_prng_tests, subsequence_test) // First, eight special values 0U, 113U, 170U, 283U, 340U, 397U, 510U, 567U, // Then, the regular pattern - 1U, 8U, 15U, 22U, 29U, 36U, 43U, 50U, 2U, 9U, 16U, 23U, 30U, 37U, 44U, 51U, 3U, 10U, 17U, 24U, 31U, 38U, 45U, 52U, 4U, 11U, 18U, 25U, 32U, 39U, 46U, 53U, 5U, 12U, 19U, 26U, 33U, 40U, 47U, 54U, 6U, 13U, 20U, 27U, 34U, 41U, 48U, 55U, 7U, 14U, 21U, 28U, 35U, 42U, 49U, 56U, - 57U, 64U, 71U, 78U, 85U, 92U, 99U, 106U, 58U, 65U, 72U, 79U, 86U, 93U, 100U, 107U, 59U, 66U, 73U, 80U, 87U, 94U, 101U, 108U, 60U, 67U, 74U, 81U, 88U, 95U, 102U, 109U, 61U, 68U, 75U, 82U, 89U, 96U, 103U, 110U, 62U, 69U, 76U, 83U, 90U, 97U, 104U, 111U, 63U, 70U, 77U, 84U, 91U, 98U, 105U, 112U, - 114U, 121U, 128U, 135U, 142U, 149U, 156U, 163U, 115U, 122U, 129U, 136U, 143U, 150U, 157U, 164U, 116U, 123U, 130U, 137U, 144U, 151U, 158U, 165U, 117U, 124U, 131U, 138U, 145U, 152U, 159U, 166U, 118U, 125U, 132U, 139U, 146U, 153U, 160U, 167U, 119U, 126U, 133U, 140U, 147U, 154U, 161U, 168U, 120U, 127U, 134U, 141U, 148U, 155U, 162U, 169U, - 171U, 178U, 185U, 192U, 199U, 206U, 213U, 220U, 172U, 179U, 186U, 193U, 200U, 207U, 214U, 221U, 173U, 180U, 187U, 194U, 201U, 208U, 215U, 222U, 174U, 181U, 188U, 195U, 202U, 209U, 216U, 223U, 175U, 182U, 189U, 196U, 203U, 210U, 217U, 224U, 176U, 183U, 190U, 197U, 204U, 211U, 218U, 225U, 
177U, 184U, 191U, 198U, 205U, 212U, 219U, 226U, - 227U, 234U, 241U, 248U, 255U, 262U, 269U, 276U, 228U, 235U, 242U, 249U, 256U, 263U, 270U, 277U, 229U, 236U, 243U, 250U, 257U, 264U, 271U, 278U, 230U, 237U, 244U, 251U, 258U, 265U, 272U, 279U, 231U, 238U, 245U, 252U, 259U, 266U, 273U, 280U, 232U, 239U, 246U, 253U, 260U, 267U, 274U, 281U, 233U, 240U, 247U, 254U, 261U, 268U, 275U, 282U, - 284U, 291U, 298U, 305U, 312U, 319U, 326U, 333U, 285U, 292U, 299U, 306U, 313U, 320U, 327U, 334U, 286U, 293U, 300U, 307U, 314U, 321U, 328U, 335U, 287U, 294U, 301U, 308U, 315U, 322U, 329U, 336U, 288U, 295U, 302U, 309U, 316U, 323U, 330U, 337U, 289U, 296U, 303U, 310U, 317U, 324U, 331U, 338U, 290U, 297U, 304U, 311U, 318U, 325U, 332U, 339U, - 341U, 348U, 355U, 362U, 369U, 376U, 383U, 390U, 342U, 349U, 356U, 363U, 370U, 377U, 384U, 391U, 343U, 350U, 357U, 364U, 371U, 378U, 385U, 392U, 344U, 351U, 358U, 365U, 372U, 379U, 386U, 393U, 345U, 352U, 359U, 366U, 373U, 380U, 387U, 394U, 346U, 353U, 360U, 367U, 374U, 381U, 388U, 395U, 347U, 354U, 361U, 368U, 375U, 382U, 389U, 396U, - 398U, 405U, 412U, 419U, 426U, 433U, 440U, 447U, 399U, 406U, 413U, 420U, 427U, 434U, 441U, 448U, 400U, 407U, 414U, 421U, 428U, 435U, 442U, 449U, 401U, 408U, 415U, 422U, 429U, 436U, 443U, 450U, 402U, 409U, 416U, 423U, 430U, 437U, 444U, 451U, 403U, 410U, 417U, 424U, 431U, 438U, 445U, 452U, 404U, 411U, 418U, 425U, 432U, 439U, 446U, 453U, - 454U, 461U, 468U, 475U, 482U, 489U, 496U, 503U, 455U, 462U, 469U, 476U, 483U, 490U, 497U, 504U, 456U, 463U, 470U, 477U, 484U, 491U, 498U, 505U, 457U, 464U, 471U, 478U, 485U, 492U, 499U, 506U, 458U, 465U, 472U, 479U, 486U, 493U, 500U, 507U, 459U, 466U, 473U, 480U, 487U, 494U, 501U, 508U, 460U, 467U, 474U, 481U, 488U, 495U, 502U, 509U, - 511U, 518U, 525U, 532U, 539U, 546U, 553U, 560U, 512U, 519U, 526U, 533U, 540U, 547U, 554U, 561U, 513U, 520U, 527U, 534U, 541U, 548U, 555U, 562U, 514U, 521U, 528U, 535U, 542U, 549U, 556U, 563U, 515U, 522U, 529U, 536U, 543U, 550U, 557U, 564U, 516U, 523U, 530U, 
537U, 544U, 551U, 558U, 565U, 517U, 524U, 531U, 538U, 545U, 552U, 559U, 566U, + 1U, 8U, 15U, 22U, 29U, 36U, 43U, 50U, 2U, 9U, 16U, 23U, 30U, 37U, 44U, 51U, 3U, 10U, 17U, 24U, 31U, 38U, 45U, 52U, 4U, 11U, 18U, 25U, 32U, 39U, 46U, 53U, 5U, 12U, 19U, 26U, 33U, 40U, 47U, 54U, 6U, 13U, 20U, 27U, 34U, 41U, 48U, 55U, 7U, 14U, 21U, 28U, 35U, 42U, 49U, 56U, + 57U, 64U, 71U, 78U, 85U, 92U, 99U, 106U, 58U, 65U, 72U, 79U, 86U, 93U, 100U, 107U, 59U, 66U, 73U, 80U, 87U, 94U, 101U, 108U, 60U, 67U, 74U, 81U, 88U, 95U, 102U, 109U, 61U, 68U, 75U, 82U, 89U, 96U, 103U, 110U, 62U, 69U, 76U, 83U, 90U, 97U, 104U, 111U, 63U, 70U, 77U, 84U, 91U, 98U, 105U, 112U, + 114U, 121U, 128U, 135U, 142U, 149U, 156U, 163U, 115U, 122U, 129U, 136U, 143U, 150U, 157U, 164U, 116U, 123U, 130U, 137U, 144U, 151U, 158U, 165U, 117U, 124U, 131U, 138U, 145U, 152U, 159U, 166U, 118U, 125U, 132U, 139U, 146U, 153U, 160U, 167U, 119U, 126U, 133U, 140U, 147U, 154U, 161U, 168U, 120U, 127U, 134U, 141U, 148U, 155U, 162U, 169U, + 171U, 178U, 185U, 192U, 199U, 206U, 213U, 220U, 172U, 179U, 186U, 193U, 200U, 207U, 214U, 221U, 173U, 180U, 187U, 194U, 201U, 208U, 215U, 222U, 174U, 181U, 188U, 195U, 202U, 209U, 216U, 223U, 175U, 182U, 189U, 196U, 203U, 210U, 217U, 224U, 176U, 183U, 190U, 197U, 204U, 211U, 218U, 225U, 177U, 184U, 191U, 198U, 205U, 212U, 219U, 226U, + 227U, 234U, 241U, 248U, 255U, 262U, 269U, 276U, 228U, 235U, 242U, 249U, 256U, 263U, 270U, 277U, 229U, 236U, 243U, 250U, 257U, 264U, 271U, 278U, 230U, 237U, 244U, 251U, 258U, 265U, 272U, 279U, 231U, 238U, 245U, 252U, 259U, 266U, 273U, 280U, 232U, 239U, 246U, 253U, 260U, 267U, 274U, 281U, 233U, 240U, 247U, 254U, 261U, 268U, 275U, 282U, + 284U, 291U, 298U, 305U, 312U, 319U, 326U, 333U, 285U, 292U, 299U, 306U, 313U, 320U, 327U, 334U, 286U, 293U, 300U, 307U, 314U, 321U, 328U, 335U, 287U, 294U, 301U, 308U, 315U, 322U, 329U, 336U, 288U, 295U, 302U, 309U, 316U, 323U, 330U, 337U, 289U, 296U, 303U, 310U, 317U, 324U, 331U, 338U, 290U, 297U, 304U, 311U, 318U, 325U, 332U, 339U, + 
341U, 348U, 355U, 362U, 369U, 376U, 383U, 390U, 342U, 349U, 356U, 363U, 370U, 377U, 384U, 391U, 343U, 350U, 357U, 364U, 371U, 378U, 385U, 392U, 344U, 351U, 358U, 365U, 372U, 379U, 386U, 393U, 345U, 352U, 359U, 366U, 373U, 380U, 387U, 394U, 346U, 353U, 360U, 367U, 374U, 381U, 388U, 395U, 347U, 354U, 361U, 368U, 375U, 382U, 389U, 396U, + 398U, 405U, 412U, 419U, 426U, 433U, 440U, 447U, 399U, 406U, 413U, 420U, 427U, 434U, 441U, 448U, 400U, 407U, 414U, 421U, 428U, 435U, 442U, 449U, 401U, 408U, 415U, 422U, 429U, 436U, 443U, 450U, 402U, 409U, 416U, 423U, 430U, 437U, 444U, 451U, 403U, 410U, 417U, 424U, 431U, 438U, 445U, 452U, 404U, 411U, 418U, 425U, 432U, 439U, 446U, 453U, + 454U, 461U, 468U, 475U, 482U, 489U, 496U, 503U, 455U, 462U, 469U, 476U, 483U, 490U, 497U, 504U, 456U, 463U, 470U, 477U, 484U, 491U, 498U, 505U, 457U, 464U, 471U, 478U, 485U, 492U, 499U, 506U, 458U, 465U, 472U, 479U, 486U, 493U, 500U, 507U, 459U, 466U, 473U, 480U, 487U, 494U, 501U, 508U, 460U, 467U, 474U, 481U, 488U, 495U, 502U, 509U, + 511U, 518U, 525U, 532U, 539U, 546U, 553U, 560U, 512U, 519U, 526U, 533U, 540U, 547U, 554U, 561U, 513U, 520U, 527U, 534U, 541U, 548U, 555U, 562U, 514U, 521U, 528U, 535U, 542U, 549U, 556U, 563U, 515U, 522U, 529U, 536U, 543U, 550U, 557U, 564U, 516U, 523U, 530U, 537U, 544U, 551U, 558U, 565U, 517U, 524U, 531U, 538U, 545U, 552U, 559U, 566U, 568U, 575U, 582U, 589U, 596U, 603U, 610U, 617U, 569U, 576U, 583U, 590U, 597U, 604U, 611U, 618U, 570U, 577U, 584U, 591U, 598U, 605U, 612U, 619U, 571U, 578U, 585U, 592U, 599U, 606U, 613U, 620U, 572U, 579U, 586U, 593U, 600U, 607U, 614U, 621U, 573U, 580U, 587U, 594U, 601U, 608U, 615U, 622U, 574U, 581U, 588U, 595U, 602U, 609U, 616U, 623U, // clang-format on }; constexpr unsigned int rev_permutation_table[state_size] = { // clang-format off - 0U, 8U, 16U, 24U, 32U, 40U, 48U, 56U, - 9U, 17U, 25U, 33U, 41U, 49U, 57U, 10U, 18U, 26U, 34U, 42U, 50U, 58U, 11U, 19U, 27U, 35U, 43U, 51U, 59U, 12U, 20U, 28U, 36U, 44U, 52U, 60U, 13U, 21U, 29U, 37U, 45U, 53U, 
61U, 14U, 22U, 30U, 38U, 46U, 54U, 62U, 15U, 23U, 31U, 39U, 47U, 55U, 63U, 64U, 72U, 80U, 88U, 96U, 104U, 112U, - 65U, 73U, 81U, 89U, 97U, 105U, 113U, 66U, 74U, 82U, 90U, 98U, 106U, 114U, 67U, 75U, 83U, 91U, 99U, 107U, 115U, 68U, 76U, 84U, 92U, 100U, 108U, 116U, 69U, 77U, 85U, 93U, 101U, 109U, 117U, 70U, 78U, 86U, 94U, 102U, 110U, 118U, 71U, 79U, 87U, 95U, 103U, 111U, 119U, 1U, 120U, 128U, 136U, 144U, 152U, 160U, - 168U, 121U, 129U, 137U, 145U, 153U, 161U, 169U, 122U, 130U, 138U, 146U, 154U, 162U, 170U, 123U, 131U, 139U, 147U, 155U, 163U, 171U, 124U, 132U, 140U, 148U, 156U, 164U, 172U, 125U, 133U, 141U, 149U, 157U, 165U, 173U, 126U, 134U, 142U, 150U, 158U, 166U, 174U, 127U, 135U, 143U, 151U, 159U, 167U, 175U, 2U, 176U, 184U, 192U, 200U, 208U, - 216U, 224U, 177U, 185U, 193U, 201U, 209U, 217U, 225U, 178U, 186U, 194U, 202U, 210U, 218U, 226U, 179U, 187U, 195U, 203U, 211U, 219U, 227U, 180U, 188U, 196U, 204U, 212U, 220U, 228U, 181U, 189U, 197U, 205U, 213U, 221U, 229U, 182U, 190U, 198U, 206U, 214U, 222U, 230U, 183U, 191U, 199U, 207U, 215U, 223U, 231U, 232U, 240U, 248U, 256U, 264U, - 272U, 280U, 233U, 241U, 249U, 257U, 265U, 273U, 281U, 234U, 242U, 250U, 258U, 266U, 274U, 282U, 235U, 243U, 251U, 259U, 267U, 275U, 283U, 236U, 244U, 252U, 260U, 268U, 276U, 284U, 237U, 245U, 253U, 261U, 269U, 277U, 285U, 238U, 246U, 254U, 262U, 270U, 278U, 286U, 239U, 247U, 255U, 263U, 271U, 279U, 287U, 3U, 288U, 296U, 304U, 312U, - 320U, 328U, 336U, 289U, 297U, 305U, 313U, 321U, 329U, 337U, 290U, 298U, 306U, 314U, 322U, 330U, 338U, 291U, 299U, 307U, 315U, 323U, 331U, 339U, 292U, 300U, 308U, 316U, 324U, 332U, 340U, 293U, 301U, 309U, 317U, 325U, 333U, 341U, 294U, 302U, 310U, 318U, 326U, 334U, 342U, 295U, 303U, 311U, 319U, 327U, 335U, 343U, 4U, 344U, 352U, 360U, - 368U, 376U, 384U, 392U, 345U, 353U, 361U, 369U, 377U, 385U, 393U, 346U, 354U, 362U, 370U, 378U, 386U, 394U, 347U, 355U, 363U, 371U, 379U, 387U, 395U, 348U, 356U, 364U, 372U, 380U, 388U, 396U, 349U, 357U, 365U, 373U, 381U, 389U, 397U, 
350U, 358U, 366U, 374U, 382U, 390U, 398U, 351U, 359U, 367U, 375U, 383U, 391U, 399U, 5U, 400U, 408U, - 416U, 424U, 432U, 440U, 448U, 401U, 409U, 417U, 425U, 433U, 441U, 449U, 402U, 410U, 418U, 426U, 434U, 442U, 450U, 403U, 411U, 419U, 427U, 435U, 443U, 451U, 404U, 412U, 420U, 428U, 436U, 444U, 452U, 405U, 413U, 421U, 429U, 437U, 445U, 453U, 406U, 414U, 422U, 430U, 438U, 446U, 454U, 407U, 415U, 423U, 431U, 439U, 447U, 455U, 456U, 464U, - 472U, 480U, 488U, 496U, 504U, 457U, 465U, 473U, 481U, 489U, 497U, 505U, 458U, 466U, 474U, 482U, 490U, 498U, 506U, 459U, 467U, 475U, 483U, 491U, 499U, 507U, 460U, 468U, 476U, 484U, 492U, 500U, 508U, 461U, 469U, 477U, 485U, 493U, 501U, 509U, 462U, 470U, 478U, 486U, 494U, 502U, 510U, 463U, 471U, 479U, 487U, 495U, 503U, 511U, 6U, 512U, - 520U, 528U, 536U, 544U, 552U, 560U, 513U, 521U, 529U, 537U, 545U, 553U, 561U, 514U, 522U, 530U, 538U, 546U, 554U, 562U, 515U, 523U, 531U, 539U, 547U, 555U, 563U, 516U, 524U, 532U, 540U, 548U, 556U, 564U, 517U, 525U, 533U, 541U, 549U, 557U, 565U, 518U, 526U, 534U, 542U, 550U, 558U, 566U, 519U, 527U, 535U, 543U, 551U, 559U, 567U, 7U, + 0U, 8U, 16U, 24U, 32U, 40U, 48U, 56U, + 9U, 17U, 25U, 33U, 41U, 49U, 57U, 10U, 18U, 26U, 34U, 42U, 50U, 58U, 11U, 19U, 27U, 35U, 43U, 51U, 59U, 12U, 20U, 28U, 36U, 44U, 52U, 60U, 13U, 21U, 29U, 37U, 45U, 53U, 61U, 14U, 22U, 30U, 38U, 46U, 54U, 62U, 15U, 23U, 31U, 39U, 47U, 55U, 63U, 64U, 72U, 80U, 88U, 96U, 104U, 112U, + 65U, 73U, 81U, 89U, 97U, 105U, 113U, 66U, 74U, 82U, 90U, 98U, 106U, 114U, 67U, 75U, 83U, 91U, 99U, 107U, 115U, 68U, 76U, 84U, 92U, 100U, 108U, 116U, 69U, 77U, 85U, 93U, 101U, 109U, 117U, 70U, 78U, 86U, 94U, 102U, 110U, 118U, 71U, 79U, 87U, 95U, 103U, 111U, 119U, 1U, 120U, 128U, 136U, 144U, 152U, 160U, + 168U, 121U, 129U, 137U, 145U, 153U, 161U, 169U, 122U, 130U, 138U, 146U, 154U, 162U, 170U, 123U, 131U, 139U, 147U, 155U, 163U, 171U, 124U, 132U, 140U, 148U, 156U, 164U, 172U, 125U, 133U, 141U, 149U, 157U, 165U, 173U, 126U, 134U, 142U, 150U, 158U, 166U, 174U, 
127U, 135U, 143U, 151U, 159U, 167U, 175U, 2U, 176U, 184U, 192U, 200U, 208U, + 216U, 224U, 177U, 185U, 193U, 201U, 209U, 217U, 225U, 178U, 186U, 194U, 202U, 210U, 218U, 226U, 179U, 187U, 195U, 203U, 211U, 219U, 227U, 180U, 188U, 196U, 204U, 212U, 220U, 228U, 181U, 189U, 197U, 205U, 213U, 221U, 229U, 182U, 190U, 198U, 206U, 214U, 222U, 230U, 183U, 191U, 199U, 207U, 215U, 223U, 231U, 232U, 240U, 248U, 256U, 264U, + 272U, 280U, 233U, 241U, 249U, 257U, 265U, 273U, 281U, 234U, 242U, 250U, 258U, 266U, 274U, 282U, 235U, 243U, 251U, 259U, 267U, 275U, 283U, 236U, 244U, 252U, 260U, 268U, 276U, 284U, 237U, 245U, 253U, 261U, 269U, 277U, 285U, 238U, 246U, 254U, 262U, 270U, 278U, 286U, 239U, 247U, 255U, 263U, 271U, 279U, 287U, 3U, 288U, 296U, 304U, 312U, + 320U, 328U, 336U, 289U, 297U, 305U, 313U, 321U, 329U, 337U, 290U, 298U, 306U, 314U, 322U, 330U, 338U, 291U, 299U, 307U, 315U, 323U, 331U, 339U, 292U, 300U, 308U, 316U, 324U, 332U, 340U, 293U, 301U, 309U, 317U, 325U, 333U, 341U, 294U, 302U, 310U, 318U, 326U, 334U, 342U, 295U, 303U, 311U, 319U, 327U, 335U, 343U, 4U, 344U, 352U, 360U, + 368U, 376U, 384U, 392U, 345U, 353U, 361U, 369U, 377U, 385U, 393U, 346U, 354U, 362U, 370U, 378U, 386U, 394U, 347U, 355U, 363U, 371U, 379U, 387U, 395U, 348U, 356U, 364U, 372U, 380U, 388U, 396U, 349U, 357U, 365U, 373U, 381U, 389U, 397U, 350U, 358U, 366U, 374U, 382U, 390U, 398U, 351U, 359U, 367U, 375U, 383U, 391U, 399U, 5U, 400U, 408U, + 416U, 424U, 432U, 440U, 448U, 401U, 409U, 417U, 425U, 433U, 441U, 449U, 402U, 410U, 418U, 426U, 434U, 442U, 450U, 403U, 411U, 419U, 427U, 435U, 443U, 451U, 404U, 412U, 420U, 428U, 436U, 444U, 452U, 405U, 413U, 421U, 429U, 437U, 445U, 453U, 406U, 414U, 422U, 430U, 438U, 446U, 454U, 407U, 415U, 423U, 431U, 439U, 447U, 455U, 456U, 464U, + 472U, 480U, 488U, 496U, 504U, 457U, 465U, 473U, 481U, 489U, 497U, 505U, 458U, 466U, 474U, 482U, 490U, 498U, 506U, 459U, 467U, 475U, 483U, 491U, 499U, 507U, 460U, 468U, 476U, 484U, 492U, 500U, 508U, 461U, 469U, 477U, 485U, 493U, 501U, 
509U, 462U, 470U, 478U, 486U, 494U, 502U, 510U, 463U, 471U, 479U, 487U, 495U, 503U, 511U, 6U, 512U, + 520U, 528U, 536U, 544U, 552U, 560U, 513U, 521U, 529U, 537U, 545U, 553U, 561U, 514U, 522U, 530U, 538U, 546U, 554U, 562U, 515U, 523U, 531U, 539U, 547U, 555U, 563U, 516U, 524U, 532U, 540U, 548U, 556U, 564U, 517U, 525U, 533U, 541U, 549U, 557U, 565U, 518U, 526U, 534U, 542U, 550U, 558U, 566U, 519U, 527U, 535U, 543U, 551U, 559U, 567U, 7U, 568U, 576U, 584U, 592U, 600U, 608U, 616U, 569U, 577U, 585U, 593U, 601U, 609U, 617U, 570U, 578U, 586U, 594U, 602U, 610U, 618U, 571U, 579U, 587U, 595U, 603U, 611U, 619U, 572U, 580U, 588U, 596U, 604U, 612U, 620U, 573U, 581U, 589U, 597U, 605U, 613U, 621U, 574U, 582U, 590U, 598U, 606U, 614U, 622U, 575U, 583U, 591U, 599U, 607U, 615U, 623U, // clang-format on }; @@ -517,15 +736,14 @@ TYPED_TEST(rocrand_mt19937_prng_tests, subsequence_test) HIP_CHECK(hipMalloc(&d_engines, generator_count * state_size * sizeof(unsigned int))); // dummy config provider, kernel just needs to verify the amount of generators for the actual call - using ConfigProvider - = rocrand_host::detail::default_config_provider; + using ConfigProvider = default_config_provider; hipLaunchKernelGGL( HIP_KERNEL_NAME( - rocrand_host::detail:: - jump_ahead_kernel), + + jump_ahead_kernel), dim3(generator_count), - dim3(rocrand_mt19937::jump_ahead_thread_count), + dim3(generator_t::jump_ahead_thread_count), 0, 0, d_engines, @@ -634,20 +852,17 @@ TYPED_TEST(rocrand_mt19937_prng_tests, subsequence_test) struct mt19937_engine { - static constexpr inline unsigned int m = rocrand_host::detail::mt19937_constants::m; - static constexpr inline unsigned int mexp = rocrand_host::detail::mt19937_constants::mexp; - static constexpr inline unsigned int matrix_a - = rocrand_host::detail::mt19937_constants::matrix_a; - static constexpr inline unsigned int upper_mask - = rocrand_host::detail::mt19937_constants::upper_mask; - static constexpr inline unsigned int lower_mask - = 
rocrand_host::detail::mt19937_constants::lower_mask; + static constexpr inline unsigned int m = mt19937_constants::m; + static constexpr inline unsigned int mexp = mt19937_constants::mexp; + static constexpr inline unsigned int matrix_a = mt19937_constants::matrix_a; + static constexpr inline unsigned int upper_mask = mt19937_constants::upper_mask; + static constexpr inline unsigned int lower_mask = mt19937_constants::lower_mask; // Jumping constants. static constexpr inline unsigned int qq = 7; static constexpr inline unsigned int ll = 1U << qq; - static constexpr inline unsigned int n = rocrand_host::detail::mt19937_constants::n; + static constexpr inline unsigned int n = mt19937_constants::n; struct mt19937_state { @@ -887,7 +1102,7 @@ struct mt19937_engine } }; -TYPED_TEST(rocrand_mt19937_prng_tests, jump_ahead_test) +TYPED_TEST(mt19937_generator_engine_tests, jump_ahead_test) { // Compare states of all engines // * computed consecutively on host using Sliding window algorithm @@ -896,14 +1111,16 @@ TYPED_TEST(rocrand_mt19937_prng_tests, jump_ahead_test) // with precomputed jumps of i * 2 ^ 1000 and mt19937_jumps_radix * i * 2 ^ 1000 values // where i is in range [1; mt19937_jumps_radix). 
+ using generator_t = typename TestFixture::generator_t; + const unsigned long long seed = 12345678; - constexpr unsigned int n = rocrand_host::detail::mt19937_constants::n; + constexpr unsigned int n = mt19937_constants::n; // Test for default config - using ConfigProvider - = rocrand_host::detail::default_config_provider; - rocrand_host::detail::generator_config config; - HIP_CHECK(ConfigProvider::host_config(0, TestFixture::ordering, config)); + using ConfigProvider = default_config_provider; + generator_config config; + HIP_CHECK( + ConfigProvider::host_config(0, ROCRAND_ORDERING_PSEUDO_DEFAULT, config)); const unsigned int generator_count = config.threads * config.blocks / mt19937_octo_engine::threads_per_generator; @@ -932,22 +1149,21 @@ TYPED_TEST(rocrand_mt19937_prng_tests, jump_ahead_test) unsigned int* d_engines1{}; HIP_CHECK(hipMalloc(&d_engines1, generator_count * n * sizeof(unsigned int))); - rocrand_host::detail::dynamic_dispatch( - TestFixture::ordering, - [&](auto is_dynamic) - { - hipLaunchKernelGGL(HIP_KERNEL_NAME(rocrand_host::detail::jump_ahead_kernel< - rocrand_mt19937::jump_ahead_thread_count, - ConfigProvider, - is_dynamic>), - dim3(generator_count), - dim3(rocrand_mt19937::jump_ahead_thread_count), - 0, - 0, - d_engines1, - seed, - d_mt19937_jump); - }); + dynamic_dispatch(ROCRAND_ORDERING_PSEUDO_DEFAULT, + [&](auto is_dynamic) + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(jump_ahead_kernel), + dim3(generator_count), + dim3(generator_t::jump_ahead_thread_count), + 0, + 0, + d_engines1, + seed, + d_mt19937_jump); + }); std::vector h_engines1(generator_count * n); HIP_CHECK(hipMemcpy(h_engines1.data(), @@ -975,530 +1191,3 @@ TYPED_TEST(rocrand_mt19937_prng_tests, jump_ahead_test) HIP_CHECK(hipFree(d_mt19937_jump)); HIP_CHECK(hipFree(d_engines1)); } - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - rocrand_host::detail::generator_config config; - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const unsigned int generator_count - = config.threads * config.blocks / mt19937_octo_engine::threads_per_generator; - - const size_t stride = rocrand_host::detail::mt19937_constants::n * generator_count * divisor; - - // Large sizes are used for triggering all code paths in the kernels (generating of middle, - // start and end sequences). - std::vector sizes0{stride, - 2, - stride, - 100, - 1, - 24783, - stride / 2, - 3 * stride + 704400, - 2, - stride + 776543, - 44176}; - std::vector sizes1{2 * stride, - 1024, - 55, - 65536, - stride / 2, - stride + 623456, - 3 * stride - 300000, - 1048576, - 111331}; - - // Round by the distribution's granularity (2 for normals, 2 for short and half, 4 for uchar). - // Sizes not divisible by the granularity or pointers not aligned by it work but without strict - // continuity. 
- if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - const size_t size2 = std::min(size0, size1); - - GeneratorType g0; - g0.set_order(ordering); - GeneratorType g1; - g1.set_order(ordering); - GeneratorType g2; - g2.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - std::vector host_data2(size2); - - size_t current0 = 0; - for(size_t s : sizes0) - { - T* data0; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * s)); - HIP_CHECK(hipMemset(data0, -1, sizeof(T) * s)); - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - HIP_CHECK(hipFree(data0)); - } - size_t current1 = 0; - for(size_t s : sizes1) - { - T* data1; - HIP_CHECK(hipMalloc(&data1, sizeof(T) * s)); - HIP_CHECK(hipMemset(data1, -1, sizeof(T) * s)); - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - HIP_CHECK(hipFree(data1)); - } - T* data2; - HIP_CHECK(hipMalloc(&data2, sizeof(T) * size2)); - HIP_CHECK(hipMemset(data2, -1, sizeof(T) * size2)); - generate_func(g2, data2, size2); - HIP_CHECK(hipMemcpy(host_data2.data(), data2, sizeof(T) * size2, hipMemcpyDefault)); - HIP_CHECK(hipFree(data2)); - - size_t incorrect = 0; - for(size_t i = 0; i < size2; i++) - { - if constexpr(std::is_same::value) - { - if(__half2float(host_data0[i]) != __half2float(host_data1[i]) - || __half2float(host_data0[i]) != __half2float(host_data2[i])) - { - incorrect++; - } - } - else - { - if(host_data0[i] != host_data1[i] || host_data0[i] != host_data2[i]) - { - incorrect++; - } - } - } - ASSERT_EQ(incorrect, 0); -} - 
-TYPED_TEST(rocrand_mt19937_prng_tests, continuity_uniform_uint_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, unsigned int* data, size_t s) { g.generate(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_uniform_char_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, unsigned char* data, size_t s) { g.generate(data, s); }, - ordering, - 4); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_uniform_short_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, unsigned short* data, size_t s) { g.generate(data, s); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_uniform_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](rocrand_mt19937& g, float* data, size_t s) { g.generate_uniform(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_uniform_half_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test<__half, generator_t, ConfigProvider>([](generator_t& g, __half* data, size_t s) - { g.generate_uniform(data, s); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, 
continuity_uniform_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test([](generator_t& g, double* data, size_t s) - { g.generate_uniform(data, s); }, - ordering); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_normal_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test([](generator_t& g, float* data, size_t s) - { g.generate_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_normal_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test([](generator_t& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_log_normal_float_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, float* data, size_t s) { g.generate_log_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, continuity_log_normal_double_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, double* data, size_t s) { g.generate_log_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, 
continuity_poisson_test) -{ - constexpr rocrand_ordering ordering = TestFixture::ordering; - using generator_t = typename TestFixture::generator_t; - using ConfigProvider = rocrand_host::detail::default_config_provider; - continuity_test( - [](generator_t& g, unsigned int* data, size_t s) { g.generate_poisson(data, s, 100.0); }, - ordering); -} - -// Check that that heads and tails are generated correctly for misaligned pointers or sizes. -template -void head_and_tail_test(GenerateFunc generate_func, rocrand_ordering ordering, unsigned int divisor) -{ - rocrand_host::detail::generator_config config; - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const unsigned int generator_count - = config.threads * config.blocks / mt19937_octo_engine::threads_per_generator; - - const size_t stride = rocrand_host::detail::mt19937_constants::n * generator_count * divisor; - - // Large sizes are used for triggering all code paths in the kernels. - std::vector - sizes{stride, 1, stride * 2 + 45651, 5, stride * 3 + 123, 6, 45, stride - 12}; - - const size_t max_size = *std::max_element(sizes.cbegin(), sizes.cend()); - const size_t canary_size = 16; - const size_t max_size_with_canary = max_size + canary_size * 2; - - const T canary = std::numeric_limits::max(); - - GeneratorType g; - g.set_order(ordering); - - std::vector host_data(max_size_with_canary); - T* data; - HIP_CHECK(hipMalloc(&data, sizeof(T) * max_size_with_canary)); - - for(size_t offset : {0, 1, 2, 3}) - { - for(size_t s : sizes) - { - const size_t s_with_canary = s + canary_size * 2; - for(size_t i = 0; i < s_with_canary; i++) - { - host_data[i] = canary; - } - HIP_CHECK( - hipMemcpy(data, host_data.data(), sizeof(T) * s_with_canary, hipMemcpyDefault)); - - generate_func(g, data + canary_size + offset, s); - - HIP_CHECK( - hipMemcpy(host_data.data(), data, sizeof(T) * s_with_canary, hipMemcpyDefault)); - - // Check that the generator does not write more values than needed for head and tail - 
// (so canary areas, or memory before and after data passed to generate(), are intact) - for(size_t i = 0; i < canary_size + offset; i++) - { - ASSERT_EQ(host_data[i], canary); - } - for(size_t i = s_with_canary - (canary_size - offset); i < s_with_canary; i++) - { - ASSERT_EQ(host_data[i], canary); - } - - // Check if head and tail are generated (canary value, used as an initial value, - // can not be generated because it is not in the range of the distribution) - size_t incorrect = 0; - for(size_t i = canary_size + offset; i < s_with_canary - (canary_size - offset); i++) - { - if(host_data[i] == canary) - { - incorrect++; - } - } - ASSERT_EQ(incorrect, 0); - } - } - HIP_CHECK(hipFree(data)); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, head_and_tail_normal_float_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - head_and_tail_test( - [](rocrand_mt19937& g, float* data, size_t s) { g.generate_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, head_and_tail_normal_double_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - head_and_tail_test( - [](rocrand_mt19937& g, double* data, size_t s) { g.generate_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, head_and_tail_log_normal_float_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - head_and_tail_test( - [](rocrand_mt19937& g, float* data, size_t s) - { g.generate_log_normal(data, s, 0.0f, 1.0f); }, - ordering, - 2); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, 
head_and_tail_log_normal_double_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - head_and_tail_test( - [](rocrand_mt19937& g, double* data, size_t s) - { g.generate_log_normal(data, s, 0.0, 1.0); }, - ordering, - 2); -} - -// Check if changing distribution sets m_start_input correctly -template -void change_distribution_test(GenerateFunc0 generate_func0, - GenerateFunc1 generate_func1, - size_t size0, - size_t start1, - rocrand_ordering ordering) -{ - SCOPED_TRACE(testing::Message() << "size0 = " << size0 << " start1 = " << start1); - - rocrand_host::detail::generator_config config; - // Configs for mt19937 are independent of type, so just use T0 - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const size_t size1 = config.threads * config.blocks * 3; - - T0* data0; - T1* data10; - T1* data11; - HIP_CHECK(hipMalloc(&data0, sizeof(T0) * size0)); - HIP_CHECK(hipMalloc(&data10, sizeof(T1) * size1)); - HIP_CHECK(hipMalloc(&data11, sizeof(T1) * (start1 + size1))); - - GeneratorType g0; - // Generate the first distribution - generate_func0(g0, data0, size0); - // Change distribution to the second - generate_func1(g0, data10, size1); - - GeneratorType g1; - // Generate the second distribution considering that first `start1` values correspond to - // `size0` values of the first distribution and some discarded values - generate_func1(g1, data11, start1 + size1); - - std::vector host_data10(size1); - std::vector host_data11(size1); - HIP_CHECK(hipMemcpy(host_data10.data(), data10, sizeof(T1) * size1, hipMemcpyDefault)); - // Ignore `start1` values - HIP_CHECK(hipMemcpy(host_data11.data(), data11 + start1, sizeof(T1) * size1, hipMemcpyDefault)); - - for(size_t i = 0; i < size1; i++) - { - ASSERT_EQ(host_data10[i], host_data11[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data10)); - 
HIP_CHECK(hipFree(data11)); -} - -TYPED_TEST(rocrand_mt19937_prng_tests, change_distribution0_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - rocrand_host::detail::generator_config config; - // Configs for mt19937 are independent, just use void - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const size_t s = config.threads * config.blocks; - - // Larger type (normal float) to smaller type (uniform uint) - std::vector> test_cases{ - { (s + 4) * 2, s * 4}, - {(s * 2 + s - 10) * 2, s * 6}, - { (s * 3) * 2, s * 6}, - { (s * 4) * 2, s * 8}, - }; - for(auto test_case : test_cases) - { - change_distribution_test( - [](rocrand_mt19937& g, float* data, size_t s) - { g.generate_normal(data, s, 0.0f, 1.0f); }, - [](rocrand_mt19937& g, unsigned int* data, size_t s) { g.generate(data, s); }, - test_case.first, - test_case.second, - ordering); - } -} - -TYPED_TEST(rocrand_mt19937_prng_tests, change_distribution1_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - rocrand_host::detail::generator_config config; - // Configs for mt19937 are independent, just use void - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const size_t s = config.threads * config.blocks; - - // Smaller type (uniform float) to larger type (normal double) - std::vector> test_cases{ - {s * 2 + 100, (s * 1) * 2}, - { s * 4 + 10, (s * 2) * 2}, - { s * 2, (s * 1) * 2}, - { s * 8, (s * 2) * 2}, - { s * 77, (s * 19) * 2} - }; - for(auto test_case : test_cases) - { - change_distribution_test( - [](rocrand_mt19937& g, float* data, size_t s) { g.generate_uniform(data, s); }, - [](rocrand_mt19937& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 
1.0); }, - test_case.first, - test_case.second, - ordering); - } -} - -TYPED_TEST(rocrand_mt19937_prng_tests, change_distribution2_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - rocrand_host::detail::generator_config config; - // Configs for mt19937 are independent, just use void - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const size_t s = config.threads * config.blocks; - - // Smaller type (uniform double) to larger type (normal double) - std::vector> test_cases{ - {s * 2 + 400, (s * 2) * 2}, - { s * 5 + 10, (s * 3) * 2}, - { s * 3, (s * 2) * 2}, - { s * 4, (s * 2) * 2}, - }; - for(auto test_case : test_cases) - { - change_distribution_test( - [](rocrand_mt19937& g, double* data, size_t s) { g.generate_uniform(data, s); }, - [](rocrand_mt19937& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - test_case.first, - test_case.second, - ordering); - } -} - -TYPED_TEST(rocrand_mt19937_prng_tests, change_distribution3_test) -{ - using generator_t = typename TestFixture::generator_t; - constexpr rocrand_ordering ordering = TestFixture::ordering; - using ConfigProvider = rocrand_host::detail::default_config_provider; - rocrand_host::detail::generator_config config; - // Configs for mt19937 are independent, just use void - HIP_CHECK(ConfigProvider::template host_config(0, ordering, config)); - - const size_t s = config.threads * config.blocks; - - // Larger type (normal double) to smaller type (uniform ushort) - std::vector> test_cases{ - { 100 * 2, s * 8}, - {(s + 10) * 2, s * 16}, - { (s * 2) * 2, s * 16}, - { (s * 3) * 2, s * 24}, - }; - for(auto test_case : test_cases) - { - change_distribution_test( - [](rocrand_mt19937& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - [](rocrand_mt19937& g, unsigned short* data, size_t s) { 
g.generate(data, s); }, - test_case.first, - test_case.second, - ordering); - } -} diff --git a/test/internal/test_rocrand_mtgp32_prng.cpp b/test/internal/test_rocrand_mtgp32_prng.cpp index 13a8e17c..aec3bfe7 100644 --- a/test/internal/test_rocrand_mtgp32_prng.cpp +++ b/test/internal/test_rocrand_mtgp32_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,285 +18,30 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include - -#include -#include - -#include -#include - #include "test_common.hpp" #include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" +#include -struct rocrand_mtgp32_prng_tests : public testing::TestWithParam -{ - rocrand_mtgp32 get_generator() const - { - rocrand_mtgp32 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_mtgp32_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -TEST_P(rocrand_mtgp32_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - rocrand_mtgp32 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX 
/ 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_mtgp32_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_mtgp32 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_mtgp32_prng_tests, normal_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_mtgp32 g = get_generator(); - ROCRAND_CHECK(g.generate_normal(data, size, 2.0f, 5.0f)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - float mean = 0.0f; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i]; - } - mean = mean / size; - - float std = 0.0f; - for(size_t i = 0; i < size; i++) - { - std += std::pow(host_data[i] - mean, 2); - } - std = sqrt(std / size); - - EXPECT_NEAR(2.0f, mean, 0.4f); // 20% - EXPECT_NEAR(5.0f, std, 1.0f); // 20% - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_mtgp32_prng_tests, poisson_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - rocrand_mtgp32 g = get_generator(); - ROCRAND_CHECK(g.generate_poisson(data, size, 5.5)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - 
HIP_CHECK(hipDeviceSynchronize()); - - double mean = 0.0; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i]; - } - mean = mean / size; - - double var = 0.0; - for(size_t i = 0; i < size; i++) - { - double x = host_data[i] - mean; - var += x * x; - } - var = var / size; - - EXPECT_NEAR(mean, 5.5, std::max(1.0, 5.5 * 1e-2)); - EXPECT_NEAR(var, 5.5, std::max(1.0, 5.5 * 1e-2)); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_mtgp32_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - rocrand_mtgp32 g0 = get_generator(); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_mtgp32_prng_tests, same_seed_test) -{ - const unsigned long long seed = 5ULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_mtgp32 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_mtgp32_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 5ULL; - const unsigned long long seed1 = 10ULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_mtgp32 g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); +#include - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, 
size)); - HIP_CHECK(hipDeviceSynchronize()); +#include - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); +#include - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); +using rocrand_impl::host::mtgp32_generator; - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); +// Generator API tests +using mtgp32_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); +INSTANTIATE_TYPED_TEST_SUITE_P(mtgp32_generator, + generator_prng_tests, + mtgp32_generator_prng_tests_types); - HIP_CHECK(hipFree(data)); -} +// Continuity cannot be implemented for MTGP32, as 'offset' is not supported for this +// generator. Therefore, continuity tests fail. +// INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_mtgp32, +// generator_prng_continuity_tests, +// rocrand_mtgp32_generator_prng_tests_types); diff --git a/test/internal/test_rocrand_philox_prng.cpp b/test/internal/test_rocrand_philox_prng.cpp index 4ffaeed9..7470cabc 100644 --- a/test/internal/test_rocrand_philox_prng.cpp +++ b/test/internal/test_rocrand_philox_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,235 +18,53 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include -#include - -#include -#include - #include "test_common.hpp" #include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" +#include -struct rocrand_philox_prng_tests : public testing::TestWithParam -{ - rocrand_philox4x32_10 get_generator() const - { - rocrand_philox4x32_10 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_philox_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -TEST_P(rocrand_philox_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * (size + 1))); - - rocrand_philox4x32_10 g = get_generator(); - ROCRAND_CHECK(g.generate(data + 1, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data + 1, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_philox_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_philox4x32_10 g = get_generator(); - g.set_order(GetParam()); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, 
data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_philox_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - rocrand_philox4x32_10 g0 = get_generator(); - g0.set_order(GetParam()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_philox_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_philox4x32_10 g0 = get_generator(), g1 = get_generator(); - g0.set_order(GetParam()); - g1.set_order(GetParam()); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_philox_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_philox4x32_10 g0 = get_generator(), g1 = get_generator(); - g0.set_order(GetParam()); - g1.set_order(GetParam()); - // Set different seeds - 
g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} +#include -/// -/// rocrand_philox_prng_state_tests TEST GROUP -/// +#include -// Just get access to internal state -class rocrand_philox4x32_10_engine_type_test : public rocrand_philox4x32_10::engine_type +using rocrand_impl::host::philox4x32_10_generator; + +// Generator API tests +using philox_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using philox_generator_prng_offset_tests_types + = ::testing::Types, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(philox4x32_10_generator, + generator_prng_tests, + philox_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(philox4x32_10_generator, + generator_prng_continuity_tests, + philox_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(philox4x32_10_generator, + generator_prng_offset_tests, + philox_generator_prng_offset_tests_types); + +// Engine API tests +class 
philox4x32_10_engine_type_test : public philox4x32_10_generator::engine_type { public: - __host__ rocrand_philox4x32_10_engine_type_test() : rocrand_philox4x32_10::engine_type(0, 0, 0) - {} + __host__ philox4x32_10_engine_type_test() : philox4x32_10_generator::engine_type(0, 0, 0) {} __host__ state_type& internal_state_ref() { @@ -254,10 +72,10 @@ class rocrand_philox4x32_10_engine_type_test : public rocrand_philox4x32_10::eng } }; -TEST(rocrand_philox_prng_state_tests, seed_test) +TEST(philox_prng_state_tests, seed_test) { - rocrand_philox4x32_10_engine_type_test engine; - rocrand_philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); + philox4x32_10_engine_type_test engine; + philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -279,10 +97,10 @@ TEST(rocrand_philox_prng_state_tests, seed_test) // Check if the philox state counter is calculated correctly during // random number generation. 
-TEST(rocrand_philox_prng_state_tests, discard_test) +TEST(philox_prng_state_tests, discard_test) { - rocrand_philox4x32_10_engine_type_test engine; - rocrand_philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); + philox4x32_10_engine_type_test engine; + philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -356,10 +174,10 @@ TEST(rocrand_philox_prng_state_tests, discard_test) EXPECT_EQ(state.counter.w, 4U); } -TEST(rocrand_philox_prng_state_tests, discard_sequence_test) +TEST(philox_prng_state_tests, discard_sequence_test) { - rocrand_philox4x32_10_engine_type_test engine; - rocrand_philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); + philox4x32_10_engine_type_test engine; + philox4x32_10_engine_type_test::state_type& state = engine.internal_state_ref(); engine.discard_subsequence(UINT_MAX); EXPECT_EQ(state.counter.x, 0U); @@ -399,200 +217,3 @@ TEST(rocrand_philox_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.z, 0U); EXPECT_EQ(state.counter.w, 6U); } - -template -struct rocrand_philox_prng_offset : public ::testing::Test -{ - using type = T; - - rocrand_philox4x32_10 get_generator() const - { - rocrand_philox4x32_10 g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -template -struct rocrand_philox_prng_offset_params -{ - using output_type = T; - static constexpr rocrand_ordering ordering = Ordering; -}; - -using RocrandPhiloxPrngOffsetTypes = ::testing::Types< - rocrand_philox_prng_offset_params, - rocrand_philox_prng_offset_params, - rocrand_philox_prng_offset_params, - rocrand_philox_prng_offset_params>; -TYPED_TEST_SUITE(rocrand_philox_prng_offset, RocrandPhiloxPrngOffsetTypes); - -TYPED_TEST(rocrand_philox_prng_offset, offsets_test) -{ - using params = typename TestFixture::type; - 
using T = typename params::output_type; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; - - for(const auto offset : offsets) - { - SCOPED_TRACE(::testing::Message() << "with offset=" << offset); - - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_philox4x32_10 g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_philox4x32_10 g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_philox4x32_10 g0; - g0.set_order(ordering); - rocrand_philox4x32_10 g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(rocrand_philox_prng_tests, continuity_uniform_uint_test) -{ - continuity_test([](rocrand_philox4x32_10& g, unsigned int* data, size_t s) - { g.generate(data, s); }, - GetParam()); -} - -TEST_P(rocrand_philox_prng_tests, continuity_uniform_char_test) -{ - continuity_test([](rocrand_philox4x32_10& g, unsigned char* data, size_t s) - { g.generate(data, s); }, - GetParam(), - 4); -} - -TEST_P(rocrand_philox_prng_tests, continuity_uniform_float_test) -{ - continuity_test([](rocrand_philox4x32_10& g, 
float* data, size_t s) - { g.generate_uniform(data, s); }, - GetParam()); -} - -TEST_P(rocrand_philox_prng_tests, continuity_uniform_double_test) -{ - continuity_test([](rocrand_philox4x32_10& g, double* data, size_t s) - { g.generate_uniform(data, s); }, - GetParam()); -} - -TEST_P(rocrand_philox_prng_tests, continuity_normal_float_test) -{ - continuity_test([](rocrand_philox4x32_10& g, float* data, size_t s) - { g.generate_normal(data, s, 0.0f, 1.0f); }, - GetParam(), - 2); -} - -TEST_P(rocrand_philox_prng_tests, continuity_normal_double_test) -{ - continuity_test([](rocrand_philox4x32_10& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - GetParam(), - 2); -} - -TEST_P(rocrand_philox_prng_tests, continuity_log_normal_float_test) -{ - continuity_test([](rocrand_philox4x32_10& g, float* data, size_t s) - { g.generate_log_normal(data, s, 0.0f, 1.0f); }, - GetParam(), - 2); -} - -TEST_P(rocrand_philox_prng_tests, continuity_log_normal_double_test) -{ - continuity_test([](rocrand_philox4x32_10& g, double* data, size_t s) - { g.generate_log_normal(data, s, 0.0, 1.0); }, - GetParam(), - 2); -} - -TEST_P(rocrand_philox_prng_tests, continuity_poisson_test) -{ - continuity_test([](rocrand_philox4x32_10& g, unsigned int* data, size_t s) - { g.generate_poisson(data, s, 100.0); }, - GetParam()); -} diff --git a/test/internal/test_rocrand_prng.hpp b/test/internal/test_rocrand_prng.hpp new file mode 100644 index 00000000..5ffe372d --- /dev/null +++ b/test/internal/test_rocrand_prng.hpp @@ -0,0 +1,775 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef ROCRAND_TEST_INTERNAL_TEST_ROCRAND_PRNG_HPP_ +#define ROCRAND_TEST_INTERNAL_TEST_ROCRAND_PRNG_HPP_ + +#include "test_common.hpp" +#include "test_rocrand_common.hpp" +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +// +// General generator host API tests +// +template +struct generator_prng_tests : public testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } +}; + +template +struct generator_prng_tests_params +{ + using generator_t = Generator; + static inline constexpr rocrand_ordering ordering = Ordering; +}; + +TYPED_TEST_SUITE_P(generator_prng_tests); + +TYPED_TEST_P(generator_prng_tests, init_test) +{ + auto g = TestFixture::get_generator(); // offset = 0 + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1337); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1048576); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1 << 24); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1 << 28); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset((1ULL << 36) + 1234567ULL); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); +} + +TYPED_TEST_P(generator_prng_tests, uniform_uint_test) +{ + const size_t size = 1313; + unsigned int* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * (size + 1))); + + auto g = TestFixture::get_generator(); + ROCRAND_CHECK(g.generate_uniform(data + 1, size)); + 
HIP_CHECK(hipDeviceSynchronize()); + + unsigned int host_data[size]; + HIP_CHECK(hipMemcpy(host_data, data + 1, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned long long sum = 0; + for(size_t i = 0; i < size; i++) + { + sum += host_data[i]; + } + const unsigned int mean = sum / size; + ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); + + HIP_CHECK(hipFree(data)); +} + +template +void uniform_floating_point_test(rocrand_ordering ordering) +{ + const size_t size = 1313; + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); + + Generator g; + g.set_order(ordering); + ROCRAND_CHECK(g.generate_uniform(data, size)); + + T host_data[size]; + HIP_CHECK(hipMemcpy(host_data, data, sizeof(*host_data) * size, hipMemcpyDeviceToHost)); + + double sum = 0; + for(size_t i = 0; i < size; i++) + { + ASSERT_GT(host_data[i], static_cast(0.0)); + ASSERT_LE(host_data[i], static_cast(1.0)); + sum += host_data[i]; + } + const double mean = sum / size; + ASSERT_NEAR(mean, 0.5, 0.05); + + HIP_CHECK(hipFree(data)); +} + +TYPED_TEST_P(generator_prng_tests, uniform_float_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + uniform_floating_point_test(ordering); +} + +TYPED_TEST_P(generator_prng_tests, uniform_double_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + uniform_floating_point_test(ordering); +} + +template +void normal_floating_point_test(rocrand_ordering ordering) +{ + const size_t size = 1313; + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); + + Generator g; + g.set_order(ordering); + ROCRAND_CHECK(g.generate_normal(data, size, static_cast(2.0), static_cast(5.0))); + HIP_CHECK(hipDeviceSynchronize()); + + T host_data[size]; + HIP_CHECK(hipMemcpy(host_data, data, sizeof(*host_data) * size, hipMemcpyDeviceToHost)); + 
HIP_CHECK(hipDeviceSynchronize()); + + double mean = 0.0; + for(size_t i = 0; i < size; i++) + { + mean += host_data[i]; + } + mean = mean / size; + + double stddev = 0.0f; + for(size_t i = 0; i < size; i++) + { + stddev += std::pow(host_data[i] - mean, 2); + } + stddev = std::sqrt(stddev / size); + + EXPECT_NEAR(2.0, mean, 0.4); // 20% + EXPECT_NEAR(5.0, stddev, 1.0); // 20% + + HIP_CHECK(hipFree(data)); +} + +TYPED_TEST_P(generator_prng_tests, normal_float_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + normal_floating_point_test(ordering); +} + +TYPED_TEST_P(generator_prng_tests, normal_double_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + normal_floating_point_test(ordering); +} + +template +void log_normal_floating_point_test(rocrand_ordering ordering) +{ + const size_t size = 131313; + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); + + T normal_mean = static_cast(3.0); + T normal_stddev = static_cast(1.5); + T normal_var = normal_stddev * normal_stddev; + + T log_normal_mean = std::exp(normal_mean + normal_var / 2.0); + T log_normal_stddev = std::sqrt(std::exp(normal_var) - 1.0) * log_normal_mean; + + Generator g; + g.set_order(ordering); + ROCRAND_CHECK(g.generate_log_normal(data, size, normal_mean, normal_stddev)); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector host_data(size); + HIP_CHECK(hipMemcpy(host_data.data(), data, sizeof(T) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + double mean = 0.0; + for(size_t i = 0; i < size; i++) + { + mean += host_data[i]; + } + mean = mean / size; + + double stddev = 0.0f; + for(size_t i = 0; i < size; i++) + { + stddev += std::pow(host_data[i] - mean, 2); + } + stddev = std::sqrt(stddev / size); + + EXPECT_NEAR(log_normal_mean, mean, log_normal_mean * 0.2); // 20% + EXPECT_NEAR(log_normal_stddev, 
stddev, log_normal_stddev * 0.2); // 20% + + HIP_CHECK(hipFree(data)); +} + +TYPED_TEST_P(generator_prng_tests, log_normal_float_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + log_normal_floating_point_test(ordering); +} + +TYPED_TEST_P(generator_prng_tests, log_normal_double_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + log_normal_floating_point_test(ordering); +} + +TYPED_TEST_P(generator_prng_tests, poisson_test) +{ + const size_t size = 1313; + unsigned int* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + + auto g = TestFixture::get_generator(); + ROCRAND_CHECK(g.generate_poisson(data, size, 5.5)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int host_data[size]; + HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + double mean = 0.0; + for(size_t i = 0; i < size; i++) + { + mean += host_data[i]; + } + mean = mean / size; + + double var = 0.0; + for(size_t i = 0; i < size; i++) + { + double x = host_data[i] - mean; + var += x * x; + } + var = var / size; + + EXPECT_NEAR(mean, 5.5, std::max(1.0, 5.5 * 1e-2)); + EXPECT_NEAR(var, 5.5, std::max(1.0, 5.5 * 1e-2)); + + HIP_CHECK(hipFree(data)); +} + +// Check if the numbers generated by first generate() call are different from +// the numbers generated by the 2nd call (same generator) +TYPED_TEST_P(generator_prng_tests, state_progress_test) +{ + // Device data + const size_t size = 1025; + unsigned int* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + + // Generator + auto g0 = TestFixture::get_generator(); + + // Generate using g0 and copy to host + ROCRAND_CHECK(g0.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int host_data1[size]; + HIP_CHECK(hipMemcpy(host_data1, data, 
sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Generate using g0 and copy to host + ROCRAND_CHECK(g0.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int host_data2[size]; + HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + size_t same = 0; + for(size_t i = 0; i < size; i++) + { + if(host_data1[i] == host_data2[i]) + same++; + } + // It may happen that numbers are the same, so we + // just make sure that most of them are different. + EXPECT_LT(same, static_cast(0.01f * size)); + HIP_CHECK(hipFree(data)); +} + +// Checks if generators with the same seed and in the same state +// generate the same numbers +TYPED_TEST_P(generator_prng_tests, same_seed_test) +{ + const unsigned long long seed = 0xdeadbeefdeadbeefULL; + + // Device side data + const size_t size = 1024; + unsigned int* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + + // Generators + auto g0 = TestFixture::get_generator(), g1 = TestFixture::get_generator(); + // Set same seeds + g0.set_seed(seed); + g1.set_seed(seed); + + // Generate using g0 and copy to host + ROCRAND_CHECK(g0.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int g0_host_data[size]; + HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Generate using g1 and copy to host + ROCRAND_CHECK(g1.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int g1_host_data[size]; + HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Numbers generated using same generator with same + // seed should be the same + for(size_t i = 0; i < size; i++) + { + ASSERT_EQ(g0_host_data[i], g1_host_data[i]); + } + HIP_CHECK(hipFree(data)); +} + +template +void 
different_seed_impl(rocrand_ordering ordering, + const unsigned long long seed0, + const unsigned long long seed1) +{ + // Device side data + const size_t size = 1024; + unsigned int* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + + // Generators + Generator g0, g1; + g0.set_order(ordering); + g1.set_order(ordering); + + // Set different seeds + g0.set_seed(seed0); + g1.set_seed(seed1); + ASSERT_NE(g0.get_seed(), g1.get_seed()); + + // Generate using g0 and copy to host + ROCRAND_CHECK(g0.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int g0_host_data[size]; + HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Generate using g1 and copy to host + ROCRAND_CHECK(g1.generate(data, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int g1_host_data[size]; + HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + size_t same = 0; + for(size_t i = 0; i < size; i++) + { + if(g1_host_data[i] == g0_host_data[i]) + same++; + } + // It may happen that numbers are the same, so we + // just make sure that most of them are different. 
+ EXPECT_LT(same, static_cast(0.01f * size)); + + HIP_CHECK(hipFree(data)); +} + +template +void different_seed(rocrand_ordering ordering) +{ + different_seed_impl(ordering, Seed0, Seed1); +} + +// mtgp32 uses a different seed +template<> +void different_seed(rocrand_ordering ordering) +{ + different_seed_impl(ordering, 5ULL, 10ULL); +} + +// mt19937 uses a different seed +template<> +void different_seed(rocrand_ordering ordering) +{ + different_seed_impl(ordering, 5ULL, 10ULL); +} + +// lsfr113 uses it's particular implementation +template<> +void different_seed(rocrand_ordering /*ordering*/) +{ + GTEST_SKIP() << "LFSR113 runs a custom implementation of different_seed test!"; +} + +TYPED_TEST_P(generator_prng_tests, different_seed_test) +{ + using generator_t = typename TestFixture::generator_t; + constexpr rocrand_ordering ordering = TestFixture::ordering; + + different_seed(ordering); +} + +// +// Generator host API continuity tests +// +// Check that subsequent generations of different sizes produce one +// sequence without gaps, no matter how many values are generated per call. 
+// +template +struct generator_prng_continuity_tests : public testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; +}; + +TYPED_TEST_SUITE_P(generator_prng_continuity_tests); + +template, bool> + = false> +void continuity_test(GenerateFunc generate_func, + rocrand_ordering ordering, + unsigned int divisor = 1) +{ + std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); + std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); + if(divisor > 1) + { + for(size_t& s : sizes0) + s = (s + divisor - 1) & ~static_cast(divisor - 1); + for(size_t& s : sizes1) + s = (s + divisor - 1) & ~static_cast(divisor - 1); + } + + const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); + const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); + + T* data0; + T* data1; + HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); + HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); + + Generator g0; + g0.set_order(ordering); + Generator g1; + g1.set_order(ordering); + + std::vector host_data0(size0); + std::vector host_data1(size1); + + size_t current0 = 0; + for(size_t s : sizes0) + { + generate_func(g0, data0, s); + HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); + current0 += s; + } + size_t current1 = 0; + for(size_t s : sizes1) + { + generate_func(g1, data1, s); + HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); + current1 += s; + } + + for(size_t i = 0; i < std::min(size0, size1); i++) + { + ASSERT_EQ(host_data0[i], host_data1[i]); + } + + HIP_CHECK(hipFree(data0)); + HIP_CHECK(hipFree(data1)); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_uniform_uint_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef unsigned int output_t; + + 
continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_uniform(data, s); }, + ordering, + rocrand_impl::host::uniform_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_uniform_char_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef unsigned char output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_uniform(data, s); }, + ordering, + rocrand_impl::host::uniform_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_uniform_float_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef float output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_uniform(data, s); }, + ordering, + rocrand_impl::host::uniform_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_uniform_double_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef double output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_uniform(data, s); }, + ordering, + rocrand_impl::host::uniform_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_normal_float_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef float output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_normal(data, s, 0.0f, 1.0f); }, + ordering, + rocrand_impl::host::normal_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_normal_double_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename 
TestFixture::generator_t; + typedef double output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_normal(data, s, 0.0, 1.0); }, + ordering, + rocrand_impl::host::normal_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_log_normal_float_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef float output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) + { g.generate_log_normal(data, s, 0.0f, 1.0f); }, + ordering, + rocrand_impl::host::normal_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_log_normal_double_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef double output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_log_normal(data, s, 0.0, 1.0); }, + ordering, + rocrand_impl::host::normal_distribution::output_width); +} + +TYPED_TEST_P(generator_prng_continuity_tests, continuity_poisson_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef unsigned int output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_poisson(data, s, 100.0); }, + ordering, + rocrand_impl::host::poisson_distribution<>::output_width); +} + +template +struct generator_prng_offset_tests : public ::testing::Test +{ + using output_t = typename Params::output_t; + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } +}; + +// +// Generator host API offset tests +// 
+template +struct generator_prng_offset_tests_params +{ + using output_t = Output; + using generator_t = Generator; + static constexpr rocrand_ordering ordering = Ordering; +}; + +TYPED_TEST_SUITE_P(generator_prng_offset_tests); + +TYPED_TEST_P(generator_prng_offset_tests, offsets_test) +{ + using output_t = typename TestFixture::output_t; + const size_t size = 131313; + + constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; + + for(const auto offset : offsets) + { + SCOPED_TRACE(::testing::Message() << "with offset=" << offset); + + const size_t size0 = size; + const size_t size1 = (size + offset); + output_t* data0; + output_t* data1; + HIP_CHECK(hipMalloc(&data0, sizeof(output_t) * size0)); + HIP_CHECK(hipMalloc(&data1, sizeof(output_t) * size1)); + + auto g0 = TestFixture::get_generator(); + g0.set_offset(offset); + g0.generate(data0, size0); + + auto g1 = TestFixture::get_generator(); + g1.generate(data1, size1); + + std::vector host_data0(size0); + std::vector host_data1(size1); + HIP_CHECK( + hipMemcpy(host_data0.data(), data0, sizeof(output_t) * size0, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_data1.data(), data1, sizeof(output_t) * size1, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + for(size_t i = 0; i < size; ++i) + { + ASSERT_EQ(host_data0[i], host_data1[i + offset]); + } + + HIP_CHECK(hipFree(data0)); + HIP_CHECK(hipFree(data1)); + } +} + +REGISTER_TYPED_TEST_SUITE_P(generator_prng_tests, + init_test, + uniform_uint_test, + uniform_float_test, + uniform_double_test, + normal_float_test, + normal_double_test, + log_normal_float_test, + log_normal_double_test, + poisson_test, + state_progress_test, + same_seed_test, + different_seed_test); + +REGISTER_TYPED_TEST_SUITE_P(generator_prng_continuity_tests, + continuity_uniform_uint_test, + continuity_uniform_char_test, + continuity_uniform_float_test, + continuity_uniform_double_test, + continuity_normal_float_test, + continuity_normal_double_test, + 
continuity_log_normal_float_test, + continuity_log_normal_double_test, + continuity_poisson_test); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(generator_prng_continuity_tests); + +REGISTER_TYPED_TEST_SUITE_P(generator_prng_offset_tests, offsets_test); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(generator_prng_offset_tests); + +#endif // ROCRAND_TEST_INTERNAL_TEST_ROCRAND_PRNG_HPP_ diff --git a/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp b/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp index cdedb648..134e1823 100644 --- a/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp +++ b/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,11 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" -using test_rocrand_scrambled_sobol32_qrng_types - = ::testing::Types>; +using rocrand_impl::host::scrambled_sobol32_generator; -INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests, - rocrand_sobol_qrng_tests, - test_rocrand_scrambled_sobol32_qrng_types); +using test_scrambled_sobol32_qrng_types = ::testing::Types< + sobol_qrng_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, + sobol_qrng_tests, + test_scrambled_sobol32_qrng_types); diff --git a/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp b/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp index e9f4362f..ed87966f 100644 --- a/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp +++ b/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,11 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" -using test_rocrand_scrambled_sobol64_qrng_types - = ::testing::Types>; +using rocrand_impl::host::scrambled_sobol64_generator; -INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests, - rocrand_sobol_qrng_tests, - test_rocrand_scrambled_sobol64_qrng_types); +using test_scrambled_sobol64_qrng_types = ::testing::Types< + sobol_qrng_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, + sobol_qrng_tests, + test_scrambled_sobol64_qrng_types); diff --git a/test/internal/test_rocrand_sobol32_qrng.cpp b/test/internal/test_rocrand_sobol32_qrng.cpp index 2d4d1f94..5c134464 100644 --- a/test/internal/test_rocrand_sobol32_qrng.cpp +++ b/test/internal/test_rocrand_sobol32_qrng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,9 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" -using test_rocrand_sobol32_qrng_types - = ::testing::Types>; +using rocrand_impl::host::sobol32_generator; -INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests, - rocrand_sobol_qrng_tests, - test_rocrand_sobol32_qrng_types); +using test_sobol32_qrng_types + = ::testing::Types>; + +INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_sobol32_qrng_types); diff --git a/test/internal/test_rocrand_sobol64_qrng.cpp b/test/internal/test_rocrand_sobol64_qrng.cpp index db72e8dc..eeda2af4 100644 --- a/test/internal/test_rocrand_sobol64_qrng.cpp +++ b/test/internal/test_rocrand_sobol64_qrng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,9 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" -using test_rocrand_sobol64_qrng_types - = ::testing::Types>; +using rocrand_impl::host::sobol64_generator; -INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests, - rocrand_sobol_qrng_tests, - test_rocrand_sobol64_qrng_types); +using test_sobol64_qrng_types + = ::testing::Types>; + +INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_sobol64_qrng_types); diff --git a/test/internal/test_rocrand_sobol_qrng.hpp b/test/internal/test_rocrand_sobol_qrng.hpp index c1efc3c3..ecae4c70 100644 --- a/test/internal/test_rocrand_sobol_qrng.hpp +++ b/test/internal/test_rocrand_sobol_qrng.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,9 @@ #ifndef ROCRAND_TEST_INTERNAL_TEST_ROCRAND_SOBOL_QRNG_HPP_ #define ROCRAND_TEST_INTERNAL_TEST_ROCRAND_SOBOL_QRNG_HPP_ -#include "rocrand/rocrand.h" #include "test_common.hpp" #include "test_rocrand_common.hpp" +#include #include @@ -31,17 +31,31 @@ #include +#include +#include +#include #include #include #include template -struct rocrand_sobol_qrng_tests : public ::testing::Test +struct sobol_qrng_tests : public ::testing::Test { using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; using constant_t = typename generator_t::constant_type; using engine_t = typename generator_t::engine_type; + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } + rocrand_status get_engine(engine_t& engine, unsigned long long int offset) { const constant_t* direction_vectors; @@ -120,70 +134,96 @@ struct rocrand_sobol_qrng_tests : public ::testing::Test } }; -TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests); +TYPED_TEST_SUITE_P(sobol_qrng_tests); -template -struct rocrand_sobol_qrng_tests_params +template +struct sobol_qrng_tests_params { using generator_t = Generator; + static inline constexpr rocrand_ordering ordering = Ordering; }; -TYPED_TEST_P(rocrand_sobol_qrng_tests, uniform_uint_test) +TYPED_TEST_P(sobol_qrng_tests, init_test) { - using generator_t = typename TestFixture::generator_t; + auto g = TestFixture::get_generator(); // offset = 0 + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); + 
g.set_offset(1); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); - generator_t g; - ROCRAND_CHECK(g.generate(data, size)); + g.set_offset(1337); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + g.set_offset(1048576); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1 << 24); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset(1 << 28); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); + + g.set_offset((1ULL << 36) + 1234567ULL); + ROCRAND_CHECK(g.init()); + HIP_CHECK(hipDeviceSynchronize()); +} + +template +void uniform_integer_test(T max) +{ + const size_t size = 1313; + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(T) * size)); + + Generator g; + ROCRAND_CHECK(g.generate_uniform(data, size)); - unsigned long long sum = 0; + T host_data[size]; + HIP_CHECK(hipMemcpy(host_data, data, sizeof(T) * size, hipMemcpyDeviceToHost)); + + double sum = 0; for(size_t i = 0; i < size; i++) { sum += host_data[i]; } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); + const double mean = sum / size; + ASSERT_NEAR(mean, + static_cast(max) / static_cast(2), + static_cast(max) / static_cast(20)); HIP_CHECK(hipFree(data)); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, uniform_uint64_test) +TYPED_TEST_P(sobol_qrng_tests, uniform_uint_test) { using generator_t = typename TestFixture::generator_t; - using constant_t = typename generator_t::constant_type; + using constant_t = typename TestFixture::constant_t; - if constexpr(!std::is_same_v) + if constexpr(!std::is_same_v) { return; } - constexpr size_t size = 1313; - constant_t* data; - HIP_CHECK(hipMalloc(&data, sizeof(constant_t) * size)); - - generator_t g; - ROCRAND_CHECK(g.generate(data, size)); + uniform_integer_test(UINT_MAX); +} - constant_t host_data[size]; - 
HIP_CHECK(hipMemcpy(host_data, data, sizeof(constant_t) * size, hipMemcpyDeviceToHost)); +TYPED_TEST_P(sobol_qrng_tests, uniform_uint64_test) +{ + using generator_t = typename TestFixture::generator_t; + using constant_t = typename TestFixture::constant_t; - double sum = 0; - for(size_t i = 0; i < size; i++) + if constexpr(!std::is_same_v) { - sum += host_data[i]; + return; } - const double mean = sum / size; - ASSERT_NEAR(mean, - static_cast(UINT64_MAX) / 2.0, - static_cast(UINT64_MAX) / 20.0); - HIP_CHECK(hipFree(data)); + uniform_integer_test(UINT64_MAX); } template @@ -194,7 +234,7 @@ void uniform_floating_point_test() HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); Generator g; - ROCRAND_CHECK(g.generate(data, size)); + ROCRAND_CHECK(g.generate_uniform(data, size)); T host_data[size]; HIP_CHECK(hipMemcpy(host_data, data, sizeof(*host_data) * size, hipMemcpyDeviceToHost)); @@ -212,14 +252,14 @@ void uniform_floating_point_test() HIP_CHECK(hipFree(data)); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, uniform_float_test) +TYPED_TEST_P(sobol_qrng_tests, uniform_float_test) { using generator_t = typename TestFixture::generator_t; uniform_floating_point_test(); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, uniform_double_test) +TYPED_TEST_P(sobol_qrng_tests, uniform_double_test) { using generator_t = typename TestFixture::generator_t; @@ -235,9 +275,11 @@ void normal_floating_point_test() Generator g; ROCRAND_CHECK(g.generate_normal(data, size, static_cast(2.0), static_cast(5.0))); + HIP_CHECK(hipDeviceSynchronize()); T host_data[size]; HIP_CHECK(hipMemcpy(host_data, data, sizeof(*host_data) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); double mean = 0.0; for(size_t i = 0; i < size; i++) @@ -246,42 +288,96 @@ void normal_floating_point_test() } mean = mean / size; - double std = 0.0f; + double stddev = 0.0f; for(size_t i = 0; i < size; i++) { - std += std::pow(host_data[i] - mean, 2); + stddev += std::pow(host_data[i] - mean, 2); } - std = 
sqrt(std / size); + stddev = std::sqrt(stddev / size); EXPECT_NEAR(2.0, mean, 0.4); // 20% - EXPECT_NEAR(5.0, std, 1.0); // 20% + EXPECT_NEAR(5.0, stddev, 1.0); // 20% HIP_CHECK(hipFree(data)); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, normal_float_test) +TYPED_TEST_P(sobol_qrng_tests, normal_float_test) { using generator_t = typename TestFixture::generator_t; normal_floating_point_test(); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, normal_double_test) +TYPED_TEST_P(sobol_qrng_tests, normal_double_test) { using generator_t = typename TestFixture::generator_t; normal_floating_point_test(); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, poisson_test) +template +void log_normal_floating_point_test() +{ + const size_t size = 131313; + T* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(*data) * size)); + + T normal_mean = static_cast(3.0); + T normal_stddev = static_cast(1.5); + T normal_var = normal_stddev * normal_stddev; + + T log_normal_mean = std::exp(normal_mean + normal_var / 2.0); + T log_normal_stddev = std::sqrt(std::exp(normal_var) - 1.0) * log_normal_mean; + + Generator g; + ROCRAND_CHECK(g.generate_log_normal(data, size, normal_mean, normal_stddev)); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector host_data(size); + HIP_CHECK(hipMemcpy(host_data.data(), data, sizeof(T) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + double mean = 0.0; + for(size_t i = 0; i < size; i++) + { + mean += host_data[i]; + } + mean = mean / size; + + double stddev = 0.0f; + for(size_t i = 0; i < size; i++) + { + stddev += std::pow(host_data[i] - mean, 2); + } + stddev = std::sqrt(stddev / size); + + EXPECT_NEAR(log_normal_mean, mean, log_normal_mean * 0.2); // 20% + EXPECT_NEAR(log_normal_stddev, stddev, log_normal_stddev * 0.2); // 20% + + HIP_CHECK(hipFree(data)); +} + +TYPED_TEST_P(sobol_qrng_tests, log_normal_float_test) +{ + using generator_t = typename TestFixture::generator_t; + + log_normal_floating_point_test(); +} + +TYPED_TEST_P(sobol_qrng_tests, 
log_normal_double_test) { using generator_t = typename TestFixture::generator_t; + log_normal_floating_point_test(); +} + +TYPED_TEST_P(sobol_qrng_tests, poisson_test) +{ const size_t size = 1313; unsigned int* data; HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - generator_t g; + auto g = TestFixture::get_generator(); ROCRAND_CHECK(g.generate_poisson(data, size, 5.5)); HIP_CHECK(hipDeviceSynchronize()); @@ -310,15 +406,13 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, poisson_test) HIP_CHECK(hipFree(data)); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, dimensions_test) +TYPED_TEST_P(sobol_qrng_tests, dimensions_test) { - using generator_t = typename TestFixture::generator_t; - const size_t size = 12345; float* data; HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - generator_t g; + auto g = TestFixture::get_generator(); ROCRAND_CHECK(g.generate(data, size)); @@ -334,7 +428,7 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, dimensions_test) // Check if the numbers generated by first generate() call are different from // the numbers generated by the 2nd call (same generator) -TYPED_TEST_P(rocrand_sobol_qrng_tests, state_progress_test) +TYPED_TEST_P(sobol_qrng_tests, state_progress_test) { using generator_t = typename TestFixture::generator_t; @@ -377,7 +471,7 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, state_progress_test) HIP_CHECK(hipFree(data)); } -TYPED_TEST_P(rocrand_sobol_qrng_tests, discard_test) +TYPED_TEST_P(sobol_qrng_tests, discard_test) { using generator_t = typename TestFixture::generator_t; using engine_t = typename generator_t::engine_type; @@ -412,7 +506,7 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, discard_test) } } -TYPED_TEST_P(rocrand_sobol_qrng_tests, discard_stride_test) +TYPED_TEST_P(sobol_qrng_tests, discard_stride_test) { using generator_t = typename TestFixture::generator_t; using engine_t = typename generator_t::engine_type; @@ -435,7 +529,7 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, discard_stride_test) } } 
-TYPED_TEST_P(rocrand_sobol_qrng_tests, offsets_test) +TYPED_TEST_P(sobol_qrng_tests, offsets_test) { using generator_t = typename TestFixture::generator_t; @@ -494,7 +588,7 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, offsets_test) // Check that subsequent generations of different sizes produce one Sobol // sequence without gaps, no matter how many values are generated per call. -TYPED_TEST_P(rocrand_sobol_qrng_tests, continuity_test) +TYPED_TEST_P(sobol_qrng_tests, continuity_test) { constexpr unsigned int continuity_test_dimensions[] = {1, 2, 10, 21}; @@ -516,8 +610,8 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, continuity_test) HIP_CHECK(hipMalloc(&data0, sizeof(unsigned int) * size0)); HIP_CHECK(hipMalloc(&data1, sizeof(unsigned int) * size1)); - rocrand_sobol32 g0; - rocrand_sobol32 g1; + typename TestFixture::generator_t g0; + typename TestFixture::generator_t g1; g0.set_dimensions(dimensions); g1.set_dimensions(dimensions); @@ -566,13 +660,16 @@ TYPED_TEST_P(rocrand_sobol_qrng_tests, continuity_test) } } -REGISTER_TYPED_TEST_SUITE_P(rocrand_sobol_qrng_tests, - uniform_uint64_test, - uniform_double_test, +REGISTER_TYPED_TEST_SUITE_P(sobol_qrng_tests, + init_test, uniform_uint_test, - normal_double_test, + uniform_uint64_test, uniform_float_test, + uniform_double_test, normal_float_test, + normal_double_test, + log_normal_float_test, + log_normal_double_test, poisson_test, dimensions_test, state_progress_test, diff --git a/test/internal/test_rocrand_threefry2x32_20_prng.cpp b/test/internal/test_rocrand_threefry2x32_20_prng.cpp index 998a7cf4..fa6667e4 100644 --- a/test/internal/test_rocrand_threefry2x32_20_prng.cpp +++ b/test/internal/test_rocrand_threefry2x32_20_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -16,240 +16,56 @@ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -#include -#include -#include -#include - -#include +#include "test_rocrand_prng.hpp" +#include "test_rocrand_threefryNx32_20_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" - -struct rocrand_threefry_prng_tests : public ::testing::TestWithParam -{ - rocrand_threefry2x32_20 get_generator() const - { - rocrand_threefry2x32_20 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_threefry_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -// Assert that the kernel arguments are trivially copyable and destructible. -TEST(rocrand_threefry_prng_tests, type) -{ - using engine_type = typename rocrand_threefry2x32_20::engine_type::base_type; - // TODO: Enable once uint2 is trivially copyable. 
- // EXPECT_TRUE(std::is_trivially_copyable::value); - EXPECT_TRUE(std::is_trivially_destructible::value); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * (size + 1))); - - rocrand_threefry2x32_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data + 1, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data + 1, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_threefry2x32_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_threefry_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - rocrand_threefry2x32_20 g0 = get_generator(); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - 
HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_threefry_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_threefry2x32_20 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - 
ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_threefry_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_threefry2x32_20 g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -/// -/// rocrand_threefry_prng_state_tests TEST GROUP -/// - -// Just get access to internal state -class rocrand_threefry2x32_engine_type_test : public rocrand_threefry2x32_20::engine_type +using rocrand_impl::host::threefry2x32_20_generator; + +// Generator API tests +using threefry2x32_20_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using threefry2x32_20_generator_prng_offset_tests_types + = ::testing::Types, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x32_20_generator, + generator_prng_tests, + threefry2x32_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x32_20_generator, + generator_prng_continuity_tests, + threefry2x32_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x32_20_generator, + generator_prng_offset_tests, + threefry2x32_20_generator_prng_offset_tests_types); + +// threefry2x32_20-specific generator API tests +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x32_20_generator, + threefryNx32_20_generator_prng_tests, + threefry2x32_20_generator_prng_tests_types); + +// Engine API tests +class threefry2x32_engine_type_test : public threefry2x32_20_generator::engine_type { public: - __host__ rocrand_threefry2x32_engine_type_test() : rocrand_threefry2x32_20::engine_type(0, 0, 0) - {} + __host__ threefry2x32_engine_type_test() : threefry2x32_20_generator::engine_type(0, 0, 0) {} __host__ state_type& internal_state_ref() { @@ -257,10 +73,10 @@ class rocrand_threefry2x32_engine_type_test : public rocrand_threefry2x32_20::en } }; -TEST(rocrand_threefry_prng_state_tests, seed_test) +TEST(threefry_prng_state_tests, seed_test) { - rocrand_threefry2x32_engine_type_test engine; - rocrand_threefry2x32_engine_type_test::state_type& state = 
engine.internal_state_ref(); + threefry2x32_engine_type_test engine; + threefry2x32_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -279,10 +95,10 @@ TEST(rocrand_threefry_prng_state_tests, seed_test) // Check if the threefry state counter is calculated correctly during // random number generation. -TEST(rocrand_threefry_prng_state_tests, discard_test) +TEST(threefry_prng_state_tests, discard_test) { - rocrand_threefry2x32_engine_type_test engine; - rocrand_threefry2x32_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry2x32_engine_type_test engine; + threefry2x32_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -328,10 +144,10 @@ TEST(rocrand_threefry_prng_state_tests, discard_test) EXPECT_EQ(state.substate, 0U); } -TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) +TEST(threefry_prng_state_tests, discard_sequence_test) { - rocrand_threefry2x32_engine_type_test engine; - rocrand_threefry2x32_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry2x32_engine_type_test engine; + threefry2x32_engine_type_test::state_type& state = engine.internal_state_ref(); engine.discard_subsequence(UINT_MAX); EXPECT_EQ(state.counter.x, 0U); @@ -345,212 +161,3 @@ TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.y, 457U); EXPECT_EQ(state.substate, 0U); } - -template -struct rocrand_threefry_prng_offset : public ::testing::Test -{ - using type = T; - rocrand_threefry2x32_20 get_generator() const - { - rocrand_threefry2x32_20 g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -template -struct rocrand_threefry_prng_offset_params -{ - using output_type = T; - static constexpr inline rocrand_ordering ordering = 
Ordering; -}; - -using RocrandThreefryPrngOffsetTypes = ::testing::Types< - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params>; -TYPED_TEST_SUITE(rocrand_threefry_prng_offset, RocrandThreefryPrngOffsetTypes); - -TYPED_TEST(rocrand_threefry_prng_offset, offsets_test) -{ - using Params = typename TestFixture::type; - using T = typename Params::output_type; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; - - for(const auto offset : offsets) - { - SCOPED_TRACE(::testing::Message() << "with offset=" << offset); - - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry2x32_20 g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_threefry2x32_20 g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry2x32_20 g0; - g0.set_order(ordering); - rocrand_threefry2x32_20 g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_char_test) -{ - typedef unsigned char output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_uint_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - 
uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_poisson_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry2x32_20& g, output_type* data, size_t s) - { g.generate_poisson(data, s, 100.); }, - GetParam(), - 1); -} diff --git 
a/test/internal/test_rocrand_threefry2x64_20_prng.cpp b/test/internal/test_rocrand_threefry2x64_20_prng.cpp index b551976c..cd13ea3b 100644 --- a/test/internal/test_rocrand_threefry2x64_20_prng.cpp +++ b/test/internal/test_rocrand_threefry2x64_20_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,244 +18,59 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include +#include "test_rocrand_prng.hpp" +#include "test_rocrand_threefryNx64_20_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" - -struct rocrand_threefry_prng_tests : public ::testing::TestWithParam -{ - rocrand_threefry2x64_20 get_generator() const - { - rocrand_threefry2x64_20 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_threefry_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -// Assert that the kernel arguments are trivially copyable and destructible. -TEST(rocrand_threefry_prng_tests, type) -{ - using engine_type = typename rocrand_threefry2x64_20::engine_type::base_type; - // TODO: Enable once ulonglong2 is trivially copyable. 
- // EXPECT_TRUE(std::is_trivially_copyable::value); - EXPECT_TRUE(std::is_trivially_destructible::value); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_ulonglong_test) -{ - const size_t size = 1313; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * (size + 1))); - - rocrand_threefry2x64_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data + 1, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data[size]; - HIP_CHECK( - hipMemcpy(host_data, data + 1, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double mean = 0.; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i] / static_cast(size); - } - ASSERT_NEAR(mean, static_cast(ULLONG_MAX / 2), static_cast(ULLONG_MAX / 20)); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_threefry2x64_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_threefry_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generator - rocrand_threefry2x64_20 g0 = get_generator(); - - // Generate using g0 and copy to host - 
ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data1[size]; - HIP_CHECK( - hipMemcpy(host_data1, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data2[size]; - HIP_CHECK( - hipMemcpy(host_data2, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_threefry_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generators - rocrand_threefry2x64_20 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g0_host_data[size]; - HIP_CHECK( - hipMemcpy(g0_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g1_host_data[size]; - HIP_CHECK( - hipMemcpy(g1_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers 
generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_threefry_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generators - rocrand_threefry2x64_20 g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g0_host_data[size]; - HIP_CHECK( - hipMemcpy(g0_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g1_host_data[size]; - HIP_CHECK( - hipMemcpy(g1_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -/// -/// rocrand_threefry_prng_state_tests TEST GROUP -/// - -// Just get access to internal state -class rocrand_threefry2x64_engine_type_test : public rocrand_threefry2x64_20::engine_type +using rocrand_impl::host::threefry2x64_20_generator; + +// Generator API tests +using threefry2x64_20_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using threefry2x64_20_generator_prng_offset_tests_types + = ::testing::Types, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x64_20_generator, + generator_prng_tests, + threefry2x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x64_20_generator, + generator_prng_continuity_tests, + threefry2x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x64_20_generator, + generator_prng_offset_tests, + threefry2x64_20_generator_prng_offset_tests_types); + +// threefry2x64_20-specific generator API tests +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x64_20_generator, + threefryNx64_20_generator_prng_tests, + threefry2x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry2x64_20_generator, + threefryNx64_20_generator_prng_continuity_tests, + threefry2x64_20_generator_prng_tests_types); + +// Engine API tests +class threefry2x64_engine_type_test : public threefry2x64_20_generator::engine_type { public: - __host__ rocrand_threefry2x64_engine_type_test() : rocrand_threefry2x64_20::engine_type(0, 0, 0) - {} + __host__ threefry2x64_engine_type_test() : threefry2x64_20_generator::engine_type(0, 0, 0) {} __host__ state_type& internal_state_ref() { @@ -263,10 +78,10 @@ class rocrand_threefry2x64_engine_type_test : public rocrand_threefry2x64_20::en } }; -TEST(rocrand_threefry_prng_state_tests, seed_test) 
+TEST(threefry_prng_state_tests, seed_test) { - rocrand_threefry2x64_engine_type_test engine; - rocrand_threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry2x64_engine_type_test engine; + threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0ULL); EXPECT_EQ(state.counter.y, 0ULL); @@ -282,10 +97,10 @@ TEST(rocrand_threefry_prng_state_tests, seed_test) // Check if the threefry state counter is calculated correctly during // random number generation. -TEST(rocrand_threefry_prng_state_tests, discard_test) +TEST(threefry_prng_state_tests, discard_test) { - rocrand_threefry2x64_engine_type_test engine; - rocrand_threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry2x64_engine_type_test engine; + threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0ULL); EXPECT_EQ(state.counter.y, 0ULL); @@ -336,10 +151,10 @@ TEST(rocrand_threefry_prng_state_tests, discard_test) EXPECT_EQ(state.substate, 0ULL); } -TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) +TEST(threefry_prng_state_tests, discard_sequence_test) { - rocrand_threefry2x64_engine_type_test engine; - rocrand_threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry2x64_engine_type_test engine; + threefry2x64_engine_type_test::state_type& state = engine.internal_state_ref(); engine.discard_subsequence(ULLONG_MAX); EXPECT_EQ(state.counter.x, 0ULL); @@ -354,226 +169,3 @@ TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.y, 457ULL); EXPECT_EQ(state.substate, 0U); } - -template -struct rocrand_threefry_prng_offset : public ::testing::Test -{ - using type = T; - rocrand_threefry2x64_20 get_generator() const - { - rocrand_threefry2x64_20 g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for 
generator"); - } - return g; - } -}; - -template -struct rocrand_threefry_prng_offset_params -{ - using output_type = T; - static constexpr inline rocrand_ordering ordering = Ordering; -}; - -using RocrandThreefryPrngOffsetTypes = ::testing::Types< - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params>; - -TYPED_TEST_SUITE(rocrand_threefry_prng_offset, RocrandThreefryPrngOffsetTypes); - -TYPED_TEST(rocrand_threefry_prng_offset, offsets_test) -{ - using Params = typename TestFixture::type; - using T = typename Params::output_type; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; - - for(const auto offset : offsets) - { - SCOPED_TRACE(::testing::Message() << "with offset=" << offset); - - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry2x64_20 g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_threefry2x64_20 g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned long long divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry2x64_20 g0; - g0.set_order(ordering); - rocrand_threefry2x64_20 g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_char_test) -{ - typedef unsigned char output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_uint_test) -{ - typedef unsigned int output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - 
uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_ullong_test) -{ - typedef unsigned long long int output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry2x64_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} 
- -TEST_P(rocrand_threefry_prng_tests, continuity_poisson_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry2x64_20& g, output_type* data, size_t s) - { g.generate_poisson(data, s, 100.); }, - GetParam(), - rocrand_poisson_distribution<>::output_width); -} diff --git a/test/internal/test_rocrand_threefry4x32_20_prng.cpp b/test/internal/test_rocrand_threefry4x32_20_prng.cpp index 785392ab..3ce1be60 100644 --- a/test/internal/test_rocrand_threefry4x32_20_prng.cpp +++ b/test/internal/test_rocrand_threefry4x32_20_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,238 +18,55 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include +#include "test_rocrand_prng.hpp" +#include "test_rocrand_threefryNx32_20_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" - -struct rocrand_threefry_prng_tests : public ::testing::TestWithParam -{ - rocrand_threefry4x32_20 get_generator() const - { - rocrand_threefry4x32_20 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_threefry_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -// Assert that the kernel arguments are trivially copyable and destructible. -TEST(rocrand_threefry_prng_tests, type) -{ - using engine_type = typename rocrand_threefry4x32_20::engine_type::base_type; - // TODO: Enable once uint4 is trivially copyable. 
- // EXPECT_TRUE(std::is_trivially_copyable::value); - EXPECT_TRUE(std::is_trivially_destructible::value); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * (size + 1))); - - rocrand_threefry4x32_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data + 1, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data + 1, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_threefry4x32_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_threefry_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - rocrand_threefry4x32_20 g0 = get_generator(); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - 
HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_threefry_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_threefry4x32_20 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - 
ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_threefry_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_threefry4x32_20 g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -/// -/// rocrand_threefry_prng_state_tests TEST GROUP -/// - -// Just get access to internal state -class rocrand_threefry4x32_engine_type_test : public rocrand_threefry4x32_20::engine_type +using rocrand_impl::host::threefry4x32_20_generator; + +// Generator API tests +using threefry4x32_20_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using threefry4x32_20_generator_prng_offset_tests_types + = ::testing::Types, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x32_20_generator, + generator_prng_tests, + threefry4x32_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x32_20_generator, + generator_prng_continuity_tests, + threefry4x32_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x32_20_generator, + generator_prng_offset_tests, + threefry4x32_20_generator_prng_offset_tests_types); + +// threefry4x32_20-specific generator API tests +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x32_20_generator, + threefryNx32_20_generator_prng_tests, + threefry4x32_20_generator_prng_tests_types); + +// Engine API tests +class threefry4x32_engine_type_test : public threefry4x32_20_generator::engine_type { public: - __host__ rocrand_threefry4x32_engine_type_test() : rocrand_threefry4x32_20::engine_type(0, 0, 0) - {} + __host__ threefry4x32_engine_type_test() : threefry4x32_20_generator::engine_type(0, 0, 0) {} __host__ state_type& internal_state_ref() { @@ -257,10 +74,10 @@ class rocrand_threefry4x32_engine_type_test : public rocrand_threefry4x32_20::en } }; -TEST(rocrand_threefry_prng_state_tests, seed_test) +TEST(threefry_prng_state_tests, seed_test) { - rocrand_threefry4x32_engine_type_test engine; - rocrand_threefry4x32_engine_type_test::state_type& state = 
engine.internal_state_ref(); + threefry4x32_engine_type_test engine; + threefry4x32_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -285,10 +102,10 @@ TEST(rocrand_threefry_prng_state_tests, seed_test) // Check if the threefry state counter is calculated correctly during // random number generation. -TEST(rocrand_threefry_prng_state_tests, discard_test) +TEST(threefry_prng_state_tests, discard_test) { - rocrand_threefry4x32_engine_type_test engine; - rocrand_threefry4x32_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry4x32_engine_type_test engine; + threefry4x32_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0U); EXPECT_EQ(state.counter.y, 0U); @@ -378,10 +195,10 @@ TEST(rocrand_threefry_prng_state_tests, discard_test) EXPECT_EQ(state.substate, 0U); } -TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) +TEST(threefry_prng_state_tests, discard_sequence_test) { - rocrand_threefry4x32_engine_type_test engine; - rocrand_threefry4x32_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry4x32_engine_type_test engine; + threefry4x32_engine_type_test::state_type& state = engine.internal_state_ref(); engine.discard_subsequence(UINT_MAX); EXPECT_EQ(state.counter.x, 0U); @@ -428,212 +245,3 @@ TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.w, 6U); EXPECT_EQ(state.substate, 0U); } - -template -struct rocrand_threefry_prng_offset : public ::testing::Test -{ - using type = T; - rocrand_threefry4x32_20 get_generator() const - { - rocrand_threefry4x32_20 g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -template -struct rocrand_threefry_prng_offset_params -{ - using output_type = T; - static constexpr inline rocrand_ordering ordering = 
Ordering; -}; - -using RocrandThreefryPrngOffsetTypes = ::testing::Types< - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params>; -TYPED_TEST_SUITE(rocrand_threefry_prng_offset, RocrandThreefryPrngOffsetTypes); - -TYPED_TEST(rocrand_threefry_prng_offset, offsets_test) -{ - using Params = typename TestFixture::type; - using T = typename Params::output_type; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; - - for(const auto offset : offsets) - { - SCOPED_TRACE(::testing::Message() << "with offset=" << offset); - - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry4x32_20 g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_threefry4x32_20 g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry4x32_20 g0; - g0.set_order(ordering); - rocrand_threefry4x32_20 g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_char_test) -{ - typedef unsigned char output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_uint_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - 
uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_float_test) -{ - typedef float output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_double_test) -{ - typedef double output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_poisson_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry4x32_20& g, output_type* data, size_t s) - { g.generate_poisson(data, s, 100.); }, - GetParam(), - 1); -} diff --git 
a/test/internal/test_rocrand_threefry4x64_20_prng.cpp b/test/internal/test_rocrand_threefry4x64_20_prng.cpp index d55f36ab..280c333d 100644 --- a/test/internal/test_rocrand_threefry4x64_20_prng.cpp +++ b/test/internal/test_rocrand_threefry4x64_20_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,244 +18,59 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include +#include "test_rocrand_prng.hpp" +#include "test_rocrand_threefryNx64_20_prng.hpp" #include -#include #include -#include "test_common.hpp" -#include "test_rocrand_common.hpp" - -struct rocrand_threefry_prng_tests : public ::testing::TestWithParam -{ - rocrand_threefry4x64_20 get_generator() const - { - rocrand_threefry4x64_20 g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - rocrand_threefry_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -// Assert that the kernel arguments are trivially copyable and destructible. -TEST(rocrand_threefry_prng_tests, type) -{ - using engine_type = typename rocrand_threefry4x64_20::engine_type::base_type; - // TODO: Enable once ulonglong4 is trivially copyable. 
- // EXPECT_TRUE(std::is_trivially_copyable::value); - EXPECT_TRUE(std::is_trivially_destructible::value); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * (size + 1))); - - rocrand_threefry4x64_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data + 1, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data[size]; - HIP_CHECK( - hipMemcpy(host_data, data + 1, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double mean = 0.; - for(size_t i = 0; i < size; i++) - { - mean += host_data[i] / static_cast(size); - } - ASSERT_NEAR(mean, static_cast(ULLONG_MAX / 2), static_cast(ULLONG_MAX / 20)); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(rocrand_threefry_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_threefry4x64_20 g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(rocrand_threefry_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generator - rocrand_threefry4x64_20 g0 = get_generator(); - - // Generate using g0 and copy to host - 
ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data1[size]; - HIP_CHECK( - hipMemcpy(host_data1, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long host_data2[size]; - HIP_CHECK( - hipMemcpy(host_data2, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(rocrand_threefry_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; - - // Device side data - const size_t size = 1024; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generators - rocrand_threefry4x64_20 g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g0_host_data[size]; - HIP_CHECK( - hipMemcpy(g0_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g1_host_data[size]; - HIP_CHECK( - hipMemcpy(g1_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Numbers 
generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(rocrand_threefry_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned long long* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * size)); - - // Generators - rocrand_threefry4x64_20 g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g0_host_data[size]; - HIP_CHECK( - hipMemcpy(g0_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long g1_host_data[size]; - HIP_CHECK( - hipMemcpy(g1_host_data, data, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. 
- EXPECT_LT(same, static_cast(0.01f * size)); - HIP_CHECK(hipFree(data)); -} - -/// -/// rocrand_threefry_prng_state_tests TEST GROUP -/// - -// Just get access to internal state -class rocrand_threefry4x64_engine_type_test : public rocrand_threefry4x64_20::engine_type +using rocrand_impl::host::threefry4x64_20_generator; + +// Generator API tests +using threefry4x64_20_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; + +using threefry4x64_20_generator_prng_offset_tests_types + = ::testing::Types, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x64_20_generator, + generator_prng_tests, + threefry4x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x64_20_generator, + generator_prng_continuity_tests, + threefry4x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x64_20_generator, + generator_prng_offset_tests, + threefry4x64_20_generator_prng_offset_tests_types); + +// threefry4x64_20-specific generator API tests +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x64_20_generator, + threefryNx64_20_generator_prng_tests, + threefry4x64_20_generator_prng_tests_types); + +INSTANTIATE_TYPED_TEST_SUITE_P(threefry4x64_20_generator, + threefryNx64_20_generator_prng_continuity_tests, + threefry4x64_20_generator_prng_tests_types); + +// Engine API tests +class threefry4x64_engine_type_test : public threefry4x64_20_generator::engine_type { public: - __host__ rocrand_threefry4x64_engine_type_test() : rocrand_threefry4x64_20::engine_type(0, 0, 0) - {} + __host__ threefry4x64_engine_type_test() : threefry4x64_20_generator::engine_type(0, 0, 0) {} __host__ state_type& internal_state_ref() { @@ -263,10 +78,10 @@ class rocrand_threefry4x64_engine_type_test : public rocrand_threefry4x64_20::en } }; -TEST(rocrand_threefry_prng_state_tests, seed_test) 
+TEST(threefry_prng_state_tests, seed_test) { - rocrand_threefry4x64_engine_type_test engine; - rocrand_threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry4x64_engine_type_test engine; + threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0ULL); EXPECT_EQ(state.counter.y, 0ULL); @@ -291,10 +106,10 @@ TEST(rocrand_threefry_prng_state_tests, seed_test) // Check if the threefry state counter is calculated correctly during // random number generation. -TEST(rocrand_threefry_prng_state_tests, discard_test) +TEST(threefry_prng_state_tests, discard_test) { - rocrand_threefry4x64_engine_type_test engine; - rocrand_threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry4x64_engine_type_test engine; + threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); EXPECT_EQ(state.counter.x, 0ULL); EXPECT_EQ(state.counter.y, 0ULL); @@ -394,10 +209,10 @@ TEST(rocrand_threefry_prng_state_tests, discard_test) EXPECT_EQ(state.substate, 0ULL); } -TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) +TEST(threefry_prng_state_tests, discard_sequence_test) { - rocrand_threefry4x64_engine_type_test engine; - rocrand_threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); + threefry4x64_engine_type_test engine; + threefry4x64_engine_type_test::state_type& state = engine.internal_state_ref(); engine.discard_subsequence(ULLONG_MAX); EXPECT_EQ(state.counter.x, 0ULL); @@ -446,225 +261,3 @@ TEST(rocrand_threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.w, 6ULL); EXPECT_EQ(state.substate, 0U); } - -template -struct rocrand_threefry_prng_offset : public ::testing::Test -{ - using type = T; - rocrand_threefry4x64_20 get_generator() const - { - rocrand_threefry4x64_20 g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for 
generator"); - } - return g; - } -}; - -template -struct rocrand_threefry_prng_offset_params -{ - using output_type = T; - static constexpr inline rocrand_ordering ordering = Ordering; -}; - -using RocrandThreefryPrngOffsetTypes = ::testing::Types< - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params, - rocrand_threefry_prng_offset_params>; -TYPED_TEST_SUITE(rocrand_threefry_prng_offset, RocrandThreefryPrngOffsetTypes); - -TYPED_TEST(rocrand_threefry_prng_offset, offsets_test) -{ - using Params = typename TestFixture::type; - using T = typename Params::output_type; - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 4, 11, 65536, 112233}; - - for(const auto offset : offsets) - { - SCOPED_TRACE(::testing::Message() << "with offset=" << offset); - - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry4x64_20 g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_threefry4x64_20 g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned long long divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543, 1048576}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 1048576, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const size_t size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const size_t size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_threefry4x64_20 g0; - g0.set_order(ordering); - rocrand_threefry4x64_20 g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_char_test) -{ - typedef unsigned char output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_uint_test) -{ - typedef unsigned int output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - 
uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_ullong_test) -{ - typedef unsigned long long int output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_uniform_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) { g.generate(data, s); }, - GetParam(), - uniform_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_normal_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) - { g.generate_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_float_test) -{ - typedef float output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0.f, 1.f); }, - GetParam(), - normal_distribution::output_width); -} - -TEST_P(rocrand_threefry_prng_tests, continuity_log_normal_double_test) -{ - typedef double output_type; - continuity_test( - [](rocrand_threefry4x64_20& g, output_type* data, size_t s) - { g.generate_log_normal(data, s, 0., 1.); }, - GetParam(), - normal_distribution::output_width); -} 
- -TEST_P(rocrand_threefry_prng_tests, continuity_poisson_test) -{ - typedef unsigned int output_type; - continuity_test([](rocrand_threefry4x64_20& g, output_type* data, size_t s) - { g.generate_poisson(data, s, 100.); }, - GetParam(), - rocrand_poisson_distribution<>::output_width); -} diff --git a/test/internal/test_rocrand_threefryNx32_20_prng.hpp b/test/internal/test_rocrand_threefryNx32_20_prng.hpp new file mode 100644 index 00000000..86873694 --- /dev/null +++ b/test/internal/test_rocrand_threefryNx32_20_prng.hpp @@ -0,0 +1,65 @@ +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ +#define ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ + +#include "test_common.hpp" +#include "test_rocrand_common.hpp" +#include + +#include + +#include +#include + +// threefryNx32_20-specific generator API tests +template +struct threefryNx32_20_generator_prng_tests : public ::testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } +}; + +TYPED_TEST_SUITE_P(threefryNx32_20_generator_prng_tests); + +// Assert that the kernel arguments are trivially copyable and destructible. +TYPED_TEST_P(threefryNx32_20_generator_prng_tests, type) +{ + using generator_t = typename TestFixture::generator_t; + using engine_type = typename generator_t::engine_type::base_type; + // TODO: Enable once uint2 is trivially copyable. + // EXPECT_TRUE(std::is_trivially_copyable::value); + EXPECT_TRUE(std::is_trivially_destructible::value); +} + +REGISTER_TYPED_TEST_SUITE_P(threefryNx32_20_generator_prng_tests, type); + +#endif // ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ diff --git a/test/internal/test_rocrand_threefryNx64_20_prng.hpp b/test/internal/test_rocrand_threefryNx64_20_prng.hpp new file mode 100644 index 00000000..a228acaa --- /dev/null +++ b/test/internal/test_rocrand_threefryNx64_20_prng.hpp @@ -0,0 +1,119 @@ +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ +#define ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ + +#include "test_common.hpp" +#include "test_rocrand_common.hpp" +#include + +#include + +#include + +#include + +#include +#include + +// threefryNx64_20-specific generator API tests +template +struct threefryNx64_20_generator_prng_tests : public ::testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; + + auto get_generator() const + { + generator_t g; + if(g.set_order(ordering) != ROCRAND_STATUS_SUCCESS) + { + throw std::runtime_error("Could not set ordering for generator"); + } + return g; + } +}; + +TYPED_TEST_SUITE_P(threefryNx64_20_generator_prng_tests); + +// Assert that the kernel arguments are trivially copyable and destructible. 
+TYPED_TEST_P(threefryNx64_20_generator_prng_tests, type) +{ + using generator_t = typename TestFixture::generator_t; + using engine_type = typename generator_t::engine_type::base_type; + // TODO: Enable once uint2 is trivially copyable. + // EXPECT_TRUE(std::is_trivially_copyable::value); + EXPECT_TRUE(std::is_trivially_destructible::value); +} + +TYPED_TEST_P(threefryNx64_20_generator_prng_tests, uniform_ulonglong_test) +{ + const size_t size = 1313; + unsigned long long* data; + HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned long long) * (size + 1))); + + auto g = TestFixture::get_generator(); + ROCRAND_CHECK(g.generate_uniform(data + 1, size)); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned long long host_data[size]; + HIP_CHECK( + hipMemcpy(host_data, data + 1, sizeof(unsigned long long) * size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + double mean = 0.; + for(size_t i = 0; i < size; i++) + { + mean += host_data[i] / static_cast(size); + } + ASSERT_NEAR(mean, static_cast(ULLONG_MAX / 2), static_cast(ULLONG_MAX / 20)); + + HIP_CHECK(hipFree(data)); +} + +REGISTER_TYPED_TEST_SUITE_P(threefryNx64_20_generator_prng_tests, type, uniform_ulonglong_test); + +// threefryNx64_20-specific generator API continuity tests +template +struct threefryNx64_20_generator_prng_continuity_tests : public ::testing::Test +{ + using generator_t = typename Params::generator_t; + static inline constexpr rocrand_ordering ordering = Params::ordering; +}; + +TYPED_TEST_SUITE_P(threefryNx64_20_generator_prng_continuity_tests); + +TYPED_TEST_P(threefryNx64_20_generator_prng_continuity_tests, continuity_uniform_ullong_test) +{ + constexpr rocrand_ordering ordering = TestFixture::ordering; + using generator_t = typename TestFixture::generator_t; + typedef unsigned long long int output_t; + + continuity_test( + [](generator_t& g, output_t* data, size_t s) { g.generate_uniform(data, s); }, + ordering, + rocrand_impl::host::uniform_distribution::output_width); +} + 
+REGISTER_TYPED_TEST_SUITE_P(threefryNx64_20_generator_prng_continuity_tests, + continuity_uniform_ullong_test); + +#endif // ROCRAND_TEST_INTERNAL_TEST_ROCRAND_THREEFRY_PRNG_HPP_ diff --git a/test/internal/test_rocrand_xorwow_prng.cpp b/test/internal/test_rocrand_xorwow_prng.cpp index 2455f065..2fcee362 100644 --- a/test/internal/test_rocrand_xorwow_prng.cpp +++ b/test/internal/test_rocrand_xorwow_prng.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,258 +18,53 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#include -#include -#include -#include - -#include -#include - -#include -#include - #include "test_common.hpp" #include "test_rocrand_common.hpp" +#include "test_rocrand_prng.hpp" +#include -struct xorwow_prng_tests : public testing::TestWithParam -{ - rocrand_xorwow get_generator() const - { - rocrand_xorwow g; - if(g.set_order(GetParam()) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -INSTANTIATE_TEST_SUITE_P(rocrand, - xorwow_prng_tests, - testing::Values(ROCRAND_ORDERING_PSEUDO_DEFAULT, - ROCRAND_ORDERING_PSEUDO_DYNAMIC)); - -TEST_P(xorwow_prng_tests, init_test) -{ - rocrand_xorwow generator = get_generator(); // offset = 0 - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset(1); - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset(1337); - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset(1048576); - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset(1 << 24); - 
ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset(1 << 28); - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); - - generator.set_offset((1ULL << 36) + 1234567ULL); - ROCRAND_CHECK(generator.init()); - HIP_CHECK(hipDeviceSynchronize()); -} - -TEST_P(xorwow_prng_tests, uniform_uint_test) -{ - const size_t size = 1313; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - rocrand_xorwow g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned long long sum = 0; - for(size_t i = 0; i < size; i++) - { - sum += host_data[i]; - } - const unsigned int mean = sum / size; - ASSERT_NEAR(mean, UINT_MAX / 2, UINT_MAX / 20); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(xorwow_prng_tests, uniform_float_test) -{ - const size_t size = 1313; - float* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(float) * size)); - - rocrand_xorwow g = get_generator(); - ROCRAND_CHECK(g.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - float host_data[size]; - HIP_CHECK(hipMemcpy(host_data, data, sizeof(float) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - double sum = 0; - for(size_t i = 0; i < size; i++) - { - ASSERT_GT(host_data[i], 0.0f); - ASSERT_LE(host_data[i], 1.0f); - sum += host_data[i]; - } - const float mean = sum / size; - ASSERT_NEAR(mean, 0.5f, 0.05f); - - HIP_CHECK(hipFree(data)); -} - -// Check if the numbers generated by first generate() call are different from -// the numbers generated by the 2nd call (same generator) -TEST_P(xorwow_prng_tests, state_progress_test) -{ - // Device data - const size_t size = 1025; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generator - 
rocrand_xorwow g0 = get_generator(); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data1[size]; - HIP_CHECK(hipMemcpy(host_data1, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int host_data2[size]; - HIP_CHECK(hipMemcpy(host_data2, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(host_data1[i] == host_data2[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - - HIP_CHECK(hipFree(data)); -} - -// Checks if generators with the same seed and in the same state -// generate the same numbers -TEST_P(xorwow_prng_tests, same_seed_test) -{ - const unsigned long long seed = 0xdeadbeefdeadbeefULL; +#include - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); +#include - // Generators - rocrand_xorwow g0 = get_generator(), g1 = get_generator(); - // Set same seeds - g0.set_seed(seed); - g1.set_seed(seed); +using rocrand_impl::host::xorwow_generator; - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); +// Generator API tests +using xorwow_generator_prng_tests_types = ::testing::Types< + generator_prng_tests_params, + generator_prng_tests_params>; - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); +using xorwow_generator_prng_offset_tests_types = ::testing::Types< + generator_prng_offset_tests_params, + 
generator_prng_offset_tests_params, + generator_prng_offset_tests_params, + generator_prng_offset_tests_params>; - // Generate using g1 and copy to host - ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); +INSTANTIATE_TYPED_TEST_SUITE_P(xorwow_generator, + generator_prng_tests, + xorwow_generator_prng_tests_types); - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); +INSTANTIATE_TYPED_TEST_SUITE_P(xorwow_generator, + generator_prng_continuity_tests, + xorwow_generator_prng_tests_types); - // Numbers generated using same generator with same - // seed should be the same - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(g0_host_data[i], g1_host_data[i]); - } +INSTANTIATE_TYPED_TEST_SUITE_P(xorwow_generator, + generator_prng_offset_tests, + xorwow_generator_prng_offset_tests_types); - HIP_CHECK(hipFree(data)); -} +// Engine API tests +class xorwow_engine_type_test : public xorwow_generator::engine_type +{}; -// Checks if generators with the same seed and in the same state generate -// the same numbers -TEST_P(xorwow_prng_tests, different_seed_test) -{ - const unsigned long long seed0 = 0xdeadbeefdeadbeefULL; - const unsigned long long seed1 = 0xbeefdeadbeefdeadULL; - - // Device side data - const size_t size = 1024; - unsigned int* data; - HIP_CHECK(hipMallocHelper(&data, sizeof(unsigned int) * size)); - - // Generators - rocrand_xorwow g0 = get_generator(), g1 = get_generator(); - // Set different seeds - g0.set_seed(seed0); - g1.set_seed(seed1); - ASSERT_NE(g0.get_seed(), g1.get_seed()); - - // Generate using g0 and copy to host - ROCRAND_CHECK(g0.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g0_host_data[size]; - HIP_CHECK(hipMemcpy(g0_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - // Generate using g1 and copy to host - 
ROCRAND_CHECK(g1.generate(data, size)); - HIP_CHECK(hipDeviceSynchronize()); - - unsigned int g1_host_data[size]; - HIP_CHECK(hipMemcpy(g1_host_data, data, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - size_t same = 0; - for(size_t i = 0; i < size; i++) - { - if(g1_host_data[i] == g0_host_data[i]) - same++; - } - // It may happen that numbers are the same, so we - // just make sure that most of them are different. - EXPECT_LT(same, static_cast(0.01f * size)); - - HIP_CHECK(hipFree(data)); -} - -TEST_P(xorwow_prng_tests, discard_test) +TEST(xorwow_engine_type_test, discard_test) { const unsigned long long seed = 1234567890123ULL; - rocrand_xorwow::engine_type engine1(seed, 0, 678ULL); - rocrand_xorwow::engine_type engine2(seed, 0, 677ULL); + xorwow_generator::engine_type engine1(seed, 0, 678ULL); + xorwow_generator::engine_type engine2(seed, 0, 677ULL); (void)engine2.next(); @@ -298,11 +93,11 @@ TEST_P(xorwow_prng_tests, discard_test) } } -TEST_P(xorwow_prng_tests, discard_sequence_test) +TEST(xorwow_engine_type_test, discard_sequence_test) { const unsigned long long seed = ~1234567890123ULL; - rocrand_xorwow::engine_type engine1(seed, 0, 444ULL); - rocrand_xorwow::engine_type engine2(seed, 123ULL, 444ULL); + xorwow_generator::engine_type engine1(seed, 0, 444ULL); + xorwow_generator::engine_type engine2(seed, 123ULL, 444ULL); engine1.discard_subsequence(123ULL); @@ -325,198 +120,3 @@ TEST_P(xorwow_prng_tests, discard_sequence_test) EXPECT_EQ(engine1(), engine2()); } - -template -struct rocrand_xorwow_prng_offset : public ::testing::Test -{ - using type = T; - rocrand_xorwow get_generator() const - { - rocrand_xorwow g; - if(g.set_order(T::ordering) != ROCRAND_STATUS_SUCCESS) - { - throw std::runtime_error("Could not set ordering for generator"); - } - return g; - } -}; - -template -struct rocrand_xorwow_prng_offset_params -{ - using output_type = T; - static constexpr rocrand_ordering ordering = Ordering; -}; - -using 
RocrandXorwowPrngOffsetTypes = ::testing::Types< - rocrand_xorwow_prng_offset_params, - rocrand_xorwow_prng_offset_params, - rocrand_xorwow_prng_offset_params, - rocrand_xorwow_prng_offset_params>; -TYPED_TEST_SUITE(rocrand_xorwow_prng_offset, RocrandXorwowPrngOffsetTypes); - -TYPED_TEST(rocrand_xorwow_prng_offset, offsets_test) -{ - using params = typename TestFixture::type; - using T = typename params::output_type; - - const size_t size = 131313; - - constexpr size_t offsets[] = {0, 1, 11, 112233}; - - for(const auto offset : offsets) - { - const size_t size0 = size; - const size_t size1 = (size + offset); - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_xorwow g0 = TestFixture::get_generator(); - g0.set_offset(offset); - g0.generate(data0, size0); - - rocrand_xorwow g1 = TestFixture::get_generator(); - g1.generate(data1, size1); - - std::vector host_data0(size0); - std::vector host_data1(size1); - HIP_CHECK(hipMemcpy(host_data0.data(), data0, sizeof(T) * size0, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(host_data1.data(), data1, sizeof(T) * size1, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - - for(size_t i = 0; i < size; ++i) - { - ASSERT_EQ(host_data0[i], host_data1[i + offset]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); - } -} - -// Check that subsequent generations of different sizes produce one -// sequence without gaps, no matter how many values are generated per call. 
-template -void continuity_test(GenerateFunc generate_func, - rocrand_ordering ordering, - unsigned int divisor = 1) -{ - std::vector sizes0({100, 1, 24783, 3, 2, 776543}); - std::vector sizes1({1024, 55, 65536, 623456, 30, 111331}); - if(divisor > 1) - { - for(size_t& s : sizes0) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - for(size_t& s : sizes1) - s = (s + divisor - 1) & ~static_cast(divisor - 1); - } - - const auto size0 = std::accumulate(sizes0.cbegin(), sizes0.cend(), std::size_t{0}); - const auto size1 = std::accumulate(sizes1.cbegin(), sizes1.cend(), std::size_t{0}); - - T* data0; - T* data1; - HIP_CHECK(hipMalloc(&data0, sizeof(T) * size0)); - HIP_CHECK(hipMalloc(&data1, sizeof(T) * size1)); - - rocrand_xorwow g0; - g0.set_order(ordering); - rocrand_xorwow g1; - g1.set_order(ordering); - - std::vector host_data0(size0); - std::vector host_data1(size1); - - size_t current0 = 0; - for(size_t s : sizes0) - { - generate_func(g0, data0, s); - HIP_CHECK(hipMemcpy(host_data0.data() + current0, data0, sizeof(T) * s, hipMemcpyDefault)); - current0 += s; - } - size_t current1 = 0; - for(size_t s : sizes1) - { - generate_func(g1, data1, s); - HIP_CHECK(hipMemcpy(host_data1.data() + current1, data1, sizeof(T) * s, hipMemcpyDefault)); - current1 += s; - } - - for(size_t i = 0; i < std::min(size0, size1); i++) - { - ASSERT_EQ(host_data0[i], host_data1[i]); - } - - HIP_CHECK(hipFree(data0)); - HIP_CHECK(hipFree(data1)); -} - -TEST_P(xorwow_prng_tests, continuity_uniform_uint_test) -{ - continuity_test([](rocrand_xorwow& g, unsigned int* data, size_t s) - { g.generate(data, s); }, - GetParam()); -} - -TEST_P(xorwow_prng_tests, continuity_uniform_char_test) -{ - continuity_test([](rocrand_xorwow& g, unsigned char* data, size_t s) - { g.generate(data, s); }, - GetParam(), - 4); -} - -TEST_P(xorwow_prng_tests, continuity_uniform_float_test) -{ - continuity_test([](rocrand_xorwow& g, float* data, size_t s) - { g.generate_uniform(data, s); }, - GetParam()); -} - 
-TEST_P(xorwow_prng_tests, continuity_uniform_double_test) -{ - continuity_test([](rocrand_xorwow& g, double* data, size_t s) - { g.generate_uniform(data, s); }, - GetParam()); -} - -TEST_P(xorwow_prng_tests, continuity_normal_float_test) -{ - continuity_test([](rocrand_xorwow& g, float* data, size_t s) - { g.generate_normal(data, s, 0.0f, 1.0f); }, - GetParam(), - 2); -} - -TEST_P(xorwow_prng_tests, continuity_normal_double_test) -{ - continuity_test([](rocrand_xorwow& g, double* data, size_t s) - { g.generate_normal(data, s, 0.0, 1.0); }, - GetParam(), - 2); -} - -TEST_P(xorwow_prng_tests, continuity_log_normal_float_test) -{ - continuity_test([](rocrand_xorwow& g, float* data, size_t s) - { g.generate_log_normal(data, s, 0.0f, 1.0f); }, - GetParam(), - 2); -} - -TEST_P(xorwow_prng_tests, continuity_log_normal_double_test) -{ - continuity_test([](rocrand_xorwow& g, double* data, size_t s) - { g.generate_log_normal(data, s, 0.0, 1.0); }, - GetParam(), - 2); -} - -TEST_P(xorwow_prng_tests, continuity_poisson_test) -{ - continuity_test([](rocrand_xorwow& g, unsigned int* data, size_t s) - { g.generate_poisson(data, s, 100.0); }, - GetParam()); -} diff --git a/test/internal/test_uniform_distribution.cpp b/test/internal/test_uniform_distribution.cpp index 2322e2b9..e897e014 100644 --- a/test/internal/test_uniform_distribution.cpp +++ b/test/internal/test_uniform_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,8 @@ #include +using namespace rocrand_impl::host; + TEST(uniform_distribution_tests, uint_test) { std::random_device rd; diff --git a/test/test_rocrand_host.cpp b/test/test_rocrand_host.cpp index ee4bb24e..96f5de83 100644 --- a/test/test_rocrand_host.cpp +++ b/test/test_rocrand_host.cpp @@ -49,8 +49,10 @@ std::vector get_seeds() constexpr rocrand_rng_type host_rng_types[] = { ROCRAND_RNG_PSEUDO_PHILOX4_32_10, + ROCRAND_RNG_PSEUDO_LFSR113, ROCRAND_RNG_PSEUDO_MRG31K3P, ROCRAND_RNG_PSEUDO_MRG32K3A, + ROCRAND_RNG_PSEUDO_MTGP32, ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, @@ -321,13 +323,6 @@ TEST_P(rocrand_generate_host_test, poisson_parity_test) HIP_CHECK(hipFree(output)); } -// Since it is extremely expensive to initialize the LFSR113 -// engine vector, only a single test is performed -TEST(rocrand_generate_host_test, lfsr113_int_parity) -{ - test_int_parity(ROCRAND_RNG_PSEUDO_LFSR113, rocrand_generate, {seeds[0]}); -} - INSTANTIATE_TEST_SUITE_P(rocrand_generate_host_test, rocrand_generate_host_test, ::testing::ValuesIn(host_rng_types)); diff --git a/test/test_rocrand_kernel_lfsr113.cpp b/test/test_rocrand_kernel_lfsr113.cpp index df09957e..6ddf88cd 100644 --- a/test/test_rocrand_kernel_lfsr113.cpp +++ b/test/test_rocrand_kernel_lfsr113.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include @@ -63,6 +62,7 @@ __global__ __launch_bounds__(32) void rocrand_kernel(unsigned int* output, const ROCRAND_LFSR113_DEFAULT_SEED_Z, ROCRAND_LFSR113_DEFAULT_SEED_W}, subsequence, + 123ULL, &state); unsigned int index = state_id; @@ -86,6 +86,7 @@ __global__ __launch_bounds__(32) void rocrand_uniform_kernel(float* output, cons ROCRAND_LFSR113_DEFAULT_SEED_Z, ROCRAND_LFSR113_DEFAULT_SEED_W}, subsequence, + 234ULL, &state); unsigned int index = state_id; @@ -110,6 +111,7 @@ __global__ __launch_bounds__(32) void rocrand_uniform_double_kernel(double* ROCRAND_LFSR113_DEFAULT_SEED_Z, ROCRAND_LFSR113_DEFAULT_SEED_W}, subsequence, + 234ULL, &state); unsigned int index = state_id; @@ -128,7 +130,7 @@ __global__ __launch_bounds__(32) void rocrand_normal_kernel(float* output, const GeneratorState state; const unsigned int subsequence = state_id; - rocrand_init(uint4{12345, 67890, 23456, 78901}, subsequence, &state); + rocrand_init(uint4{12345, 67890, 23456, 78901}, subsequence, 345ULL, &state); unsigned int index = state_id; while(index < size) @@ -149,7 +151,7 @@ __global__ __launch_bounds__(32) void rocrand_log_normal_kernel(float* output, c GeneratorState state; const unsigned int subsequence = state_id; - rocrand_init(uint4{12345, 67890, 23456, 78901}, subsequence, &state); + rocrand_init(uint4{12345, 67890, 23456, 78901}, subsequence, 456ULL, &state); unsigned int index = state_id; while(index < size) @@ -172,7 +174,7 @@ __global__ __launch_bounds__(64) void rocrand_poisson_kernel(unsigned int* outpu GeneratorState state; const unsigned int subsequence = state_id; - rocrand_init(uint4{23456, 78901, 34567, 89012}, subsequence, &state); + rocrand_init(uint4{23456, 78901, 34567, 89012}, subsequence, 234ULL, &state); 
unsigned int index = state_id; while(index < size) @@ -191,7 +193,7 @@ __global__ __launch_bounds__(64) void rocrand_discrete_kernel( GeneratorState state; const unsigned int subsequence = state_id; - rocrand_init(uint4{23456, 78901, 34567, 89012}, subsequence, &state); + rocrand_init(uint4{23456, 78901, 34567, 89012}, subsequence, 234ULL, &state); unsigned int index = state_id; while(index < size) diff --git a/test/test_rocrand_kernel_mrg.cpp b/test/test_rocrand_kernel_mrg.cpp index 2ac3d00f..6f896bce 100644 --- a/test/test_rocrand_kernel_mrg.cpp +++ b/test/test_rocrand_kernel_mrg.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_mtgp32.cpp b/test/test_rocrand_kernel_mtgp32.cpp index 568ce698..2fd6b37c 100644 --- a/test/test_rocrand_kernel_mtgp32.cpp +++ b/test/test_rocrand_kernel_mtgp32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -18,26 +18,25 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-#include #include +#include -#include #include +#include #include +#include #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include #include "test_common.hpp" #include "test_rocrand_common.hpp" -template -__global__ -__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) -void rocrand_kernel(GeneratorState * states, unsigned int * output, const size_t size) +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_kernel( + GeneratorState* states, unsigned int* output, const size_t size) { const unsigned int state_id = blockIdx.x; unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; @@ -61,10 +60,9 @@ void rocrand_kernel(GeneratorState * states, unsigned int * output, const size_t rocrand_mtgp32_block_copy(&state, &states[state_id]); } -template -__global__ -__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) -void rocrand_uniform_kernel(GeneratorState * states, float * output, const size_t size) +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_uniform_kernel( + GeneratorState* states, float* output, const size_t size) { const unsigned int state_id = blockIdx.x; const unsigned int thread_id = threadIdx.x; @@ -72,7 +70,7 @@ void rocrand_uniform_kernel(GeneratorState * states, float * output, const size_ unsigned int stride = gridDim.x * blockDim.x; __shared__ GeneratorState state; - if (thread_id == 0) + if(thread_id == 0) state = states[state_id]; __syncthreads(); @@ -88,14 +86,13 @@ void rocrand_uniform_kernel(GeneratorState * states, float * output, const size_ } // Save engine with its state - if (thread_id == 0) + if(thread_id == 0) states[state_id] = state; } -template -__global__ -__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) -void rocrand_normal_kernel(GeneratorState * states, float * output, const size_t size) +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_normal_kernel( + GeneratorState* states, float* output, const size_t 
size) { const unsigned int state_id = blockIdx.x; const unsigned int thread_id = threadIdx.x; @@ -103,7 +100,7 @@ void rocrand_normal_kernel(GeneratorState * states, float * output, const size_t unsigned int stride = gridDim.x * blockDim.x; __shared__ GeneratorState state; - if (thread_id == 0) + if(thread_id == 0) state = states[state_id]; __syncthreads(); @@ -111,9 +108,17 @@ void rocrand_normal_kernel(GeneratorState * states, float * output, const size_t const size_t size_rounded_up = r == 0 ? size : size + (blockDim.x - r); while(index < size_rounded_up) { - auto value = rocrand_normal(&state); if(index < size) - output[index] = value; + { + if(state_id % 2 == 0) + { + output[index] = rocrand_normal_double2(&state).x; + } + else + { + output[index] = rocrand_normal_double(&state); + } + } // Next position index += stride; } @@ -123,10 +128,9 @@ void rocrand_normal_kernel(GeneratorState * states, float * output, const size_t states[state_id] = state; } -template -__global__ -__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) -void rocrand_log_normal_kernel(GeneratorState * states, float * output, const size_t size) +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_normal_double_kernel( + GeneratorState* states, double* output, const size_t size) { const unsigned int state_id = blockIdx.x; const unsigned int thread_id = threadIdx.x; @@ -134,7 +138,7 @@ void rocrand_log_normal_kernel(GeneratorState * states, float * output, const si unsigned int stride = gridDim.x * blockDim.x; __shared__ GeneratorState state; - if (thread_id == 0) + if(thread_id == 0) state = states[state_id]; __syncthreads(); @@ -142,22 +146,29 @@ void rocrand_log_normal_kernel(GeneratorState * states, float * output, const si const size_t size_rounded_up = r == 0 ? 
size : size + (blockDim.x - r); while(index < size_rounded_up) { - auto value = rocrand_log_normal(&state, 1.6f, 0.25f); if(index < size) - output[index] = value; + { + if(state_id % 2 == 0) + { + output[index] = rocrand_normal_double2(&state).x; + } + else + { + output[index] = rocrand_normal_double(&state); + } + } // Next position index += stride; } // Save engine with its state - if (thread_id == 0) + if(thread_id == 0) states[state_id] = state; } -template -__global__ -__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) -void rocrand_poisson_kernel(GeneratorState * states, unsigned int * output, const size_t size, double lambda) +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_log_normal_kernel( + GeneratorState* states, float* output, const size_t size) { const unsigned int state_id = blockIdx.x; const unsigned int thread_id = threadIdx.x; @@ -165,7 +176,83 @@ void rocrand_poisson_kernel(GeneratorState * states, unsigned int * output, cons unsigned int stride = gridDim.x * blockDim.x; __shared__ GeneratorState state; - if (thread_id == 0) + if(thread_id == 0) + state = states[state_id]; + __syncthreads(); + + const size_t r = size % blockDim.x; + const size_t size_rounded_up = r == 0 ? 
size : size + (blockDim.x - r); + while(index < size_rounded_up) + { + if(index < size) + { + if(state_id % 2 == 0) + { + output[index] = rocrand_log_normal2(&state, 1.6f, 0.25f).x; + } + else + { + output[index] = rocrand_log_normal(&state, 1.6f, 0.25f); + } + } + // Next position + index += stride; + } + + // Save engine with its state + if(thread_id == 0) + states[state_id] = state; +} + +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_log_normal_double_kernel( + GeneratorState* states, double* output, const size_t size) +{ + const unsigned int state_id = blockIdx.x; + const unsigned int thread_id = threadIdx.x; + unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int stride = gridDim.x * blockDim.x; + + __shared__ GeneratorState state; + if(thread_id == 0) + state = states[state_id]; + __syncthreads(); + + const size_t r = size % blockDim.x; + const size_t size_rounded_up = r == 0 ? size : size + (blockDim.x - r); + while(index < size_rounded_up) + { + if(index < size) + { + if(state_id % 2 == 0) + { + output[index] = rocrand_log_normal_double2(&state, 1.6f, 0.25f).x; + } + else + { + output[index] = rocrand_log_normal_double(&state, 1.6f, 0.25f); + } + } + // Next position + index += stride; + } + + // Save engine with its state + if(thread_id == 0) + states[state_id] = state; +} + +template +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void rocrand_poisson_kernel( + GeneratorState* states, unsigned int* output, const size_t size, double lambda) +{ + const unsigned int state_id = blockIdx.x; + const unsigned int thread_id = threadIdx.x; + unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int stride = gridDim.x * blockDim.x; + + __shared__ GeneratorState state; + if(thread_id == 0) state = states[state_id]; __syncthreads(); @@ -181,7 +268,7 @@ void rocrand_poisson_kernel(GeneratorState * states, unsigned int * output, cons } // Save engine with its state - if (thread_id 
== 0) + if(thread_id == 0) states[state_id] = state; } @@ -198,31 +285,23 @@ TEST(rocrand_kernel_mtgp32, rocrand) { typedef rocrand_state_mtgp32 state_type; - state_type * states; + state_type* states; hipMallocHelper(&states, sizeof(state_type) * 8); ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); - const size_t output_size = 8192; - unsigned int * output; + const size_t output_size = 8192; + unsigned int* output; HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(unsigned int))); HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_kernel), - dim3(8), dim3(256), 0, 0, - states, output, output_size - ); + rocrand_kernel<<>>(states, output, output_size); HIP_CHECK(hipGetLastError()); std::vector output_host(output_size); - HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); + HIP_CHECK(hipMemcpy(output_host.data(), + output, + output_size * sizeof(unsigned int), + hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); HIP_CHECK(hipFree(states)); @@ -240,31 +319,21 @@ TEST(rocrand_kernel_mtgp32, rocrand_uniform) { typedef rocrand_state_mtgp32 state_type; - state_type * states; + state_type* states; hipMallocHelper(&states, sizeof(state_type) * 8); ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); const size_t output_size = 8192; - float * output; + float* output; HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(float))); HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_uniform_kernel), - dim3(8), dim3(256), 0, 0, - states, output, output_size - ); + rocrand_uniform_kernel<<>>(states, output, output_size); HIP_CHECK(hipGetLastError()); std::vector output_host(output_size); HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(float), - hipMemcpyDeviceToHost - ) - ); + 
hipMemcpy(output_host.data(), output, output_size * sizeof(float), hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); HIP_CHECK(hipFree(states)); @@ -282,41 +351,62 @@ TEST(rocrand_kernel_mtgp32, rocrand_normal) { typedef rocrand_state_mtgp32 state_type; - state_type * states; + state_type* states; hipMallocHelper(&states, sizeof(state_type) * 8); ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); const size_t output_size = 8192; - float * output; + float* output; HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(float))); HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_normal_kernel), - dim3(8), dim3(256), 0, 0, - states, output, output_size - ); + rocrand_normal_kernel<<>>(states, output, output_size); HIP_CHECK(hipGetLastError()); std::vector output_host(output_size); HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(float), - hipMemcpyDeviceToHost - ) - ); + hipMemcpy(output_host.data(), output, output_size * sizeof(float), hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); HIP_CHECK(hipFree(states)); - double mean = 0; + double mean = std::accumulate(output_host.begin(), output_host.end(), 0.0) / output_size; + EXPECT_NEAR(mean, 0.0, 0.2); + + double stddev = 0; for(auto v : output_host) { - mean += static_cast(v); + stddev += std::pow(static_cast(v) - mean, 2); } - mean = mean / output_size; + stddev = stddev / output_size; + EXPECT_NEAR(stddev, 1.0, 0.2); +} + +TEST(rocrand_kernel_mtgp32, rocrand_normal_double) +{ + typedef rocrand_state_mtgp32 state_type; + + state_type* states; + hipMallocHelper(&states, sizeof(state_type) * 8); + + ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); + + const size_t output_size = 8192; + double* output; + HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(double))); + HIP_CHECK(hipDeviceSynchronize()); + 
rocrand_normal_double_kernel + <<>>(states, output, output_size); + HIP_CHECK(hipGetLastError()); + + std::vector output_host(output_size); + HIP_CHECK( + hipMemcpy(output_host.data(), output, output_size * sizeof(double), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipFree(output)); + HIP_CHECK(hipFree(states)); + + double mean = std::accumulate(output_host.begin(), output_host.end(), 0.0) / output_size; EXPECT_NEAR(mean, 0.0, 0.2); double stddev = 0; @@ -332,41 +422,67 @@ TEST(rocrand_kernel_mtgp32, rocrand_log_normal) { typedef rocrand_state_mtgp32 state_type; - state_type * states; + state_type* states; hipMallocHelper(&states, sizeof(state_type) * 8); ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); const size_t output_size = 8192; - float * output; + float* output; HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(float))); HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_log_normal_kernel), - dim3(8), dim3(256), 0, 0, - states, output, output_size - ); + rocrand_log_normal_kernel + <<>>(states, output, output_size); HIP_CHECK(hipGetLastError()); std::vector output_host(output_size); HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(float), - hipMemcpyDeviceToHost - ) - ); + hipMemcpy(output_host.data(), output, output_size * sizeof(float), hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); HIP_CHECK(hipFree(states)); - double mean = 0; + double mean = std::accumulate(output_host.begin(), output_host.end(), 0.0) / output_size; + + double stddev = 0; for(auto v : output_host) { - mean += static_cast(v); + stddev += std::pow(v - mean, 2); } - mean = mean / output_size; + stddev = std::sqrt(stddev / output_size); + + double logmean = std::log(mean * mean / std::sqrt(stddev + mean * mean)); + double logstd = std::sqrt(std::log(1.0f + stddev / (mean * mean))); + + EXPECT_NEAR(1.6, logmean, 1.6 * 
0.2); + EXPECT_NEAR(0.25, logstd, 0.25 * 0.2); +} + +TEST(rocrand_kernel_mtgp32, rocrand_log_normal_double) +{ + typedef rocrand_state_mtgp32 state_type; + + state_type* states; + hipMallocHelper(&states, sizeof(state_type) * 8); + + ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); + + const size_t output_size = 8192; + double* output; + HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(double))); + HIP_CHECK(hipDeviceSynchronize()); + rocrand_log_normal_double_kernel + <<>>(states, output, output_size); + HIP_CHECK(hipGetLastError()); + + std::vector output_host(output_size); + HIP_CHECK( + hipMemcpy(output_host.data(), output, output_size * sizeof(double), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipFree(output)); + HIP_CHECK(hipFree(states)); + + double mean = std::accumulate(output_host.begin(), output_host.end(), 0.0) / output_size; double stddev = 0; for(auto v : output_host) @@ -376,13 +492,14 @@ TEST(rocrand_kernel_mtgp32, rocrand_log_normal) stddev = std::sqrt(stddev / output_size); double logmean = std::log(mean * mean / std::sqrt(stddev + mean * mean)); - double logstd = std::sqrt(std::log(1.0f + stddev/(mean * mean))); + double logstd = std::sqrt(std::log(1.0f + stddev / (mean * mean))); EXPECT_NEAR(1.6, logmean, 1.6 * 0.2); EXPECT_NEAR(0.25, logstd, 0.25 * 0.2); } -class rocrand_kernel_mtgp32_poisson : public ::testing::TestWithParam { }; +class rocrand_kernel_mtgp32_poisson : public ::testing::TestWithParam +{}; TEST_P(rocrand_kernel_mtgp32_poisson, rocrand_poisson) { @@ -390,41 +507,29 @@ TEST_P(rocrand_kernel_mtgp32_poisson, rocrand_poisson) const double lambda = GetParam(); - state_type * states; + state_type* states; hipMallocHelper(&states, sizeof(state_type) * 8); ROCRAND_CHECK(rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, 8, 0)); - const size_t output_size = 8192; - unsigned int * output; + const size_t output_size = 8192; + unsigned int* output; 
HIP_CHECK(hipMallocHelper(&output, output_size * sizeof(unsigned int))); HIP_CHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL( - HIP_KERNEL_NAME(rocrand_poisson_kernel), - dim3(8), dim3(256), 0, 0, - states, output, output_size, lambda - ); + rocrand_poisson_kernel + <<>>(states, output, output_size, lambda); HIP_CHECK(hipGetLastError()); std::vector output_host(output_size); - HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); + HIP_CHECK(hipMemcpy(output_host.data(), + output, + output_size * sizeof(unsigned int), + hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); HIP_CHECK(hipFree(states)); - double mean = 0; - for(auto v : output_host) - { - mean += static_cast(v); - } - mean = mean / output_size; + double mean = std::accumulate(output_host.begin(), output_host.end(), 0.0) / output_size; double variance = 0; for(auto v : output_host) @@ -437,8 +542,8 @@ TEST_P(rocrand_kernel_mtgp32_poisson, rocrand_poisson) EXPECT_NEAR(variance, lambda, std::max(1.0, lambda * 1e-1)); } -const double lambdas[] = { 1.0, 5.5, 20.0, 100.0, 1234.5, 5000.0 }; +const double lambdas[] = {1.0, 5.5, 20.0, 100.0, 1234.5, 5000.0}; INSTANTIATE_TEST_SUITE_P(rocrand_kernel_mtgp32_poisson, - rocrand_kernel_mtgp32_poisson, - ::testing::ValuesIn(lambdas)); + rocrand_kernel_mtgp32_poisson, + ::testing::ValuesIn(lambdas)); diff --git a/test/test_rocrand_kernel_philox4x32_10.cpp b/test/test_rocrand_kernel_philox4x32_10.cpp index 5a90a2eb..496a9bd0 100644 --- a/test/test_rocrand_kernel_philox4x32_10.cpp +++ b/test/test_rocrand_kernel_philox4x32_10.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_sobol32.cpp b/test/test_rocrand_kernel_sobol32.cpp index 8e65255a..d50c8347 100644 --- a/test/test_rocrand_kernel_sobol32.cpp +++ b/test/test_rocrand_kernel_sobol32.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_sobol64.cpp b/test/test_rocrand_kernel_sobol64.cpp index 13d816ec..3334dcc7 100644 --- a/test/test_rocrand_kernel_sobol64.cpp +++ b/test/test_rocrand_kernel_sobol64.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #define HIP_CHECK(state) ASSERT_EQ(state, hipSuccess) @@ -122,10 +121,11 @@ void rocrand_log_normal_kernel(double * output, unsigned long long int * vectors } } -template -__global__ -__launch_bounds__(32) -void rocrand_poisson_kernel(unsigned int * output, unsigned long long int * vectors, const size_t size, double lambda) +template +__global__ __launch_bounds__(32) void rocrand_poisson_kernel(unsigned long long int* output, + unsigned long long int* vectors, + const size_t size, + double lambda) { const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int global_size = gridDim.x * blockDim.x; @@ -371,6 +371,7 @@ TEST_P(rocrand_kernel_sobol64_poisson, rocrand_poisson) { typedef rocrand_state_sobol64 state_type; typedef double Type; + typedef unsigned long long int ResultType; const Type lambda = GetParam(); @@ -387,8 +388,8 @@ TEST_P(rocrand_kernel_sobol64_poisson, rocrand_poisson) HIP_CHECK(hipDeviceSynchronize()); const size_t output_size = 8192; - unsigned int * output; - HIP_CHECK(hipMalloc(&output, output_size * sizeof(unsigned int))); + ResultType* output; + HIP_CHECK(hipMalloc(&output, output_size * sizeof(ResultType))); HIP_CHECK(hipDeviceSynchronize()); hipLaunchKernelGGL( @@ -398,14 +399,11 @@ TEST_P(rocrand_kernel_sobol64_poisson, rocrand_poisson) ); HIP_CHECK(hipGetLastError()); - std::vector output_host(output_size); - HIP_CHECK( - hipMemcpy( - output_host.data(), output, - output_size * sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); + std::vector output_host(output_size); + HIP_CHECK(hipMemcpy(output_host.data(), + output, + output_size * sizeof(ResultType), + hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(output)); 
HIP_CHECK(hipFree(m_vector)); diff --git a/test/test_rocrand_kernel_threefry2x32_20.cpp b/test/test_rocrand_kernel_threefry2x32_20.cpp index 69cb8191..2efe8e91 100644 --- a/test/test_rocrand_kernel_threefry2x32_20.cpp +++ b/test/test_rocrand_kernel_threefry2x32_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_threefry2x64_20.cpp b/test/test_rocrand_kernel_threefry2x64_20.cpp index 76a3846f..70af6616 100644 --- a/test/test_rocrand_kernel_threefry2x64_20.cpp +++ b/test/test_rocrand_kernel_threefry2x64_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_threefry4x32_20.cpp b/test/test_rocrand_kernel_threefry4x32_20.cpp index 128727f3..ee1fb82e 100644 --- a/test/test_rocrand_kernel_threefry4x32_20.cpp +++ b/test/test_rocrand_kernel_threefry4x32_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_threefry4x64_20.cpp b/test/test_rocrand_kernel_threefry4x64_20.cpp index 047deffb..deeaefd5 100644 --- a/test/test_rocrand_kernel_threefry4x64_20.cpp +++ b/test/test_rocrand_kernel_threefry4x64_20.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/test/test_rocrand_kernel_xorwow.cpp b/test/test_rocrand_kernel_xorwow.cpp index 44fab33f..6614b342 100644 --- a/test/test_rocrand_kernel_xorwow.cpp +++ b/test/test_rocrand_kernel_xorwow.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ #include -#define FQUALIFIERS __forceinline__ __host__ __device__ #include #include diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index c4a1640d..0ffbd3f0 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -48,6 +48,12 @@ function(rocrand_add_tool TARGET_NAME) endfunction() rocrand_add_tool(bin2typed bin2typed.cpp) +if(HIP_COMPILER STREQUAL "nvcc") + message(STATUS "Target lfsr113_precomputed_generator cannot be built for CUDA") +else() + rocrand_add_tool(lfsr113_precomputed_generator lfsr113_precomputed_generator.cpp) + target_link_libraries(lfsr113_precomputed_generator hip::device) +endif() rocrand_add_tool(mrg31k3p_precomputed_generator mrg31k3p_precomputed_generator.cpp) rocrand_add_tool(mrg32k3a_precomputed_generator mrg32k3a_precomputed_generator.cpp) rocrand_add_tool(mt19937_precomputed_generator mt19937_precomputed_generator.cpp) diff --git a/tools/lfsr113_precomputed_generator.cpp b/tools/lfsr113_precomputed_generator.cpp new file mode 100644 index 00000000..d8b13437 --- /dev/null +++ b/tools/lfsr113_precomputed_generator.cpp @@ -0,0 +1,212 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "utils_matrix_exponentiation.hpp" + +#include + +#include +#include +#include + +const int LFSR113_N = 4; // 4 values +const int LFSR113_M = 32; // 32-bit each + +const int LFSR113_SIZE = LFSR113_M * LFSR113_N * LFSR113_N; + +const int LFSR113_JUMP_MATRICES = 32; +const int LFSR113_JUMP_LOG2 = 2; + +const int LFSR113_SEQUENCE_JUMP_LOG2 = 55; + +struct rocrand_lfsr113_state +{ + uint4 z; + uint4 subsequence; + + void discard() + { + unsigned int b; + + b = (((z.x << 6) ^ z.x) >> 13); + z.x = (((z.x & 4294967294U) << 18) ^ b); + + b = (((z.y << 2) ^ z.y) >> 27); + z.y = (((z.y & 4294967288U) << 2) ^ b); + + b = (((z.z << 13) ^ z.z) >> 21); + z.z = (((z.z & 4294967280U) << 7) ^ b); + + b = (((z.w << 3) ^ z.w) >> 12); + z.w = (((z.w & 4294967168U) << 13) ^ b); + } +}; + +void generate_matrices(unsigned int (&jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE], + unsigned int (&sequence_jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE]) +{ + unsigned int one_step[LFSR113_SIZE]; + for(int i = 0; i < LFSR113_N; ++i) + { + for(int j = 0; j < LFSR113_M; ++j) + { + rocrand_lfsr113_state state; + const unsigned int b = 1U << j; + + state.z.x = (i == 0 ? b : 0); + state.z.y = (i == 1 ? b : 0); + state.z.z = (i == 2 ? b : 0); + state.z.w = (i == 3 ? b : 0); + state.subsequence = uint4{0, 0, 0, 0}; + + state.discard(); + + one_step[(i * LFSR113_M + j) * LFSR113_N + 0] = state.z.x; + one_step[(i * LFSR113_M + j) * LFSR113_N + 1] = state.z.y; + one_step[(i * LFSR113_M + j) * LFSR113_N + 2] = state.z.z; + one_step[(i * LFSR113_M + j) * LFSR113_N + 3] = state.z.w; + } + } + + // Matrices for jumps within the same sequence (offset). 
The powers from 2^0 (one_step) to 2^64 + // are required, so we need 32 iterations of exponentiations to 2^LFSR113_JUMP_LOG2. + { + unsigned int a[LFSR113_SIZE]; + unsigned int b[LFSR113_SIZE]; + copy_arr(a, one_step); + + copy_arr(jump_matrices[0], a); + for(int k = 1; k < LFSR113_JUMP_MATRICES; k++) + { + copy_arr(b, a); + mat_pow(a, b, (1 << LFSR113_JUMP_LOG2)); + copy_arr(jump_matrices[k], a); + } + } + + // Matrices for jumps within sequences. The powers from (2^0)*(2^55) (one_step) to (2^64)*(2^55) + // are required, so we need 32 iterations of exponentiations to 2^LFSR113_JUMP_LOG2. + { + unsigned int a[LFSR113_SIZE]; + unsigned int b[LFSR113_SIZE]; + copy_arr(a, one_step); + + // For 55: A^(2^27) + mat_pow(b, a, 1ULL << (LFSR113_SEQUENCE_JUMP_LOG2 / 2)); + // For 55: (A^(2^27))^(2^28) = A^(2^55) + mat_pow( + a, + b, + 1ULL << (LFSR113_SEQUENCE_JUMP_LOG2 - LFSR113_SEQUENCE_JUMP_LOG2 / 2)); + + copy_arr(sequence_jump_matrices[0], a); + for(int k = 1; k < LFSR113_JUMP_MATRICES; k++) + { + copy_arr(b, a); + mat_pow(a, b, (1 << LFSR113_JUMP_LOG2)); + copy_arr(sequence_jump_matrices[k], a); + } + } +} + +int main(int argc, char const* argv[]) +{ + if(argc != 2 || std::string(argv[1]) == "--help") + { + std::cout << "Usage:" << std::endl; + std::cout << " ./lfsr113_precomputed_generator " + "../../library/include/rocrand/rocrand_lfsr113_precomputed.h" + << std::endl; + return -1; + } + + unsigned int jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE]; + unsigned int sequence_jump_matrices[LFSR113_JUMP_MATRICES][LFSR113_SIZE]; + generate_matrices(jump_matrices, sequence_jump_matrices); + + const std::string file_path(argv[1]); + std::ofstream fout(file_path, std::ios_base::out | std::ios_base::trunc); + fout << R"(// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef ROCRAND_LFSR113_PRECOMPUTED_H_ +#define ROCRAND_LFSR113_PRECOMPUTED_H_ + +// Auto-generated file. Do not edit! 
+// Generated by tools/lfsr113_precomputed_generator + +)"; + + fout << "#define LFSR113_N " << LFSR113_N << std::endl; + fout << "#define LFSR113_M " << LFSR113_M << std::endl; + fout << "#define LFSR113_SIZE (LFSR113_M * LFSR113_N * LFSR113_N)" << std::endl; + fout << "#define LFSR113_JUMP_MATRICES " << LFSR113_JUMP_MATRICES << std::endl; + fout << "#define LFSR113_JUMP_LOG2 " << LFSR113_JUMP_LOG2 << std::endl; + fout << std::endl; + + write_matrices( + fout, + "d_lfsr113_jump_matrices", + "lfsr113", + static_cast(&jump_matrices[0][0]), + true); + write_matrices( + fout, + "h_lfsr113_jump_matrices", + "lfsr113", + static_cast(&jump_matrices[0][0]), + false); + + write_matrices( + fout, + "d_lfsr113_sequence_jump_matrices", + "lfsr113", + static_cast(&sequence_jump_matrices[0][0]), + true); + write_matrices( + fout, + "h_lfsr113_sequence_jump_matrices", + "lfsr113", + static_cast(&sequence_jump_matrices[0][0]), + false); + + fout << R"( +#endif // ROCRAND_LFSR113_PRECOMPUTED_H_ +)"; + + return 0; +} diff --git a/tools/utils_matrix_exponentiation.hpp b/tools/utils_matrix_exponentiation.hpp new file mode 100644 index 00000000..ccea67a1 --- /dev/null +++ b/tools/utils_matrix_exponentiation.hpp @@ -0,0 +1,139 @@ +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef ROCRAND_TOOLS_UTILS_MATRIX_EXPONENTIATION_HPP_ +#define ROCRAND_TOOLS_UTILS_MATRIX_EXPONENTIATION_HPP_ + +#include +#include +#include +#include +#include + +/// @brief Copies array \p src of size \p SIZE into \p dst. +template +void copy_arr(unsigned int* dst, const unsigned int* src) +{ + for(int i = 0; i < SIZE; i++) + { + dst[i] = src[i]; + } +} + +/// @brief Given an NxMxN matrix \p m and a vector \p v of size N, it performs an exclusive OR +/// among the N-sized vectors m[i][j] for which it holds that v[i] & (1U << j) != 0 +/// and stores the result into \p v. +template +void mul_mat_vec_inplace(const unsigned int* m, unsigned int* v) +{ + unsigned int r[N] = {0}; + for(int i = 0; i < N; i++) + { + for(int j = 0; j < M; j++) + { + if(v[i] & (1U << j)) + { + for(int k = 0; k < N; k++) + { + r[k] ^= m[N * (i * M + j) + k]; + } + } + } + } + copy_arr(v, r); +} + +/// @brief Multiplies NxMxN matrices \p b and \p a and stores the result in \p a. +template +void mul_mat_mat_inplace(unsigned int* a, const unsigned int* b) +{ + for(int i = 0; i < N * M; i++) + { + mul_mat_vec_inplace(b, a + i * N); + } +} + +/// @brief Computes exponentiation of matrix \p b to \p power by squaring and stores result into \p a. +template +void mat_pow(unsigned int* a, const unsigned int* b, const unsigned long long power) +{ + // Identity matrix + for(int i = 0; i < N; i++) + { + for(int j = 0; j < M; j++) + { + for(int k = 0; k < N; k++) + { + a[(i * M + j) * N + k] = ((i == k) ? 
(1 << j) : 0); + } + } + } + + // Exponentiation by squaring + unsigned int y[SIZE]; + copy_arr(y, b); + for(unsigned long long p = power; p > 0; p >>= 1) + { + if(p & 1) + { + mul_mat_mat_inplace(a, y); + } + + // Square the matrix + unsigned int t[SIZE]; + copy_arr(t, y); + mul_mat_mat_inplace(y, t); + } +} + +/// @brief Writes the C++ code of the declaration (with variable name \p name) and initialization +/// of a precomputed state matrix \p a for a generator named \p generator to a given output +/// stream \p fout. +template +void write_matrices(std::ofstream& fout, + const std::string name, + std::string generator, + unsigned int* a, + bool is_device) +{ + std::transform(generator.begin(), generator.end(), generator.begin(), ::toupper); + fout << "// clang-format off" << std::endl; + fout << "static const " << (is_device ? "__device__ " : "") << "unsigned int " << name << "[" + << generator << "_JUMP_MATRICES][" << generator << "_SIZE] = {" << std::endl; + for(int k = 0; k < JUMP_MATRICES; k++) + { + fout << " {" << std::endl; + for(int i = 0; i < M; i++) + { + fout << " "; + for(int j = 0; j < N * N; j++) + { + fout << a[k * SIZE + i * N * N + j] << ", "; + } + fout << std::endl; + } + fout << " }," << std::endl; + } + fout << "};" << std::endl; + fout << std::endl; + fout << "// clang-format on" << std::endl; +} + +#endif // ROCRAND_TOOLS_UTILS_MATRIX_EXPONENTIATION_HPP_ diff --git a/tools/xorwow_precomputed_generator.cpp b/tools/xorwow_precomputed_generator.cpp index 508b59bb..5370dcb4 100644 --- a/tools/xorwow_precomputed_generator.cpp +++ b/tools/xorwow_precomputed_generator.cpp @@ -1,8 +1,28 @@ -#include +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "utils_matrix_exponentiation.hpp" + #include +#include #include -#include - const int XORWOW_N = 5; // 5 values const int XORWOW_M = 32; // 32-bit each @@ -17,81 +37,6 @@ const int XORWOW_SEQUENCE_JUMP_LOG2 = 67; static unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE]; static unsigned int sequence_jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE]; - -void copy_mat(unsigned int * dst, const unsigned int * src) -{ - for (int i = 0; i < XORWOW_SIZE; i++) - { - dst[i] = src[i]; - } -} - -void copy_vec(unsigned int * dst, const unsigned int * src) -{ - for (int i = 0; i < XORWOW_N; i++) - { - dst[i] = src[i]; - } -} - -void mul_mat_vec_inplace(const unsigned int * m, unsigned int * v) -{ - unsigned int r[XORWOW_N] = { 0 }; - for (int i = 0; i < XORWOW_N; i++) - { - for (int j = 0; j < XORWOW_M; j++) - { - if (v[i] & (1U << j)) - { - for (int k = 0; k < XORWOW_N; k++) - { - r[k] ^= m[XORWOW_N * (i * XORWOW_M + j) + k]; - } - } - } - } - copy_vec(v, r); -} - -void mul_mat_mat_inplace(unsigned int * a, const unsigned int * b) -{ - for (int i = 0; i < XORWOW_N * XORWOW_M; i++) - { - mul_mat_vec_inplace(b, a + i * XORWOW_N); - } -} - -void mat_pow(unsigned int * a, const unsigned int * b, const unsigned long long power) -{ - // Identity matrix - for (int i = 0; i < XORWOW_N; i++) - { - for (int j = 0; j < XORWOW_M; j++) - { - for (int k = 0; k < XORWOW_N; k++) - { - a[(i * XORWOW_M + j) * XORWOW_N + k] = ((i == k) ? 
(1 << j) : 0); - } - } - } - - // Exponentiation by squaring - unsigned int y[XORWOW_SIZE]; - copy_mat(y, b); - for (unsigned long long p = power; p > 0; p >>= 1) - { - if (p & 1) - { - mul_mat_mat_inplace(a, y); - } - - // Square the matrix - unsigned int t[XORWOW_SIZE]; - copy_mat(t, y); - mul_mat_mat_inplace(y, t); - } -} - struct rocrand_xorwow_state { // Xorshift values (160 bits) @@ -140,59 +85,40 @@ void generate_matrices() { unsigned int a[XORWOW_SIZE]; unsigned int b[XORWOW_SIZE]; - copy_mat(a, one_step); + copy_arr(a, one_step); - copy_mat(jump_matrices[0], a); + copy_arr(jump_matrices[0], a); for (int k = 1; k < XORWOW_JUMP_MATRICES; k++) { - copy_mat(b, a); - mat_pow(a, b, (1 << XORWOW_JUMP_LOG2)); - copy_mat(jump_matrices[k], a); + copy_arr(b, a); + mat_pow(a, b, (1 << XORWOW_JUMP_LOG2)); + copy_arr(jump_matrices[k], a); } } { unsigned int a[XORWOW_SIZE]; unsigned int b[XORWOW_SIZE]; - copy_mat(a, one_step); + copy_arr(a, one_step); // For 67: A^(2^33) - mat_pow(b, a, 1ULL << (XORWOW_SEQUENCE_JUMP_LOG2 / 2)); + mat_pow(b, a, 1ULL << (XORWOW_SEQUENCE_JUMP_LOG2 / 2)); // For 67: (A^(2^33))^(2^34) = A^(2^67) - mat_pow(a, b, 1ULL << (XORWOW_SEQUENCE_JUMP_LOG2 - XORWOW_SEQUENCE_JUMP_LOG2 / 2)); + mat_pow( + a, + b, + 1ULL << (XORWOW_SEQUENCE_JUMP_LOG2 - XORWOW_SEQUENCE_JUMP_LOG2 / 2)); - copy_mat(sequence_jump_matrices[0], a); + copy_arr(sequence_jump_matrices[0], a); for (int k = 1; k < XORWOW_JUMP_MATRICES; k++) { - copy_mat(b, a); - mat_pow(a, b, (1 << XORWOW_JUMP_LOG2)); - copy_mat(sequence_jump_matrices[k], a); - } - } -} - -void write_matrices(std::ofstream& fout, const std::string name, unsigned int * a, bool is_device) -{ - fout << "static const " << (is_device ? 
"__device__ " : "") << "unsigned int " << name << "[XORWOW_JUMP_MATRICES][XORWOW_SIZE] = {" << std::endl; - for (int k = 0; k < XORWOW_JUMP_MATRICES; k++) - { - fout << " {" << std::endl; - for (int i = 0; i < XORWOW_M; i++) - { - fout << " "; - for (int j = 0; j < XORWOW_N * XORWOW_N; j++) - { - fout << a[k * XORWOW_SIZE + i * XORWOW_N * XORWOW_N + j] << ", "; - } - fout << std::endl; + copy_arr(b, a); + mat_pow(a, b, (1 << XORWOW_JUMP_LOG2)); + copy_arr(sequence_jump_matrices[k], a); } - fout << " }," << std::endl; } - fout << "};" << std::endl; - fout << std::endl; } - int main(int argc, char const *argv[]) { if (argc != 2 || std::string(argv[1]) == "--help") { @@ -205,7 +131,7 @@ int main(int argc, char const *argv[]) { const std::string file_path(argv[1]); std::ofstream fout(file_path, std::ios_base::out | std::ios_base::trunc); - fout << R"(// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. + fout << R"(// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -240,15 +166,31 @@ int main(int argc, char const *argv[]) { fout << "#define XORWOW_JUMP_LOG2 " << XORWOW_JUMP_LOG2 << std::endl; fout << std::endl; - write_matrices(fout, "d_xorwow_jump_matrices", - static_cast(&jump_matrices[0][0]), true); - write_matrices(fout, "h_xorwow_jump_matrices", - static_cast(&jump_matrices[0][0]), false); - - write_matrices(fout, "d_xorwow_sequence_jump_matrices", - static_cast(&sequence_jump_matrices[0][0]), true); - write_matrices(fout, "h_xorwow_sequence_jump_matrices", - static_cast(&sequence_jump_matrices[0][0]), false); + write_matrices( + fout, + "d_xorwow_jump_matrices", + "xorwow", + static_cast(&jump_matrices[0][0]), + true); + write_matrices( + fout, + "h_xorwow_jump_matrices", + "xorwow", + static_cast(&jump_matrices[0][0]), + false); + + write_matrices( + fout, + "d_xorwow_sequence_jump_matrices", + "xorwow", + static_cast(&sequence_jump_matrices[0][0]), + true); + write_matrices( + fout, + "h_xorwow_sequence_jump_matrices", + "xorwow", + static_cast(&sequence_jump_matrices[0][0]), + false); fout << R"( #endif // ROCRAND_XORWOW_PRECOMPUTED_H_