diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b08796f1..651fc00a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ Documentation for rocRAND is available at [https://rocm.docs.amd.com/projects/rocRAND/en/latest/](https://rocm.docs.amd.com/projects/rocRAND/en/latest/) +## (unreleased) rocRAND 3.3.0 for ROCm 6.5 + +### Changed +* Updated several `gfx942` auto tuning parameters. + +### Resolved issues +* Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning. + ## rocRAND 3.3.0 for ROCm 6.4 ### Added diff --git a/library/src/rng/config/lfsr113_config.hpp b/library/src/rng/config/lfsr113_config.hpp index 3556dc240..a5cacb8ec 100644 --- a/library/src/rng/config/lfsr113_config.hpp +++ b/library/src/rng/config/lfsr113_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 64; case target_arch::gfx1030: return 64; - case target_arch::gfx942: return 512; + case target_arch::gfx942: return 256; case target_arch::gfx90a: return 64; case target_arch::gfx908: return 256; case target_arch::gfx906: return 256; diff --git a/library/src/rng/config/mrg32k3a_config.hpp b/library/src/rng/config/mrg32k3a_config.hpp index 5857b7a11..6b6d4a233 100644 --- a/library/src/rng/config/mrg32k3a_config.hpp +++ b/library/src/rng/config/mrg32k3a_config.hpp @@ -41,7 +41,7 @@ struct generator_config_selector case target_arch::gfx1102: return 128; case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 128; - case target_arch::gfx942: return 256; + case target_arch::gfx942: return 1024; case target_arch::gfx90a: return 256; case target_arch::gfx1030: return 256; case target_arch::gfx908: return 1024; diff --git a/library/src/rng/config/mt19937_config.hpp b/library/src/rng/config/mt19937_config.hpp index 8a02a0d67..21605273f 100644 --- a/library/src/rng/config/mt19937_config.hpp +++ b/library/src/rng/config/mt19937_config.hpp @@ -28,7 +28,7 @@ * This file is 
automatically generated by `/scripts/config-tuning/select_best_config.py`. */ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -41,7 +41,7 @@ struct generator_config_selector case target_arch::gfx1102: return 128; case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 64; - case target_arch::gfx942: return 128; + case target_arch::gfx942: return 256; case target_arch::gfx90a: return 1024; case target_arch::gfx908: return 512; default: @@ -67,4 +67,4 @@ struct generator_config_selector -} // end namespace rocrand_host::detail +} // end namespace rocrand_impl::host -#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_ +#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_ \ No newline at end of file diff --git a/library/src/rng/config/philox4_32_10_config.hpp b/library/src/rng/config/philox4_32_10_config.hpp index abc4e1bdc..8214c792c 100644 --- a/library/src/rng/config/philox4_32_10_config.hpp +++ b/library/src/rng/config/philox4_32_10_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 1024; case target_arch::gfx1100: return 512; case target_arch::gfx1030: return 1024; - case target_arch::gfx942: return 1024; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 512; case target_arch::gfx908: return 512; case target_arch::gfx906: return 64; diff --git a/library/src/rng/config/threefry2_32_20_config.hpp b/library/src/rng/config/threefry2_32_20_config.hpp index 9f614754d..1635e8971 100644 --- a/library/src/rng/config/threefry2_32_20_config.hpp +++ b/library/src/rng/config/threefry2_32_20_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 256; case target_arch::gfx1100: return 1024; case target_arch::gfx1030: return 256; - case target_arch::gfx942: return 256; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 512; case target_arch::gfx908: return 512; case target_arch::gfx906: return 256; diff --git a/library/src/rng/config/threefry4_32_20_config.hpp 
b/library/src/rng/config/threefry4_32_20_config.hpp index c2e2cf8ec..78f0f6eb0 100644 --- a/library/src/rng/config/threefry4_32_20_config.hpp +++ b/library/src/rng/config/threefry4_32_20_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 512; case target_arch::gfx1100: return 1024; case target_arch::gfx1030: return 1024; - case target_arch::gfx942: return 1024; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 256; case target_arch::gfx908: return 256; case target_arch::gfx906: return 256; diff --git a/library/src/rng/mt19937.hpp b/library/src/rng/mt19937.hpp index 0cb7bd6be..f17fe70e6 100644 --- a/library/src/rng/mt19937.hpp +++ b/library/src/rng/mt19937.hpp @@ -50,6 +50,7 @@ #ifndef ROCRAND_RNG_MT19937_H_ #define ROCRAND_RNG_MT19937_H_ +#include "config/mt19937_config.hpp" #include "common.hpp" #include "config_types.hpp" @@ -874,18 +875,19 @@ class mt19937_generator_template : public generator_impl_base system_type::free(d_mt19937_jump); // This kernel is not actually tuned for ordering, but config is needed for device-side compile time check of the generator count - dynamic_dispatch(m_order, - [&, this](auto is_dynamic) - { - status = system_type::template launch< - init_engines_mt19937>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - d_engines); - }); + dynamic_dispatch( + m_order, + [&, this](auto is_dynamic) + { + status + = system_type::template launch, + ConfigProvider>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + d_engines); + }); if(status != ROCRAND_STATUS_SUCCESS) { system_type::free(d_engines); @@ -983,20 +985,22 @@ class mt19937_generator_template : public generator_impl_base is_dynamic, T, vec_type, - Distribution>>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - m_start_input, - data, - size, - vec_data, - vec_size, - head_size, - tail_size, - distribution); + Distribution>, + 
ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + m_start_input, + data, + size, + vec_data, + vec_size, + head_size, + tail_size, + distribution); }); if(status != ROCRAND_STATUS_SUCCESS) { @@ -1014,20 +1018,22 @@ class mt19937_generator_template : public generator_impl_base is_dynamic, T, vec_type, - Distribution>>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - m_start_input, - data, - size, - vec_data, - vec_size, - head_size, - tail_size, - distribution); + Distribution>, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + m_start_input, + data, + size, + vec_data, + vec_size, + head_size, + tail_size, + distribution); }); if(status != ROCRAND_STATUS_SUCCESS) { @@ -1122,4 +1128,4 @@ using mt19937_generator_host } // namespace rocrand_impl::host -#endif // ROCRAND_RNG_MT19937_H_ +#endif // ROCRAND_RNG_MT19937_H_ \ No newline at end of file