From 34eea922d336426059597f1cbfc3c1eec1675e77 Mon Sep 17 00:00:00 2001 From: NguyenNhuDi Date: Mon, 9 Dec 2024 15:27:04 -0600 Subject: [PATCH 1/3] fixed auto tuning mt19937 kernel error and updated auto tuning parameters --- library/src/rng/config/lfsr113_config.hpp | 2 +- library/src/rng/config/mrg32k3a_config.hpp | 2 +- library/src/rng/config/mt19937_config.hpp | 6 +- .../src/rng/config/philox4_32_10_config.hpp | 2 +- .../src/rng/config/threefry2_32_20_config.hpp | 2 +- .../src/rng/config/threefry4_32_20_config.hpp | 2 +- library/src/rng/mt19937.hpp | 88 ++++++++++--------- 7 files changed, 55 insertions(+), 49 deletions(-) diff --git a/library/src/rng/config/lfsr113_config.hpp b/library/src/rng/config/lfsr113_config.hpp index 3556dc240..a5cacb8ec 100644 --- a/library/src/rng/config/lfsr113_config.hpp +++ b/library/src/rng/config/lfsr113_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 64; case target_arch::gfx1030: return 64; - case target_arch::gfx942: return 512; + case target_arch::gfx942: return 256; case target_arch::gfx90a: return 64; case target_arch::gfx908: return 256; case target_arch::gfx906: return 256; diff --git a/library/src/rng/config/mrg32k3a_config.hpp b/library/src/rng/config/mrg32k3a_config.hpp index 5857b7a11..6b6d4a233 100644 --- a/library/src/rng/config/mrg32k3a_config.hpp +++ b/library/src/rng/config/mrg32k3a_config.hpp @@ -41,7 +41,7 @@ struct generator_config_selector case target_arch::gfx1102: return 128; case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 128; - case target_arch::gfx942: return 256; + case target_arch::gfx942: return 1024; case target_arch::gfx90a: return 256; case target_arch::gfx1030: return 256; case target_arch::gfx908: return 1024; diff --git a/library/src/rng/config/mt19937_config.hpp b/library/src/rng/config/mt19937_config.hpp index 8a02a0d67..21605273f 100644 --- a/library/src/rng/config/mt19937_config.hpp 
+++ b/library/src/rng/config/mt19937_config.hpp @@ -28,7 +28,7 @@ * This file is automatically generated by `/scripts/config-tuning/select_best_config.py`. */ -namespace rocrand_host::detail +namespace rocrand_impl::host { template @@ -41,7 +41,7 @@ struct generator_config_selector case target_arch::gfx1102: return 128; case target_arch::gfx1101: return 128; case target_arch::gfx1100: return 64; - case target_arch::gfx942: return 128; + case target_arch::gfx942: return 256; case target_arch::gfx90a: return 1024; case target_arch::gfx908: return 512; default: @@ -67,4 +67,4 @@ struct generator_config_selector } // end namespace rocrand_host::detail -#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_ +#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_ \ No newline at end of file diff --git a/library/src/rng/config/philox4_32_10_config.hpp b/library/src/rng/config/philox4_32_10_config.hpp index abc4e1bdc..8214c792c 100644 --- a/library/src/rng/config/philox4_32_10_config.hpp +++ b/library/src/rng/config/philox4_32_10_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 1024; case target_arch::gfx1100: return 512; case target_arch::gfx1030: return 1024; - case target_arch::gfx942: return 1024; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 512; case target_arch::gfx908: return 512; case target_arch::gfx906: return 64; diff --git a/library/src/rng/config/threefry2_32_20_config.hpp b/library/src/rng/config/threefry2_32_20_config.hpp index 9f614754d..1635e8971 100644 --- a/library/src/rng/config/threefry2_32_20_config.hpp +++ b/library/src/rng/config/threefry2_32_20_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 256; case target_arch::gfx1100: return 1024; case target_arch::gfx1030: return 256; - case target_arch::gfx942: return 256; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 512; case target_arch::gfx908: return 512; case target_arch::gfx906: 
return 256; diff --git a/library/src/rng/config/threefry4_32_20_config.hpp b/library/src/rng/config/threefry4_32_20_config.hpp index c2e2cf8ec..78f0f6eb0 100644 --- a/library/src/rng/config/threefry4_32_20_config.hpp +++ b/library/src/rng/config/threefry4_32_20_config.hpp @@ -42,7 +42,7 @@ struct generator_config_selector case target_arch::gfx1101: return 512; case target_arch::gfx1100: return 1024; case target_arch::gfx1030: return 1024; - case target_arch::gfx942: return 1024; + case target_arch::gfx942: return 512; case target_arch::gfx90a: return 256; case target_arch::gfx908: return 256; case target_arch::gfx906: return 256; diff --git a/library/src/rng/mt19937.hpp b/library/src/rng/mt19937.hpp index 0cb7bd6be..f17fe70e6 100644 --- a/library/src/rng/mt19937.hpp +++ b/library/src/rng/mt19937.hpp @@ -50,6 +50,7 @@ #ifndef ROCRAND_RNG_MT19937_H_ #define ROCRAND_RNG_MT19937_H_ +#include "config/mt19937_config.hpp" #include "common.hpp" #include "config_types.hpp" @@ -874,18 +875,19 @@ class mt19937_generator_template : public generator_impl_base system_type::free(d_mt19937_jump); // This kernel is not actually tuned for ordering, but config is needed for device-side compile time check of the generator count - dynamic_dispatch(m_order, - [&, this](auto is_dynamic) - { - status = system_type::template launch< - init_engines_mt19937>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - d_engines); - }); + dynamic_dispatch( + m_order, + [&, this](auto is_dynamic) + { + status + = system_type::template launch, + ConfigProvider>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + d_engines); + }); if(status != ROCRAND_STATUS_SUCCESS) { system_type::free(d_engines); @@ -983,20 +985,22 @@ class mt19937_generator_template : public generator_impl_base is_dynamic, T, vec_type, - Distribution>>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - m_start_input, - data, - size, - vec_data, - 
vec_size, - head_size, - tail_size, - distribution); + Distribution>, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + m_start_input, + data, + size, + vec_data, + vec_size, + head_size, + tail_size, + distribution); }); if(status != ROCRAND_STATUS_SUCCESS) { @@ -1014,20 +1018,22 @@ class mt19937_generator_template : public generator_impl_base is_dynamic, T, vec_type, - Distribution>>( - dim3(config.blocks), - dim3(config.threads), - 0, - m_stream, - m_engines, - m_start_input, - data, - size, - vec_data, - vec_size, - head_size, - tail_size, - distribution); + Distribution>, + ConfigProvider, + T, + is_dynamic>(dim3(config.blocks), + dim3(config.threads), + 0, + m_stream, + m_engines, + m_start_input, + data, + size, + vec_data, + vec_size, + head_size, + tail_size, + distribution); }); if(status != ROCRAND_STATUS_SUCCESS) { @@ -1122,4 +1128,4 @@ using mt19937_generator_host } // namespace rocrand_impl::host -#endif // ROCRAND_RNG_MT19937_H_ +#endif // ROCRAND_RNG_MT19937_H_ \ No newline at end of file From a239571c584f3d724defa248b251caad79ed19eb Mon Sep 17 00:00:00 2001 From: NguyenNhuDi Date: Mon, 9 Dec 2024 15:30:34 -0600 Subject: [PATCH 2/3] updated changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b08796f1..473863d59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ Documentation for rocRAND is available at [https://rocm.docs.amd.com/projects/rocRAND/en/latest/](https://rocm.docs.amd.com/projects/rocRAND/en/latest/) +## (unreleased) rocRAND 3.3.0 for ROCm 6.5 + +### Changed +* Updated several `gfx942` auto tuning parameters. + +### Fixed +* Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning. 
+ ## rocRAND 3.3.0 for ROCm 6.4 ### Added From fb16e255a6feff99962469c270f5c43620e1e97b Mon Sep 17 00:00:00 2001 From: Di Nguyen Date: Tue, 10 Dec 2024 09:53:52 -0700 Subject: [PATCH 3/3] Update CHANGELOG.md Co-authored-by: Jeffrey Novotny --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 473863d59..651fc00a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ Documentation for rocRAND is available at ### Changed * Updated several `gfx942` auto tuning parameters. -### Fixed +### Resolved issues * Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning. ## rocRAND 3.3.0 for ROCm 6.4