From 6954818b6f03e171110c232ce9453af3981be04a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:23:53 +0200 Subject: [PATCH 01/82] Update rocm and cce versions for both corona and tioga, updates of lassen specs --- .gitlab/jobs/corona.yml | 4 ++-- .gitlab/jobs/lassen.yml | 19 ++++++++++++++----- .gitlab/jobs/poodle.yml | 12 ++---------- .gitlab/jobs/ruby.yml | 10 ++-------- .gitlab/jobs/tioga.yml | 8 ++++---- scripts/radiuss-spack-configs | 2 +- 6 files changed, 25 insertions(+), 30 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index abbafe5bb9..ae24d52c5d 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -26,9 +26,9 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_5_7_0_hip_desul_atomics: +rocmcc_6_0_2_hip_desul_atomics: variables: - SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=5.7.0 ^hip@5.7.0 ^blt@develop" + SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=6.0.2 ^hip@6.0.2 ^blt@develop" extends: .job_on_corona clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index dc21689ce3..bb04df7c40 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -27,6 +27,14 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: extends: .job_on_lassen allow_failure: true +xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: + variables: + SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS} ^blt@develop" + MODULE_LIST: "cuda/11.8.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + allow_failure: true + ############ # Extra jobs ############ @@ -39,11 +47,11 @@ gcc_8_3_1_omptask: SPEC: " ~shared +openmp +omptask +tests %gcc@=8.3.1 ^blt@develop" extends: .job_on_lassen -gcc_8_3_1_cuda_11_5_0_ats_disabled: +gcc_8_3_1_cuda_11_7_0_ats_disabled: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +tests +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^blt@develop" - MODULE_LIST: "cuda/11.5.0" + SPEC: " ~shared +openmp +tests +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.7.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" ########## @@ -62,9 +70,10 @@ clang_13_0_1_libcpp: # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/suppressions.asan" # extends: .job_on_lassen -gcc_8_3_1_cuda_10_1_243_desul_atomics: +gcc_8_3_1_cuda_11_7_desul_atomics: variables: - SPEC: " ~shared +openmp +tests +cuda +desul %gcc@=8.3.1 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ^blt@develop" + SPEC: " ~shared +openmp +tests +cuda +desul %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.7.0" extends: .job_on_lassen # Warning: Allowed to fail temporarily diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 54870e37aa..714f01b0b7 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -29,18 +29,10 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask +tests %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -# Identical to shared job, but use OpenMP tasks and no vectorization -# Deactivated (too long on poodle) -intel_19_1_2_gcc_10_3_1: - variables: - ON_POODLE: "OFF" - SPEC: " ~shared +openmp +omptask +tests %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_poodle - # Allowed to fail -intel_2022_1_0: +intel_2023_2_1: variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" allow_failure: true extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 2242494b9c..49cd0d2389 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,16 +29,10 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask +tests %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby -# Identical to shared job, but use OpenMP tasks and no vectorization -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: " ~shared +openmp +omptask +tests %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" - extends: .job_on_ruby - # Allowed to fail -intel_2022_1_0: +intel_2023_2_1: variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" allow_failure: true extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 50b60bc13d..8072d95791 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -26,12 +26,12 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_1_hip_desul_atomics: +rocmcc_6_1_2_hip_desul_atomics: variables: - SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.1.2 ^hip@6.1.2 ^blt@develop" extends: .job_on_tioga -rocmcc_6_1_1_hip_openmp: +rocmcc_6_1_2_hip_openmp: variables: - SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.1.2 ^hip@6.1.2 ^blt@develop" extends: .job_on_tioga diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 54c09b5dcf..8978ee09a0 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 54c09b5dcf45decaac2b1e6d1048671cde17f7e5 +Subproject commit 8978ee09a0f3b70305b4c5b6eb07a6b396315030 From c7d4d4c85ed2aaab72cc9b9d69abda80c62c71af Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:42:38 +0200 Subject: [PATCH 02/82] From RSC: Fix: add missing compilers and corresponding external packages --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 8978ee09a0..b0bdf60b43 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 8978ee09a0f3b70305b4c5b6eb07a6b396315030 +Subproject commit b0bdf60b4360c3dc7a257781c7eb4775d74064dc From 0b5e471cb65492230357cd2497c053a022cdbb15 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:36:42 +0200 Subject: [PATCH 03/82] From RSC: Deactivate rocm 5.7 job on tioga --- .gitlab/jobs/corona.yml | 4 ++-- scripts/radiuss-spack-configs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index ae24d52c5d..9b64dbc0a4 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -26,9 +26,9 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_0_2_hip_desul_atomics: +rocmcc_5_7_1_hip_desul_atomics: variables: - SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=6.0.2 ^hip@6.0.2 ^blt@develop" + SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=5.7.1 ^hip@5.7.1 ^blt@develop" extends: .job_on_corona clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index b0bdf60b43..b9739eb6c0 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit b0bdf60b4360c3dc7a257781c7eb4775d74064dc +Subproject commit b9739eb6c050bf29ad1204354ec3afd5be6a5334 From c1170a1c9aa35e790cea763081abff7b547e5e33 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:37:07 +0200 Subject: [PATCH 04/82] From RSC: Fix: need to point at compiler wrapper with cuda 11.8 defined, module is not enough --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index b9739eb6c0..7e6f52cbfe 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit b9739eb6c050bf29ad1204354ec3afd5be6a5334 +Subproject commit 7e6f52cbfe9b9a148c205e6d7bba7bf7d2e3e9d8 From b6e364517eb1bd20f2444d885a9964e6359748b8 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:33:54 +0200 Subject: [PATCH 05/82] =?UTF-8?q?From=20RSC:=20Fix:=20use=20wrapper=20with?= =?UTF-8?q?=20cuda=2011.8=20consistently=20+=20change=20in=20naming=20conv?= =?UTF-8?q?ention=20to=20match=20LC=E2=80=99s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 7e6f52cbfe..f0f0c1a4d0 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 7e6f52cbfe9b9a148c205e6d7bba7bf7d2e3e9d8 +Subproject commit f0f0c1a4d02bddc1ad685cc08f6ab01c90c82900 From 919ea3b645921e1d96ead0b9e3e466ec697ca24d Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:01:46 +0200 Subject: [PATCH 06/82] Do not allow intel@2023.2.1 jobs to fail on ruby and poodle --- .gitlab/jobs/poodle.yml | 7 ------- .gitlab/jobs/ruby.yml | 7 ------- 2 files changed, 14 deletions(-) diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 714f01b0b7..1178dae89e 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -29,13 +29,6 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask +tests %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -# Allowed to fail -intel_2023_2_1: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" - allow_failure: true - extends: .job_on_poodle - ############ # Extra jobs ############ diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 49cd0d2389..458e4c7b57 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,13 +29,6 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask +tests %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby -# Allowed to fail -intel_2023_2_1: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" - allow_failure: true - extends: .job_on_ruby - ############ # Extra jobs ############ From b1386d98c82cb847676e5a115ca88714a355791b Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:25:44 +0200 Subject: [PATCH 07/82] From RSC: Add cuda to xl spec relying on LC wrapper with cuda --- .gitlab/jobs/lassen.yml | 2 +- scripts/radiuss-spack-configs | 2 +- tpl/camp | 2 +- tpl/desul | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index bb04df7c40..91980b847c 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -29,7 +29,7 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS} ^blt@develop" + SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.cuda.11.8.0.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS} ^blt@develop" MODULE_LIST: "cuda/11.8.0" LASSEN_JOB_ALLOC: "1 -W 60 -q pci" extends: .job_on_lassen diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index f0f0c1a4d0..9639236da5 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit f0f0c1a4d02bddc1ad685cc08f6ab01c90c82900 +Subproject commit 9639236da58277efbcea54208ad52cf69556be8c diff --git a/tpl/camp b/tpl/camp index d580fd8feb..0f07de4240 160000 --- a/tpl/camp +++ b/tpl/camp @@ -1 +1 @@ -Subproject commit d580fd8feb10ddb7a63a784b4afcd857ac686e39 +Subproject commit 0f07de4240c42e0b38a8d872a20440cb4b33d9f5 diff --git a/tpl/desul b/tpl/desul index 6114dd25b5..afbd4486b2 160000 --- a/tpl/desul +++ b/tpl/desul @@ -1 +1 @@ -Subproject commit 6114dd25b54782678c555c0c1d2197f13cc8d2a0 +Subproject commit afbd4486b2b39211e8ded3f04b12c2e0f3a25248 From 949414a9dcd1ff02a7baff918d7de2785675d268 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:53:23 +0200 Subject: [PATCH 08/82] From RSC: Fix --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 9639236da5..e4d5ecfe3b 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 9639236da58277efbcea54208ad52cf69556be8c +Subproject commit e4d5ecfe3b788b6ddfec5b963b769ac6a38193c2 From b6357a50f52170176bc20e92b71b43e43d25ed4a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 7 Aug 2024 17:06:19 +0200 Subject: [PATCH 09/82] From RSC: Clean drop of rocm 5.7.0 in favor on 5.7.1 on corona --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index e4d5ecfe3b..6e5c30da6d 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit e4d5ecfe3b788b6ddfec5b963b769ac6a38193c2 +Subproject commit 6e5c30da6d845e8772e84101e8e1a5cdd9fcb1e6 From 0260d5d76dc9b7f23b535f30f8003efa812dce47 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 9 Aug 2024 16:08:36 +0200 Subject: [PATCH 10/82] From RSC: Update cray-mpich and add rocm 6.2.0: only apply cray-mpich first --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 6e5c30da6d..df21a7fb15 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 6e5c30da6d845e8772e84101e8e1a5cdd9fcb1e6 +Subproject commit df21a7fb158e79e58443249cecfa5f8b90e021fe From 81cc3282aec3f66303290483c79d95fe9f63368d Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 9 Aug 2024 16:19:17 +0200 Subject: [PATCH 11/82] Update rocm in tioga CI --- .gitlab/jobs/tioga.yml | 8 ++++---- scripts/radiuss-spack-configs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 8072d95791..308b155ba9 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -26,12 +26,12 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_2_hip_desul_atomics: +rocmcc_6_2_0_hip_desul_atomics: variables: - SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.1.2 ^hip@6.1.2 ^blt@develop" + SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga -rocmcc_6_1_2_hip_openmp: +rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.1.2 ^hip@6.1.2 ^blt@develop" + SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index df21a7fb15..f504356322 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit df21a7fb158e79e58443249cecfa5f8b90e021fe +Subproject commit f504356322ca77bdb70f40c8d9d7a74a7fede822 From d9fa65edeae5d2ef73f8e9c2ebc745ac1d8c9daf Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 9 Aug 2024 23:46:12 +0200 Subject: [PATCH 12/82] From RSC: Enforce coherency between rocm software stack and compiler + fix --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index f504356322..56c55b0988 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit f504356322ca77bdb70f40c8d9d7a74a7fede822 +Subproject commit 56c55b0988ec09473f3abd943c2135da138bec1c From 9865c3d71c2252b6cb3f036d31626c3e5c497859 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:16:38 +0200 Subject: [PATCH 13/82] From RSC: Fix typo: rocm compiler is rocmcc --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 56c55b0988..4e8046425d 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 56c55b0988ec09473f3abd943c2135da138bec1c +Subproject commit 4e8046425de66ad1119ecd4f93007b00edfcc5c2 From de9f415eba6379658eab8e1ac0fe4b287247b4fc Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 22 Aug 2024 16:53:34 -0700 Subject: [PATCH 14/82] intial commit for perfectly nested prototype --- examples/launch_reductions.cpp | 14 +++++++------- include/RAJA/pattern/launch/launch_core.hpp | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/examples/launch_reductions.cpp b/examples/launch_reductions.cpp index 24e313e649..9a514bdc49 100644 --- a/examples/launch_reductions.cpp +++ b/examples/launch_reductions.cpp @@ -40,7 +40,7 @@ using device_launch = RAJA::cuda_launch_t; using device_loop = RAJA::cuda_global_thread_x; #elif defined(RAJA_ENABLE_HIP) using device_launch = RAJA::hip_launch_t; -using device_loop = RAJA::hip_global_thread_x; +using device_loop = RAJA::hip_thread_xyz_loop; #endif using launch_policy = RAJA::LaunchPolicy(ctx, arange, [&] (int i) { - + RAJA::expt::loop(ctx, arange, arange, arange, [&] (int i, int j, int k) { + kernel_sum += a[i]; - + kernel_min.min(a[i]); kernel_max.max(a[i]); - + kernel_minloc.minloc(a[i], i); kernel_maxloc.maxloc(a[i], i); }); - + }); std::cout << "\tsum = " << kernel_sum.get() << std::endl; diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index b78ec0de92..f1d70aeacb 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -585,6 +585,25 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, body); } +RAJA_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) +{ + + LoopExecute, SEGMENT>::exec(ctx, + segment0, + segment1, + segment2, + body); +} + RAJA_SUPPRESS_HD_WARN template Date: Sun, 1 Sep 2024 14:20:32 -0700 Subject: [PATCH 15/82] revert changes in launch reduction examples --- examples/launch_reductions.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/launch_reductions.cpp b/examples/launch_reductions.cpp index 9a514bdc49..24e313e649 100644 --- a/examples/launch_reductions.cpp +++ b/examples/launch_reductions.cpp @@ -40,7 +40,7 @@ using device_launch = RAJA::cuda_launch_t; using device_loop = RAJA::cuda_global_thread_x; #elif defined(RAJA_ENABLE_HIP) using device_launch = RAJA::hip_launch_t; -using device_loop = RAJA::hip_thread_xyz_loop; +using device_loop = RAJA::hip_global_thread_x; #endif using launch_policy = RAJA::LaunchPolicy(ctx, arange, arange, arange, [&] (int i, int j, int k) { - + RAJA::loop(ctx, arange, [&] (int i) { + kernel_sum += a[i]; - + kernel_min.min(a[i]); kernel_max.max(a[i]); - + kernel_minloc.minloc(a[i], i); kernel_maxloc.maxloc(a[i], i); }); - + }); std::cout << "\tsum = " << kernel_sum.get() << std::endl; From 9161b2b57b49649f9250e8fd2457f7ade6d4f574 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:40:29 +0200 Subject: [PATCH 16/82] Allow failure for intel jobs on ruby and poodle and cce 18 jobs on tioga, add cce 17 job on tioga --- .gitlab/jobs/poodle.yml | 8 ++++++++ .gitlab/jobs/ruby.yml | 8 ++++++++ .gitlab/jobs/tioga.yml | 13 ++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 1178dae89e..c1fb433dbc 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -17,6 +17,14 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # when possible so that the comparison with the original job is easier. +# Known issue currently under investigation +# https://github.com/LLNL/RAJA/pull/1712#issuecomment-2292006843 +intel_2023_2_1: + variables: + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + extends: .job_on_poodle + allow_failure: true + # Identical to shared job, but use OpenMP tasks and no vectorization clang_14_0_6: variables: diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 458e4c7b57..012a822b0d 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,6 +29,14 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask +tests %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby +# Known issue currently under investigation +# https://github.com/LLNL/RAJA/pull/1712#issuecomment-2292006843 +intel_2023_2_1: + variables: + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + extends: .job_on_ruby + allow_failure: true + ############ # Extra jobs ############ diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 308b155ba9..b1e08cb469 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -17,7 +17,13 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # So that the comparison with the original job is easier. -# No overridden jobs so far. +# Compiler error preventing a test to succeed. +# https://github.com/LLNL/RAJA/pull/1712#issuecomment-2316335119 +cce_18_0_0: + variables: + SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=18.0.0 ${PROJECT_TIOGA_DEPS}" + extends: .job_on_tioga + allow_failure: true ############ # Extra jobs @@ -26,6 +32,11 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. +cce_17_0_1: + variables: + SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=17.0.1 ${PROJECT_TIOGA_DEPS}" + extends: .job_on_tioga + rocmcc_6_2_0_hip_desul_atomics: variables: SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" From f5bb9b8bb554bf5636e44a96d1abce5966392327 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 2 Sep 2024 08:48:16 -0700 Subject: [PATCH 17/82] artv3/raja-view-slowdown --- examples/CMakeLists.txt | 4 + examples/raja_view_slowdown.cpp | 145 ++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 examples/raja_view_slowdown.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 4dfd2fbc10..fd1aed62ba 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,6 +3,10 @@ # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### +raja_add_executable( + NAME raja_view_slowdown + SOURCES raja_view_slowdown.cpp) + raja_add_executable( NAME tut_launch_basic SOURCES tut_launch_basic.cpp) diff --git a/examples/raja_view_slowdown.cpp b/examples/raja_view_slowdown.cpp new file mode 100644 index 0000000000..7b7dc8def3 --- /dev/null +++ b/examples/raja_view_slowdown.cpp @@ -0,0 +1,145 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include "RAJA/util/Timer.hpp" +#include + +int main() { + + const int N = 10000; + const int K = 17; + + auto timer = RAJA::Timer(); + + //launch to intialize the stream + RAJA::forall> + (RAJA::RangeSegment(0,1), [=] __device__ (int i) { + printf("launch kernel\n"); + }); + + + int* array = new int[N * N]; + int* array_copy = new int[N * N]; + + //big array, or image + for (int i = 0; i < N * N; ++i) { + array[i] = 1; + array_copy[i] = 1; + } + + //small array that acts as the blur + int* kernel = new int[K * K]; + for (int i = 0; i < K * K; ++i) { + kernel[i] = 2; + } + + // copying to gpu + int* d_array; + int* d_array_copy; + int* d_kernel; + cudaMalloc((void**)&d_array, N * N * sizeof(int)); + cudaMalloc((void**)&d_array_copy, N * N * sizeof(int)); + cudaMalloc((void**)&d_kernel, K * K * sizeof(int)); + cudaMemcpy(d_array, array, N * N * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_array_copy, array_copy, N * N * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel, K * K * sizeof(int), cudaMemcpyHostToDevice); + + + constexpr int DIM = 2; + RAJA::View> array_view(d_array, N, N); + RAJA::View> array_view_copy(d_array_copy, N, N); + RAJA::View> kernel_view(d_kernel, K, K); + + + using EXEC_POL5 = RAJA::KernelPolicy< + RAJA::statement::CudaKernelFixed<256, + RAJA::statement::For<1, RAJA::cuda_global_size_y_direct<16>, + RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<16>, + RAJA::statement::Lambda<0> + > + > + > + >; + + RAJA::RangeSegment range_i(0, N); + RAJA::RangeSegment range_j(0, N); + + +timer.start(); + + RAJA::kernel + (RAJA::make_tuple(range_i, range_j), + [=] RAJA_DEVICE (int i, int j) { + int sum = 0; + + //looping through the "blur" + for (int m = 0; m < K; ++m) { + for (int n = 0; n < K; ++n) { + int x = i + m; + int y = j + n; + + // adding the "blur" to the "image" wherever the blur is located on the image + if (x < N && y < N) { + sum += kernel_view(m, n) * array_view(x, y); + } + } + } + + array_view(i, j) += sum; + } + ); + +timer.stop(); + +std::cout<<"Elapsed time with RAJA view : "< + (RAJA::make_tuple(range_i, range_j), + [=] RAJA_DEVICE (int i, int j) { + int sum = 0; + + // looping through the "blur" + for (int m = 0; m < K; ++m) { + for (int n = 0; n < K; ++n) { + int x = i + m; + int y = j + n; + + // adding the "blur" to the "image" wherever the blur is located on the image + if (x < N && y < N) { + sum += d_kernel[m * K + n] * d_array_copy[x * N + y]; + } + } + } + + d_array_copy[i * N + j] += sum; + } + ); + +timer.stop(); +std::cout<<"Elapsed time with NO RAJA view : "< Date: Wed, 4 Sep 2024 10:29:09 +0200 Subject: [PATCH 18/82] From RSC: Remove XL jobs from shared CI jobs --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 4e8046425d..35c1ae0c45 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 4e8046425de66ad1119ecd4f93007b00edfcc5c2 +Subproject commit 35c1ae0c45952c4aaccb981941572379328f4c7e From 2a3ce5cbe38651e2f08c0091e73fff9e33125f9b Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:10:02 +0200 Subject: [PATCH 19/82] Remove XL jobs defined locally too --- .gitlab/jobs/lassen.yml | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 91980b847c..1d4f05f038 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -17,23 +17,7 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # So that the comparison with the original job is easier. -# Overriding shared spec: Longer allocation + extra flags -# Warning: allowed to fail because of a bug in Spack > 0.20.3 -xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS} ^blt@develop" - MODULE_LIST: "cuda/11.2.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - allow_failure: true - -xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.cuda.11.8.0.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS} ^blt@develop" - MODULE_LIST: "cuda/11.8.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - allow_failure: true +# No overridden jobs so far. ############ # Extra jobs From ad48eb6527cae99fd3fa2aa8c4032810368bf89f Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:11:07 +0200 Subject: [PATCH 20/82] Point at main branch in RSC --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 35c1ae0c45..f6724f418d 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 35c1ae0c45952c4aaccb981941572379328f4c7e +Subproject commit f6724f418d29edd1448217c817384ff54fcc7c47 From ac40ebd346e1951e78d19a1f47a8fac88df54ed4 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 5 Sep 2024 21:27:00 +0200 Subject: [PATCH 21/82] Do not enforce blt@develop anymore --- .gitlab/custom-jobs-and-variables.yml | 10 +++++----- .gitlab/jobs/corona.yml | 4 ++-- .gitlab/jobs/lassen.yml | 10 +++++----- .gitlab/jobs/poodle.yml | 6 +++--- .gitlab/jobs/ruby.yml | 6 +++--- .gitlab/jobs/tioga.yml | 4 ++-- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index b04cf0de1d..ca6f5f9011 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -21,7 +21,7 @@ variables: # Project specific variants for ruby PROJECT_RUBY_VARIANTS: "~shared +openmp +vectorization +tests" # Project specific deps for ruby - PROJECT_RUBY_DEPS: "^blt@develop " + PROJECT_RUBY_DEPS: # Poodle # Arguments for top level allocation @@ -31,7 +31,7 @@ variables: # Project specific variants for poodle PROJECT_POODLE_VARIANTS: "~shared +openmp +vectorization +tests" # Project specific deps for poodle - PROJECT_POODLE_DEPS: "^blt@develop " + PROJECT_POODLE_DEPS: # Corona # Arguments for top level allocation @@ -41,7 +41,7 @@ variables: # Project specific variants for corona PROJECT_CORONA_VARIANTS: "~shared ~openmp +vectorization +tests" # Project specific deps for corona - PROJECT_CORONA_DEPS: "^blt@develop " + PROJECT_CORONA_DEPS: # Tioga # Arguments for top level allocation @@ -51,7 +51,7 @@ variables: # Project specific variants for corona PROJECT_TIOGA_VARIANTS: "~shared +openmp +vectorization +tests" # Project specific deps for corona - PROJECT_TIOGA_DEPS: "^blt@develop " + PROJECT_TIOGA_DEPS: # Lassen and Butte use a different job scheduler (spectrum lsf) that does not # allow pre-allocation the same way slurm does. @@ -60,7 +60,7 @@ variables: # Project specific variants for lassen PROJECT_LASSEN_VARIANTS: "~shared +openmp +vectorization +tests cuda_arch=70" # Project specific deps for lassen - PROJECT_LASSEN_DEPS: "^blt@develop " + PROJECT_LASSEN_DEPS: # Configuration shared by build and test jobs specific to this project. # Not all configuration can be shared. Here projects can fine tune the diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 9b64dbc0a4..930f394f94 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -28,12 +28,12 @@ rocmcc_5_7_1_hip_desul_atomics: variables: - SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=5.7.1 ^hip@5.7.1 ^blt@develop" + SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=5.7.1 ^hip@5.7.1" extends: .job_on_corona clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\"" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 1d4f05f038..2724d38a6b 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -28,13 +28,13 @@ gcc_8_3_1_omptask: variables: - SPEC: " ~shared +openmp +omptask +tests %gcc@=8.3.1 ^blt@develop" + SPEC: " ~shared +openmp +omptask +tests %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" extends: .job_on_lassen gcc_8_3_1_cuda_11_7_0_ats_disabled: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +tests +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ^blt@develop" + SPEC: " ~shared +openmp +tests +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" MODULE_LIST: "cuda/11.7.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" @@ -44,7 +44,7 @@ gcc_8_3_1_cuda_11_7_0_ats_disabled: clang_13_0_1_libcpp: variables: - SPEC: " ~shared +openmp +tests %clang@=13.0.1 cflags==\"-DGTEST_HAS_CXXABI_H_=0\" cxxflags==\"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0\" ^blt@develop" + SPEC: " ~shared +openmp +tests %clang@=13.0.1 cflags==\"-DGTEST_HAS_CXXABI_H_=0\" cxxflags==\"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0\"" extends: .job_on_lassen #clang_14_0_5_asan: @@ -56,7 +56,7 @@ clang_13_0_1_libcpp: gcc_8_3_1_cuda_11_7_desul_atomics: variables: - SPEC: " ~shared +openmp +tests +cuda +desul %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ^blt@develop" + SPEC: " ~shared +openmp +tests +cuda +desul %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers" MODULE_LIST: "cuda/11.7.0" extends: .job_on_lassen @@ -64,7 +64,7 @@ gcc_8_3_1_cuda_11_7_desul_atomics: # Deactivated due to issues with OpenMP Target and various tests and compilers. clang_16_0_6_ibm_omptarget: variables: - SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.ibm.gcc.8.3.1 ^blt@develop" + SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.ibm.gcc.8.3.1" ON_LASSEN: "OFF" extends: .job_on_lassen allow_failure: true diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index c1fb433dbc..6a0b7d40df 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -46,16 +46,16 @@ gcc_10_3_1: clang_14_0_6_openmp_off: variables: - SPEC: " ~shared ~openmp +tests %clang@=14.0.6 ^blt@develop" + SPEC: " ~shared ~openmp +tests %clang@=14.0.6" extends: .job_on_poodle gcc_10_3_1_openmp_default: variables: - SPEC: " ~shared +tests %gcc@=10.3.1 ^blt@develop" + SPEC: " ~shared +tests %gcc@=10.3.1" extends: .job_on_poodle # OTHERS clang_14_0_6_gcc_10_3_1_desul_atomics: variables: - SPEC: " ~shared +openmp +tests +desul %clang@=14.0.6.gcc.10.3.1 ^blt@develop" + SPEC: " ~shared +openmp +tests +desul %clang@=14.0.6.gcc.10.3.1" extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 012a822b0d..dce5c43d10 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -46,16 +46,16 @@ intel_2023_2_1: clang_14_0_6_openmp_off: variables: - SPEC: " ~shared ~openmp +tests %clang@=14.0.6 ^blt@develop" + SPEC: " ~shared ~openmp +tests %clang@=14.0.6" extends: .job_on_ruby gcc_10_3_1_openmp_default: variables: - SPEC: " ~shared +tests %gcc@=10.3.1 ^blt@develop" + SPEC: " ~shared +tests %gcc@=10.3.1" extends: .job_on_ruby # OTHERS clang_14_0_6_gcc_10_3_1_desul_atomics: variables: - SPEC: " ~shared +openmp +tests +desul %clang@=14.0.6.gcc.10.3.1 ^blt@develop" + SPEC: " ~shared +openmp +tests +desul %clang@=14.0.6.gcc.10.3.1" extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index b1e08cb469..110c8308bf 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -39,10 +39,10 @@ cce_17_0_1: rocmcc_6_2_0_hip_desul_atomics: variables: - SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" + SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" extends: .job_on_tioga rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" + SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" extends: .job_on_tioga From 3c58b7ca2fbdc39d9234219a80d17535395a2f5f Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Fri, 6 Sep 2024 10:05:34 -0700 Subject: [PATCH 22/82] Add ValOp and update new reduction objects. --- include/RAJA/pattern/params/params_base.hpp | 102 ++++++++++++++++++ include/RAJA/pattern/params/reducer.hpp | 108 ++++++++------------ 2 files changed, 143 insertions(+), 67 deletions(-) diff --git a/include/RAJA/pattern/params/params_base.hpp b/include/RAJA/pattern/params/params_base.hpp index 51e96260f8..b01de9c7e7 100644 --- a/include/RAJA/pattern/params/params_base.hpp +++ b/include/RAJA/pattern/params/params_base.hpp @@ -6,6 +6,108 @@ namespace RAJA { namespace expt { + + template + struct ValLoc { + using index_type = IndexType; + using value_type = T; + + RAJA_HOST_DEVICE constexpr ValLoc() {} + RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) {} + + RAJA_HOST_DEVICE constexpr void min(value_type v, index_type l) { if (v < val) { val = v; loc = l; } } + + RAJA_HOST_DEVICE constexpr void max(value_type v, index_type l) { if (v > val) { val = v; loc = l; } } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const { return val < rhs.val; } + RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const { return val > rhs.val; } + + RAJA_HOST_DEVICE constexpr explicit operator T() const { return val; } + + RAJA_HOST_DEVICE constexpr value_type getVal() const {return val;} + RAJA_HOST_DEVICE constexpr index_type getLoc() const {return loc;} + + //private: + value_type val; + index_type loc = -1; + }; + + template class Op> + struct ValOp { + using value_type = T; + using op_type = Op; + + RAJA_HOST_DEVICE constexpr ValOp() {} + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & operator+=(const value_type& rhs) { val += rhs; return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & operator&=(const value_type& rhs) { val &= rhs; return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & operator|=(const value_type& rhs) { val |= rhs; return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE ValOp & operator&=(value_type& rhs) { val &= rhs; return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE ValOp & operator|=(value_type& rhs) { val |= rhs; return *this; } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { val < rhs.val; return *this; } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { val > rhs.val; return *this; } + + RAJA_HOST_DEVICE constexpr value_type get() const {return val;} + + //private: + value_type val = op_type::identity(); + }; + + template class Op> + struct ValOp , Op> { + using index_type = IndexType; + using value_type = ValLoc; + using op_type = Op; + using valloc_value_type = typename value_type::value_type; + using valloc_index_type = typename value_type::index_type; + + RAJA_HOST_DEVICE constexpr ValOp() {} + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr explicit ValOp(valloc_value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) : val(v, l) {} + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & minloc(valloc_value_type v, valloc_index_type l) { return min(value_type(v,l)); } + + template >::value> * = nullptr> + RAJA_HOST_DEVICE constexpr ValOp & maxloc(valloc_value_type v, valloc_index_type l) { return max(value_type(v,l)); } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { return val < rhs.val; } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { return val > rhs.val; } + + RAJA_HOST_DEVICE constexpr value_type get() const {return val;} + RAJA_HOST_DEVICE constexpr valloc_value_type getVal() const {return val.getVal();} + RAJA_HOST_DEVICE constexpr valloc_index_type getLoc() const {return val.getLoc();} + + //private: + value_type val = op_type::identity(); + }; + + template class Op> + using ValLocOp = ValOp, Op>; + namespace detail { diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 05103c7ad4..de1f58907a 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -15,46 +15,18 @@ namespace RAJA { -namespace expt -{ - -template -struct ValLoc { - using index_type = RAJA::Index_type; - using value_type = T; - - RAJA_HOST_DEVICE ValLoc() {} - RAJA_HOST_DEVICE ValLoc(value_type v) : val(v) {} - RAJA_HOST_DEVICE ValLoc(value_type v, RAJA::Index_type l) : val(v), loc(l) {} - - RAJA_HOST_DEVICE void min(value_type v, index_type l) { if (v < val) { val = v; loc = l; } } - RAJA_HOST_DEVICE void max(value_type v, index_type l) { if (v > val) { val = v; loc = l; } } - - bool constexpr operator<(const ValLoc& rhs) const { return val < rhs.val; } - bool constexpr operator>(const ValLoc& rhs) const { return val > rhs.val; } - - value_type getVal() {return val;} - RAJA::Index_type getLoc() {return loc;} - -private: - value_type val; - index_type loc = -1; -}; - -} // namespace expt - namespace operators { -template -struct limits> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc min() +template +struct limits> { + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc min() { - return RAJA::expt::ValLoc(RAJA::operators::limits::min()); + return RAJA::expt::ValLoc(RAJA::operators::limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc max() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc max() { - return RAJA::expt::ValLoc(RAJA::operators::limits::max()); + return RAJA::expt::ValLoc(RAJA::operators::limits::max()); } }; @@ -83,16 +55,31 @@ namespace detail // Basic Reducer // // - template + template