Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Perlmutter CPU and GPU #204

Merged
merged 14 commits into from
Oct 26, 2024
567 changes: 175 additions & 392 deletions mache/cime_machine_config/config_machines.xml

Large diffs are not rendered by default.

70 changes: 34 additions & 36 deletions mache/spack/pm-cpu_gnu_mpich.csh
Original file line number Diff line number Diff line change
@@ -1,41 +1,38 @@
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module rm PrgEnv-gnu &> /dev/null
module rm PrgEnv-intel &> /dev/null
module rm PrgEnv-nvidia &> /dev/null
module rm PrgEnv-cray &> /dev/null
module rm PrgEnv-aocc &> /dev/null
module rm gcc-native &> /dev/null
module rm intel &> /dev/null
module rm intel-oneapi &> /dev/null
module rm cudatoolkit &> /dev/null
module rm climate-utils &> /dev/null
module rm cray-libsci &> /dev/null
module rm matlab &> /dev/null
module rm craype-accel-nvidia80 &> /dev/null
module rm craype-accel-host &> /dev/null
module rm perftools-base &> /dev/null
module rm perftools &> /dev/null
module rm darshan &> /dev/null
module rm cpe \
cray-hdf5-parallel \
cray-netcdf-hdf5parallel \
cray-parallel-netcdf \
PrgEnv-gnu \
PrgEnv-intel \
PrgEnv-nvidia \
PrgEnv-cray \
PrgEnv-aocc \
gcc-native \
intel \
intel-oneapi \
cudatoolkit \
climate-utils \
cray-libsci \
matlab \
craype-accel-nvidia80 \
craype-accel-host \
perftools-base \
perftools \
darshan \
cray-mpich &> /dev/null

module load PrgEnv-gnu/8.5.0
module load gcc/12.2.0
module load craype-accel-host
{% if e3sm_lapack %}
module load cray-libsci/23.02.1.1
{% endif %}
module load craype/2.7.20
module rm cray-mpich &> /dev/null
module load libfabric/1.15.2.0
module load cray-mpich/8.1.25
module load PrgEnv-gnu/8.5.0 \
gcc-native/12.3 \
cray-libsci/23.12.5 \
craype-accel-host \
craype/2.7.30 \
libfabric/1.20.1 \
cray-mpich/8.1.28 \
cmake/3.24.3
{% if e3sm_hdf5_netcdf %}
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module load cray-hdf5-parallel/1.12.2.3
module load cray-netcdf-hdf5parallel/4.9.0.3
module load cray-parallel-netcdf/1.12.3.3
module load cray-hdf5-parallel/1.12.2.9 \
cray-netcdf-hdf5parallel/4.9.0.9 \
cray-parallel-netcdf/1.12.3.9
{% endif %}

{% if e3sm_hdf5_netcdf %}
Expand All @@ -55,6 +52,7 @@ setenv HDF5_USE_FILE_LOCKING FALSE
## Not needed
# setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch
setenv FI_CXI_RX_MATCH_MODE software
setenv FI_MR_CACHE_MONITOR kdreg2
setenv MPICH_COLL_SYNC MPI_Bcast
setenv GATOR_INITIAL_MB 4000MB
setenv BLA_VENDOR Generic
70 changes: 34 additions & 36 deletions mache/spack/pm-cpu_gnu_mpich.sh
Original file line number Diff line number Diff line change
@@ -1,41 +1,38 @@
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module rm PrgEnv-gnu &> /dev/null
module rm PrgEnv-intel &> /dev/null
module rm PrgEnv-nvidia &> /dev/null
module rm PrgEnv-cray &> /dev/null
module rm PrgEnv-aocc &> /dev/null
module rm gcc-native &> /dev/null
module rm intel &> /dev/null
module rm intel-oneapi &> /dev/null
module rm cudatoolkit &> /dev/null
module rm climate-utils &> /dev/null
module rm cray-libsci &> /dev/null
module rm matlab &> /dev/null
module rm craype-accel-nvidia80 &> /dev/null
module rm craype-accel-host &> /dev/null
module rm perftools-base &> /dev/null
module rm perftools &> /dev/null
module rm darshan &> /dev/null
module rm cpe \
cray-hdf5-parallel \
cray-netcdf-hdf5parallel \
cray-parallel-netcdf \
PrgEnv-gnu \
PrgEnv-intel \
PrgEnv-nvidia \
PrgEnv-cray \
PrgEnv-aocc \
gcc-native \
intel \
intel-oneapi \
cudatoolkit \
climate-utils \
cray-libsci \
matlab \
craype-accel-nvidia80 \
craype-accel-host \
perftools-base \
perftools \
darshan \
cray-mpich &> /dev/null

module load PrgEnv-gnu/8.5.0
module load gcc/12.2.0
module load craype-accel-host
{% if e3sm_lapack %}
module load cray-libsci/23.02.1.1
{% endif %}
module load craype/2.7.20
module rm cray-mpich &> /dev/null
module load libfabric/1.15.2.0
module load cray-mpich/8.1.25
module load PrgEnv-gnu/8.5.0 \
gcc-native/12.3 \
cray-libsci/23.12.5 \
craype-accel-host \
craype/2.7.30 \
libfabric/1.20.1 \
cray-mpich/8.1.28 \
cmake/3.24.3
{% if e3sm_hdf5_netcdf %}
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module load cray-hdf5-parallel/1.12.2.3
module load cray-netcdf-hdf5parallel/4.9.0.3
module load cray-parallel-netcdf/1.12.3.3
module load cray-hdf5-parallel/1.12.2.9 \
cray-netcdf-hdf5parallel/4.9.0.9 \
cray-parallel-netcdf/1.12.3.9
{% endif %}

{% if e3sm_hdf5_netcdf %}
Expand All @@ -55,6 +52,7 @@ export HDF5_USE_FILE_LOCKING=FALSE
## Not needed
# export PERL5LIB=/global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch
export FI_CXI_RX_MATCH_MODE=software
export FI_MR_CACHE_MONITOR=kdreg2

if [ -z "${NERSC_HOST:-}" ]; then
# happens when building spack environment
Expand Down
63 changes: 25 additions & 38 deletions mache/spack/pm-cpu_gnu_mpich.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ spack:
specs:
- gcc
- cray-mpich
{% if e3sm_lapack %}
- cray-libsci
{% endif %}
{% if e3sm_hdf5_netcdf %}
- hdf5
- netcdf-c
Expand All @@ -16,12 +14,10 @@ spack:
unify: when_possible
packages:
all:
compiler: [gcc@12.2.0]
compiler: [gcc@12.3]
providers:
mpi: [cray-mpich@8.1.25]
{% if e3sm_lapack %}
lapack: [cray-libsci@23.02.1.1]
{% endif %}
mpi: [cray-mpich@8.1.28]
lapack: [cray-libsci@23.12.5]
bzip2:
externals:
- spec: [email protected]
Expand Down Expand Up @@ -101,65 +97,55 @@ spack:
buildable: false
gcc:
externals:
- spec: gcc@12.2.0
- spec: gcc@12.3
modules:
- PrgEnv-gnu/8.5.0
- gcc/12.2.0
- gcc-native/12.3
- cray-libsci/23.12.5
- craype-accel-host
- craype/2.7.20
- libfabric/1.15.2.0
- craype/2.7.30
- libfabric/1.20.1
buildable: false
cray-mpich:
externals:
- spec: cray-mpich@8.1.25
prefix: /opt/cray/pe/mpich/8.1.25/ofi/gnu/9.1
modules:
- libfabric/1.15.2.0
- cray-mpich/8.1.25
buildable: false
libfabric:
externals:
- spec: libfabric@1.15.2.0
prefix: /opt/cray/libfabric/1.15.2.0
- spec: cray-mpich@8.1.28
modules:
- libfabric/1.15.2.0
- libfabric/1.20.1
- cray-mpich/8.1.28
buildable: false
{% if e3sm_lapack %}
cray-libsci:
externals:
- spec: cray-libsci@23.02.1.1
prefix: /opt/cray/pe/libsci/23.02.1.1/GNU/9.1/x86_64
- spec: cray-libsci@23.12.5
modules:
- cray-libsci/23.02.1.1
- cray-libsci/23.12.5
buildable: false
{% endif %}
{% if e3sm_hdf5_netcdf %}
hdf5:
externals:
- spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared
prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/GNU/9.1
- spec: hdf5@1.12.2.9~cxx+fortran+hl~java+mpi+shared
prefix: /opt/cray/pe/hdf5-parallel/1.12.2.9/gnu/12.3
buildable: false
parallel-netcdf:
externals:
- spec: parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared
prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/GNU/9.1/
- spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared
prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/gnu/12.3
buildable: false
netcdf-c:
externals:
- spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf
prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1
- spec: netcdf-c@4.9.0.9+mpi~parallel-netcdf
prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3
buildable: false
netcdf-fortran:
externals:
- spec: [email protected]
prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1
prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3
buildable: false
{% endif %}
config:
install_missing_compilers: false
compilers:
- compiler:
spec: gcc@12.2.0
spec: gcc@12.3
paths:
cc: cc
cxx: CC
Expand All @@ -170,10 +156,11 @@ spack:
target: x86_64
modules:
- PrgEnv-gnu/8.5.0
- gcc/12.2.0
- gcc-native/12.3
- cray-libsci/23.12.5
- craype-accel-host
- craype/2.7.20
- libfabric/1.15.2.0
- craype/2.7.30
- libfabric/1.20.1
environment:
prepend_path:
PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig"
65 changes: 32 additions & 33 deletions mache/spack/pm-cpu_intel_mpich.csh
Original file line number Diff line number Diff line change
@@ -1,38 +1,36 @@
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module rm PrgEnv-gnu &> /dev/null
module rm PrgEnv-intel &> /dev/null
module rm PrgEnv-nvidia &> /dev/null
module rm PrgEnv-cray &> /dev/null
module rm PrgEnv-aocc &> /dev/null
module rm gcc-native &> /dev/null
module rm intel &> /dev/null
module rm intel-oneapi &> /dev/null
module rm cudatoolkit &> /dev/null
module rm climate-utils &> /dev/null
module rm cray-libsci &> /dev/null
module rm matlab &> /dev/null
module rm craype-accel-nvidia80 &> /dev/null
module rm craype-accel-host &> /dev/null
module rm perftools-base &> /dev/null
module rm perftools &> /dev/null
module rm darshan &> /dev/null
module rm cpe \
cray-hdf5-parallel \
cray-netcdf-hdf5parallel \
cray-parallel-netcdf \
PrgEnv-gnu \
PrgEnv-intel \
PrgEnv-nvidia \
PrgEnv-cray \
PrgEnv-aocc \
gcc-native \
intel \
intel-oneapi \
cudatoolkit \
climate-utils \
cray-libsci \
matlab \
craype-accel-nvidia80 \
craype-accel-host \
perftools-base \
perftools \
darshan \
cray-mpich &> /dev/null

module load PrgEnv-intel/8.5.0
module load intel/2023.2.0
module load craype-accel-host
module load craype/2.7.30
module load libfabric/1.15.2.0
module rm cray-mpich &> /dev/null
module load cray-mpich/8.1.28
module load PrgEnv-intel/8.5.0 \
intel/2023.2.0 \
craype-accel-host \
craype/2.7.30 \
libfabric/1.20.1 \
cray-mpich/8.1.28
{% if e3sm_hdf5_netcdf %}
module rm cray-hdf5-parallel &> /dev/null
module rm cray-netcdf-hdf5parallel &> /dev/null
module rm cray-parallel-netcdf &> /dev/null
module load cray-hdf5-parallel/1.12.2.9
module load cray-netcdf-hdf5parallel/4.9.0.9
module load cray-parallel-netcdf/1.12.3.9
module load cray-hdf5-parallel/1.12.2.9 \
cray-netcdf-hdf5parallel/4.9.0.9 \
cray-parallel-netcdf/1.12.3.9
{% endif %}

{% if e3sm_hdf5_netcdf %}
Expand All @@ -52,6 +50,7 @@ setenv HDF5_USE_FILE_LOCKING FALSE
## Not needed
# setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch
setenv FI_CXI_RX_MATCH_MODE software
setenv FI_MR_CACHE_MONITOR kdreg2
setenv MPICH_COLL_SYNC MPI_Bcast
setenv GATOR_INITIAL_MB 4000MB
setenv BLA_VENDOR Intel10_64_dyn
Loading
Loading