[Manual] Devito on HX1 (A100 GPUs)
You have been granted access to the Imperial College Research Computing Service systems.
If you have been granted access to the HPC service, please take the time to read the Getting Started guide hosted on the ICL RCS readthedocs site.
Additional advice on using the HPC service may be found in the main HPC user guide.
If you have been granted access to a Research Data Store project space, please take the time to read the Research Data Store guide.
https://icl-rcs-user-guide.readthedocs.io/en/latest/hpc/pilot/hx1/
# After completing the registration,
# ssh to your login node (password authentication only; keys are not used)
ssh -oPubkeyAuthentication=no <username>@login.hx1.hpc.ic.ac.uk
# To quickly list the available versions of any software, remember that you can do:
module avail -t 2>&1 | grep -i <keyword>
# e.g.
module avail -t 2>&1 | grep -i nvidia
# https://icl-rcs-user-guide.readthedocs.io/en/latest/hpc/applications/easybuild/
# First, load the production tools
module load tools/prod
# Load Python, then create and activate a virtual environment
module load Python/3.11.3-GCCcore-12.3.0
python3 -m venv devito-env   # "devito-env" is an example name
source devito-env/bin/activate
# From within your Devito clone, install in editable mode
python3 -m pip install -e .
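# A quick sanity check that the editable install works (a minimal check,
# assuming Devito was cloned and installed as above):
python3 -c "import devito; print(devito.__version__)"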
# Requesting an interactive job
qsub -I -l walltime=01:30:00 -l select=1:ncpus=64:mem=200gb:ngpus=1:gpu_type=A100
# See the available GPUs
nvidia-smi --query-gpu=gpu_name --format=csv
# name
# NVIDIA A100-SXM4-80GB
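# Optionally also query the driver version and memory (standard nvidia-smi fields):
nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv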
module load tools/eb-dev
module load NVHPC/23.7-CUDA-12.2.0
# DROP (but check, in a fresh env, whether mpi4py can be compiled against NVHPC's bundled MPI)
# module load OpenMPI/4.1.5-GCC-12.3.0
module load Python/3.11.5-GCCcore-13.2.0
# Put NVHPC's bundled MPI first on the PATH
export PATH=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/comm_libs/mpi/bin:$PATH
DEVITO_LANGUAGE=openacc DEVITO_LOGGING=DEBUG DEVITO_PLATFORM=nvidiaX DEVITO_COMPILER=nvcc python examples/seismic/acoustic/acoustic_example.py -d 256 256 256 --tn 256
# ... 0.29 secs, 17.14 Gpts/s
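Below is an example PBS batch script for multi-GPU runs. Save it to a file (e.g. run_examples.pbs, a name chosen here for illustration) and submit it with qsub: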
#!/bin/bash
#PBS -l walltime=00:40:00
#PBS -l select=1:ncpus=8:mpiprocs=2:mem=200gb:ngpus=2:gpu_type=A100
# Move to the directory the job was submitted from
cd $PBS_O_WORKDIR
# List the nodes allocated to this job
cat $PBS_NODEFILE
module load NVHPC/23.7-CUDA-12.2.0
module load Python/3.11.5-GCCcore-13.2.0
# Use NVHPC's bundled MPI and point to the HPC SDK root
export PATH=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/comm_libs/mpi/bin:$PATH
export HPCSDK_HOME=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/
# export PATH=/gpfs/easybuild/prod/software/NVHPC/23.7-CUDA-12.2.0/Linux_x86_64/23.7/comm_libs/hpcx/bin:$PATH
module load OpenMPI/4.1.4-NVHPC-22.7-CUDA-11.7.0
# Enter the Devito source tree (cloned under the submission directory)
cd devito
export DEVITO_MPI=1
export DEVITO_LANGUAGE=openacc
export DEVITO_LOGGING=DEBUG
export DEVITO_PROFILING=advanced2
export DEVITO_PLATFORM=nvidiaX
export DEVITO_COMPILER=nvc
# mpirun -n 4 --map-by ppr:2:node -hostfile $PBS_NODEFILE --report-bindings python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so 12
# Run each example on 2 GPUs, first at space order 8, then at space order 12
mpirun -n 2 python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so 8
mpirun -n 2 python examples/seismic/elastic/elastic_example.py -d 768 768 768 --tn 1024 -so 8
mpirun -n 2 python examples/seismic/tti/tti_example.py -d 768 768 768 --tn 1024 -so 8
mpirun -n 2 python examples/seismic/viscoelastic/viscoelastic_example.py -d 768 768 768 --tn 1024 -so 8
mpirun -n 2 python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so 12
mpirun -n 2 python examples/seismic/elastic/elastic_example.py -d 768 768 768 --tn 1024 -so 12
mpirun -n 2 python examples/seismic/tti/tti_example.py -d 768 768 768 --tn 1024 -so 12
mpirun -n 2 python examples/seismic/viscoelastic/viscoelastic_example.py -d 768 768 768 --tn 1024 -so 12
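# Submit the script above (using the illustrative filename from earlier):
qsub run_examples.pbs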
# Monitor your queued and running jobs
watch -n 10 'qstat -T -u $(whoami)'
# Monitor the A100 queue
watch -n 10 'qstat | grep v1_a100'
# Watch GPU utilisation live during a run
watch -n 0.1 'nvidia-smi'
# 1 node x 1 GPU
qsub -I -l walltime=01:30:00 -l select=1:ncpus=8:mem=200gb:mpiprocs=1:ngpus=1:gpu_type=A100
# 1 node x 2 GPUs
qsub -I -l walltime=01:30:00 -l select=1:ncpus=8:mem=200gb:mpiprocs=2:ngpus=2:gpu_type=A100
# 2 nodes x 1 GPU each (spread across nodes)
qsub -I -l walltime=02:30:00 -l select=2:ncpus=8:mem=200gb:mpiprocs=1:ngpus=1:gpu_type=A100 -l place=scatter
# Multi-node launch templates (append the program to run to the first line)
mpirun -n 4 --map-by ppr:4:node --report-bindings
mpirun -n 4 --map-by ppr:2:node -hostfile $PBS_NODEFILE --report-bindings python examples/seismic/elastic/elastic_example.py -d 1024 1024 1024 --tn 1024 -so 8
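If each MPI rank must see a distinct GPU, a common Open MPI pattern is a small wrapper script that maps the node-local rank to a device. This is a sketch, not HX1-specific; gpu_bind.sh is a hypothetical name, and OMPI_COMM_WORLD_LOCAL_RANK is set per rank by Open MPI:

#!/bin/bash
# gpu_bind.sh: expose exactly one GPU to each node-local MPI rank
export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
exec "$@"

# Usage (the wrapper then launches the actual program):
# mpirun -n 2 ./gpu_bind.sh python examples/seismic/acoustic/acoustic_example.py -d 1024 1024 1024 --tn 1024 -so 8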