
Add gpus-per-node to job scripts and resources
xylar committed Jul 12, 2024
1 parent 2c61650 commit 5fb5ac1
Showing 5 changed files with 24 additions and 5 deletions.
14 changes: 11 additions & 3 deletions polaris/job/__init__.py
@@ -42,7 +42,7 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
     cores = np.sqrt(target_cores * min_cores)
     nodes = int(np.ceil(cores / cores_per_node))

-    partition, qos, constraint, wall_time = get_slurm_options(
+    partition, qos, constraint, gpus_per_node, wall_time = get_slurm_options(
         config, machine, nodes)

     job_name = config.get('job', 'job_name')
@@ -58,7 +58,7 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
     text = template.render(job_name=job_name, account=account,
                            nodes=f'{nodes}', wall_time=wall_time, qos=qos,
                            partition=partition, constraint=constraint,
-                           suite=suite)
+                           gpus_per_node=gpus_per_node, suite=suite)
     text = clean_up_whitespace(text)
     if suite == '':
         script_filename = 'job_script.sh'
@@ -95,6 +95,9 @@ def get_slurm_options(config, machine, nodes):
     constraint : str
         Slurm constraint

+    gpus_per_node : str
+        The number of GPUs per node (if any)
+
     wall_time : str
         Slurm wall time
     """
@@ -131,9 +134,14 @@ def get_slurm_options(config, machine, nodes):
     else:
         constraint = ''

+    if config.has_option('parallel', 'gpus_per_node'):
+        gpus_per_node = config.get('parallel', 'gpus_per_node')
+    else:
+        gpus_per_node = ''
+
     wall_time = config.get('job', 'wall_time')

-    return partition, qos, constraint, wall_time
+    return partition, qos, constraint, gpus_per_node, wall_time


 def clean_up_whitespace(text):
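Note: the fallback above follows the standard configparser optional-option pattern (polaris's config object exposes a compatible has_option()/get() interface). A minimal sketch, assuming a made-up [parallel] section and an example value of 4:

from configparser import ConfigParser

# Example config; the gpus_per_node value here is hypothetical.
config = ConfigParser()
config.read_string("""
[parallel]
gpus_per_node = 4
""")

# Fall back to an empty string when the option is absent so that the
# job-script template can skip the corresponding #SBATCH line.
if config.has_option('parallel', 'gpus_per_node'):
    gpus_per_node = config.get('parallel', 'gpus_per_node')
else:
    gpus_per_node = ''

print(repr(gpus_per_node))  # '4'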
3 changes: 3 additions & 0 deletions polaris/job/job_script.template
@@ -16,6 +16,9 @@
 {% if constraint != '' -%}
 #SBATCH --constraint={{ constraint }}
 {%- endif %}
+{% if gpus_per_node != '' -%}
+#SBATCH --gpus-per-node={{ gpus_per_node }}
+{%- endif %}

 source load_polaris_env.sh
 polaris serial {{suite}}
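The new block (added identically to the omega ctest template below) only emits the directive when gpus_per_node is non-empty. A minimal sketch of that behavior with plain jinja2, rendering just this snippet rather than the full template; the value '4' is an assumed example:

from jinja2 import Template

snippet = Template(
    "{% if gpus_per_node != '' -%}\n"
    "#SBATCH --gpus-per-node={{ gpus_per_node }}\n"
    "{%- endif %}"
)

# With a value set, the directive is rendered ...
print(snippet.render(gpus_per_node='4'))       # #SBATCH --gpus-per-node=4
# ... and with the empty-string fallback it is omitted entirely.
print(repr(snippet.render(gpus_per_node='')))  # ''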
5 changes: 5 additions & 0 deletions polaris/parallel.py
@@ -77,6 +77,11 @@ def get_available_parallel_resources(config):
         cores_per_node=cores_per_node,
         mpi_allowed=mpi_allowed
     )

+    if config.has_option('parallel', 'gpus_per_node'):
+        available_resources['gpus_per_node'] = \
+            config.getint('parallel', 'gpus_per_node')
+
     return available_resources


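Because the key is only added when the config option exists, consumers have to treat it as optional. A hypothetical consumer-side check; the dictionary contents and the default of 0 are illustrative assumptions, not polaris behavior:

# Hypothetical resource dictionary as returned above; values are made up.
available_resources = {
    'cores_per_node': 128,
    'mpi_allowed': True,
    'gpus_per_node': 4,  # present only when [parallel] gpus_per_node is set
}

# Treat the key as optional, defaulting to 0 GPUs when it is absent.
gpus = available_resources.get('gpus_per_node', 0)
if gpus > 0:
    print(f'requesting {gpus} GPU(s) per node')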
3 changes: 3 additions & 0 deletions utils/omega/ctest/job_script.template
@@ -16,6 +16,9 @@
 {% if constraint != '' -%}
 #SBATCH --constraint={{ constraint }}
 {%- endif %}
+{% if gpus_per_node != '' -%}
+#SBATCH --gpus-per-node={{ gpus_per_node }}
+{%- endif %}

 cd {{ build_dir }}
 ./omega_ctest.sh
4 changes: 2 additions & 2 deletions utils/omega/ctest/omega_ctest.py
@@ -120,7 +120,7 @@ def write_job_script(config, machine, compiler, submit):

     nodes = 1

-    partition, qos, constraint, _ = get_slurm_options(
+    partition, qos, constraint, gpus_per_node, _ = get_slurm_options(
         config, machine, nodes)

     wall_time = '0:15:00'
@@ -156,7 +156,7 @@ def write_job_script(config, machine, compiler, submit):
     script = template.render(job_name=job_name, account=account,
                              nodes=f'{nodes}', wall_time=wall_time, qos=qos,
                              partition=partition, constraint=constraint,
-                             build_dir=build_dir)
+                             gpus_per_node=gpus_per_node, build_dir=build_dir)
     script = clean_up_whitespace(script)

     build_omega_dir = os.path.abspath('build_omega')
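This caller discards the wall time (it hardcodes '0:15:00' instead) but still has to absorb the new return value, because tuple unpacking is positional. A toy illustration with made-up return values:

# Toy stand-ins for the old and new signatures; all values are invented.
def get_slurm_options_old():
    return 'batch', 'regular', 'cpu', '1:00:00'

def get_slurm_options_new():
    return 'batch', 'regular', 'cpu', '4', '1:00:00'

# Before: four names, wall time discarded via `_`.
partition, qos, constraint, _ = get_slurm_options_old()

# After: omitting gpus_per_node here would raise
# "ValueError: too many values to unpack (expected 4)".
partition, qos, constraint, gpus_per_node, _ = get_slurm_options_new()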
