Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use mixin class for QuantumESPRESSO #212

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
import math
import shlex
import warnings

import reframe as rfm
import reframe.core.logging as rflog
Expand Down Expand Up @@ -435,7 +434,7 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items,"
warn_msg += " which is not supported by this hook."
warn_msg += " Make sure to handle filtering yourself."
warnings.warn(warn_msg)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to double check: these changes were not accidental, right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind too much, but you could set bench_name=None in the class body. Then, with the current logic of setting self.bench_name=self.bench_name_ci in that one particular case in the init hook, that should work, right? I.e. it's essentially only one extra line in the test (a default bench_name).

Copy link
Collaborator Author

@smoors smoors Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, I re-added the bench_name check in the mixin class.
I've now also set bench_name_ci only for the CI test, so that bench_name does not always have to be set to something.

Copy link
Collaborator Author

@smoors smoors Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And yes, those warning changes are again not accidental :)
They're done to make sure we're now always using ReFrame warnings, and also to avoid creating a logger object in a separate step.

rflog.getlogger().warning(warn_msg)
return


Expand Down Expand Up @@ -529,7 +528,6 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
# and return from this hook (as setting test.extra_resources will be ignored in that case according to
# https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.pipeline.RegressionTest.extra_resources
if 'memory' not in test.current_partition.resources:
logger = rflog.getlogger()
msg = "Your ReFrame configuration file does not specify any resource called 'memory' for this partition "
msg += f" ({test.current_partition.name})."
msg += " Without this, an explicit memory request cannot be made from the scheduler. This test will run,"
Expand All @@ -538,7 +536,7 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
msg += " 'memory' in your ReFrame configuration file for this partition."
msg += " For a SLURM system, one would e.g. define:"
msg += " 'resources': [{'name': 'memory', 'options': ['--mem={size}']}]"
logger.warning(msg)
rflog.getlogger().warning(msg)
# We return, as setting a test.extra_resources is pointless - it would be ignored anyway
# This way, we also don't add any lines to the log that a specific amount of memory was requested
return
Expand Down Expand Up @@ -580,14 +578,13 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float):
log(f"Requested {req_mem_per_task} MiB per task from the torque batch scheduler")

else:
logger = rflog.getlogger()
msg = "hooks.req_memory_per_node does not support the scheduler you configured"
msg += f" ({test.current_partition.scheduler.registered_name})."
msg += " The test will run, but since it doesn't request the required amount of memory explicitely,"
msg += " it may result in an out-of-memory error."
msg += " Please expand the functionality of hooks.req_memory_per_node for your scheduler."
# Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command
logger.warning(msg)
rflog.getlogger().warning(msg)


def set_modules(test: rfm.RegressionTest):
Expand Down Expand Up @@ -671,14 +668,13 @@ def set_compact_process_binding(test: rfm.RegressionTest):
log(f'Set environment variable SLURM_DISTRIBUTION to {test.env_vars["SLURM_DISTRIBUTION"]}')
log(f'Set environment variable SLURM_CPU_BIND to {test.env_vars["SLURM_CPU_BIND"]}')
else:
logger = rflog.getlogger()
msg = "hooks.set_compact_process_binding does not support the current launcher"
msg += f" ({test.current_partition.launcher_type().registered_name})."
msg += " The test will run, but using the default binding strategy of your parallel launcher."
msg += " This may lead to suboptimal performance."
msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher."
# Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command
logger.warning(msg)
rflog.getlogger().warning(msg)


def set_compact_thread_binding(test: rfm.RegressionTest):
Expand Down
77 changes: 20 additions & 57 deletions eessi/testsuite/tests/apps/QuantumESPRESSO.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,52 +30,30 @@

import reframe as rfm
from hpctestlib.sciapps.qespresso.benchmarks import QEspressoPWCheck
from reframe.core.builtins import ( # added only to make the linter happy
parameter, run_after)
from reframe.core.builtins import parameter, run_after

from eessi.testsuite import hooks
from eessi.testsuite.constants import (COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU,
SCALES, TAGS)
from eessi.testsuite.utils import find_modules, log
from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU
from eessi.testsuite.eessi_mixin import EESSI_Mixin
from eessi.testsuite.utils import find_modules


@rfm.simple_test
class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck):
scale = parameter(SCALES.keys())
valid_prog_environs = ['default']
valid_systems = ['*']
class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin):
time_limit = '30m'
module_name = parameter(find_modules('QuantumESPRESSO'))
# For now, QE is being build for CPU targets only
# compute_device = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]])
compute_device = parameter([DEVICE_TYPES[CPU], ])
# For now, QE is built for CPU targets only
device_type = parameter([DEVICE_TYPES[CPU]])

@run_after('init')
def run_after_init(self):
"""Hooks to run after the init phase"""

# Filter on which scales are supported by the partitions defined in the ReFrame configuration
hooks.filter_supported_scales(self)

# Make sure that GPU tests run in partitions that support running on a GPU,
# and that CPU-only tests run in partitions that support running CPU-only.
# Also support setting valid_systems on the cmd line.
hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device)

# Support selecting modules on the cmd line.
hooks.set_modules(self)

# Support selecting scales on the cmd line via tags.
hooks.set_tag_scale(self)
def required_mem_per_node(self):
return (self.num_tasks_per_node * 0.9 + 4) * 1024

@run_after('init')
def set_tag_ci(self):
def set_ci(self):
"""Set tag CI on smallest benchmark, so it can be selected on the cmd line via --tag CI"""
min_ecut = min(QEspressoPWCheck.ecut.values)
min_nbnd = min(QEspressoPWCheck.nbnd.values)
if self.ecut == min_ecut and self.nbnd == min_nbnd:
self.tags.add(TAGS['CI'])
log(f'tags set to {self.tags}')
self.bench_name = self.bench_name_ci = 'bench_ci'

@run_after('init')
def set_increased_walltime(self):
Expand All @@ -85,29 +63,14 @@ def set_increased_walltime(self):
if self.ecut == max_ecut and self.nbnd == max_nbnd:
self.time_limit = '60m'

@run_after('setup')
def run_after_setup(self):
"""Hooks to run after the setup phase"""

# Calculate default requested resources based on the scale:
# 1 task per CPU for CPU-only tests, 1 task per GPU for GPU tests.
# Also support setting the resources on the cmd line.
if self.compute_device == DEVICE_TYPES[GPU]:
hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU])
else:
hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[CPU])

@run_after('setup')
def request_mem(self):
memory_required = self.num_tasks_per_node * 0.9 + 4
hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024)

@run_after('setup')
def set_omp_num_threads(self):
@run_after('init')
def set_compute_unit(self):
"""
Set number of OpenMP threads via OMP_NUM_THREADS.
Set default number of OpenMP threads equal to number of CPUs per task.
Set the compute unit to which tasks will be assigned:
one task per CPU core for CPU runs, and one task per GPU for GPU runs.
"""

self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
log(f'env_vars set to {self.env_vars}')
device_to_compute_unit = {
DEVICE_TYPES[CPU]: COMPUTE_UNIT[CPU],
DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU],
}
self.compute_unit = device_to_compute_unit.get(self.device_type)
Loading