def discover_machine(hostname=None):
    """
    Figure out the machine from the host name

    Parameters
    ----------
    hostname : str, optional
        The host name to identify.  By default, the host name of the
        current node is obtained from ``socket.gethostname()``

    Returns
    -------
    machine : str
        The name of the current machine, or ``None`` if the host name does
        not start with any of the known prefixes
    """
    # (host name prefix, machine name) pairs, checked in this order
    known_prefixes = (
        ('acme1', 'acme1'),
        ('andes', 'andes'),
        ('blueslogin', 'anvil'),
        ('ba-fe', 'badger'),
        ('chrlogin', 'chrysalis'),
        ('compy', 'compy'),
        ('cooley', 'cooley'),
        ('cori', 'cori-haswell'),
        ('gr-fe', 'grizzly'),
    )

    if hostname is None:
        hostname = socket.gethostname()

    for prefix, machine in known_prefixes:
        if not hostname.startswith(prefix):
            continue
        if machine == 'cori-haswell':
            # the host name alone cannot distinguish the two cori
            # configurations, so let the user know which one was picked
            warnings.warn('defaulting to cori-haswell.  Use -m cori-knl if you'
                          ' wish to run on KNL.')
        return machine

    # no prefix matched: the caller decides how to handle an unknown machine
    return None
{self.e3sm_supported}' + f' E3SM Supported Machine: {self.e3sm_supported}' if self.e3sm_supported: info = f'{info}\n' \ @@ -96,6 +98,8 @@ def __str__(self): f' MPI libraries: {", ".join(self.mpilibs)}\n' \ f' OS: {self.os}' + info = f'{info}\n' + print_unified = (self.e3sm_unified_activation is not None or self.e3sm_unified_base is not None or self.e3sm_unified_mpi is not None) @@ -115,6 +119,7 @@ def __str__(self): if self.e3sm_unified_mpi is not None: info = f'{info}\n' \ f' MPI type: {self.e3sm_unified_mpi}' + info = f'{info}\n' print_diags = self.diagnostics_base is not None if print_diags: @@ -124,7 +129,17 @@ def __str__(self): if self.diagnostics_base is not None: info = f'{info}\n' \ f' Base path: {self.diagnostics_base}' + info = f'{info}\n' + info = f'{info}\n' \ + f'Config options:' + for section in self.config.sections(): + info = f'{info}\n' \ + f' [{section}]' + for key, value in self.config.items(section): + info = f'{info}\n' \ + f' {key} = {value}' + info = f'{info}\n' return info def get_modules_and_mpi_compilers(self, compiler, mpilib): @@ -253,34 +268,45 @@ def get_modules_and_mpi_compilers(self, compiler, mpilib): return mpicc, mpicxx, mpifc, mod_commands - def _discover_machine(self): - """ Figure out the machine from the host name """ - if self.machine is not None: - return - hostname = socket.gethostname() - if hostname.startswith('acme1'): - machine = 'acme1' - elif hostname.startswith('andes'): - machine = 'andes' - elif hostname.startswith('blueslogin'): - machine = 'anvil' - elif hostname.startswith('ba-fe'): - machine = 'badger' - elif hostname.startswith('chrlogin'): - machine = 'chrysalis' - elif hostname.startswith('compy'): - machine = 'compy' - elif hostname.startswith('cooley'): - machine = 'cooley' - elif hostname.startswith('cori'): - warnings.warn('defaulting to cori-haswell. 
def get_account_defaults(self):
    """
    Get default account, partition and quality of service (QOS) for
    this machine.

    The values are read from the ``account``, ``partitions`` and ``qos``
    options of the ``[parallel]`` section of ``self.config``.  An option
    that is missing or blank yields ``None``.

    Returns
    -------
    account : str
        The E3SM account on the machine, or ``None`` if no account is
        given in the config options

    partition : str
        The default partition on the machine, or ``None`` if no partition
        should be specified

    qos : str
        The default quality of service on the machine, or ``None`` if no
        QOS should be specified
    """
    config = self.config
    section = 'parallel'

    def first_entry(option):
        """ first non-empty entry of a comma-separated option, or None """
        if not config.has_option(section, option):
            return None
        entries = (entry.strip()
                   for entry in config.get(section, option).split(','))
        # a blank option (or a stray leading comma) must not produce an
        # empty string as the default
        return next((entry for entry in entries if entry), None)

    account = None
    if config.has_option(section, 'account'):
        # the account is a single value, not a list; blank means "none"
        account = config.get(section, 'account').strip() or None

    # the default partition and QOS are the first ones listed
    partition = first_entry('partitions')
    qos = first_entry('qos')

    return account, partition, qos
parallel system of execution: slurm, cobalt or single_node +system = single_node + +# whether to use mpirun or srun to run a task +parallel_executable = mpirun + +# cores per node on the machine +cores_per_node = 192 diff --git a/mache/machines/andes.cfg b/mache/machines/andes.cfg index 266423cf..e0196c10 100644 --- a/mache/machines/andes.cfg +++ b/mache/machines/andes.cfg @@ -16,3 +16,22 @@ base_path = /ccs/proj/cli900/sw/rhea/e3sm-unified # The base path to the diagnostics directory base_path = /gpfs/alpine/proj-shared/cli115/diagnostics/ + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 32 + +# account for running diagnostics jobs +account = cli115 + +# available partition(s) (default is the first) +partitions = batch diff --git a/mache/machines/anvil.cfg b/mache/machines/anvil.cfg index 2510bc46..767ab680 100644 --- a/mache/machines/anvil.cfg +++ b/mache/machines/anvil.cfg @@ -21,4 +21,26 @@ base_path = /lcrc/soft/climate/e3sm-unified [diagnostics] # The base path to the diagnostics directory -base_path = /lcrc/group/e3sm/diagnostics \ No newline at end of file +base_path = /lcrc/group/e3sm/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 36 + +# account for running diagnostics jobs +account = condo + +# available partition(s) (default is the first) +partitions = acme-small, acme-medium, acme-large + +# quality of service (default is the first) +qos = regular, acme_high diff --git a/mache/machines/badger.cfg b/mache/machines/badger.cfg index 
b9cb0319..b7476493 100644 --- a/mache/machines/badger.cfg +++ b/mache/machines/badger.cfg @@ -22,3 +22,22 @@ base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/anaconda_envs # The base path to the diagnostics directory base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/diagnostic + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 36 + +# account for running diagnostics jobs +account = e3sm + +# quality of service (default is the first) +qos = regular, interactive diff --git a/mache/machines/chrysalis.cfg b/mache/machines/chrysalis.cfg index 97256329..bac1860c 100644 --- a/mache/machines/chrysalis.cfg +++ b/mache/machines/chrysalis.cfg @@ -21,4 +21,20 @@ base_path = /lcrc/soft/climate/e3sm-unified [diagnostics] # The base path to the diagnostics directory -base_path = /lcrc/group/e3sm/diagnostics \ No newline at end of file +base_path = /lcrc/group/e3sm/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 64 + +# available partition(s) (default is the first) +partitions = debug, compute, high diff --git a/mache/machines/compy.cfg b/mache/machines/compy.cfg index b8cd3372..6724d461 100644 --- a/mache/machines/compy.cfg +++ b/mache/machines/compy.cfg @@ -22,3 +22,25 @@ base_path = /share/apps/E3SM/conda_envs # The base path to the diagnostics directory base_path = /compyfs/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node 
+system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun --mpi=pmi2 + +# cores per node on the machine +cores_per_node = 40 + +# account for running diagnostics jobs +account = e3sm + +# available partition(s) (default is the first) +partitions = slurm + +# quality of service (default is the first) +qos = regular diff --git a/mache/machines/cooley.cfg b/mache/machines/cooley.cfg index f1b90551..8229608c 100644 --- a/mache/machines/cooley.cfg +++ b/mache/machines/cooley.cfg @@ -16,3 +16,19 @@ base_path = /lus/theta-fs0/projects/ccsm/acme/tools/e3sm-unified # The base path to the diagnostics directory base_path = /lus/theta-fs0/projects/ClimateEnergy_4/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = cobalt + +# whether to use mpirun or srun to run a task +parallel_executable = mpirun + +# cores per node on the machine +cores_per_node = 12 + +# account for running diagnostics jobs +account = ClimateEnergy_4 diff --git a/mache/machines/cori-haswell.cfg b/mache/machines/cori-haswell.cfg index ef344cb8..9ecfe948 100644 --- a/mache/machines/cori-haswell.cfg +++ b/mache/machines/cori-haswell.cfg @@ -22,3 +22,25 @@ base_path = /global/common/software/e3sm/anaconda_envs # The base path to the diagnostics directory base_path = /global/cfs/cdirs/e3sm/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 32 + +# account for running diagnostics jobs +account = e3sm + +# available partition(s) (default is the first) +partitions = haswell + +# quality of service (default is the first) +qos = regular, premium, debug diff --git a/mache/machines/cori-knl.cfg 
b/mache/machines/cori-knl.cfg index 876192f7..05e76f09 100644 --- a/mache/machines/cori-knl.cfg +++ b/mache/machines/cori-knl.cfg @@ -22,3 +22,25 @@ base_path = /global/common/software/e3sm/anaconda_envs # The base path to the diagnostics directory base_path = /global/cfs/cdirs/e3sm/diagnostics + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 68 + +# account for running diagnostics jobs +account = e3sm + +# available partition(s) (default is the first) +partitions = knl + +# quality of service (default is the first) +qos = regular, premium, debug diff --git a/mache/machines/grizzly.cfg b/mache/machines/grizzly.cfg index b9cb0319..b7476493 100644 --- a/mache/machines/grizzly.cfg +++ b/mache/machines/grizzly.cfg @@ -22,3 +22,22 @@ base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/anaconda_envs # The base path to the diagnostics directory base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/diagnostic + + +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 36 + +# account for running diagnostics jobs +account = e3sm + +# quality of service (default is the first) +qos = regular, interactive diff --git a/setup.cfg b/setup.cfg index c1777c94..9020d90e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,12 +16,7 @@ classifiers = [options] packages = find: +include_package_data = True python_requires = >=3.7 install_requires = lxml - -[options.data_files] -data = - mache/cime_machine_config/config_compilers.xml - mache/cime_machine_config/config_machines.xml - 
mache/machines/*.cfg