esm-tools · pgierz · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/configs/components/echam/echam.yaml b/configs/components/echam/echam.yaml
@@ -79,7 +79,7 @@ compile_infos:
                make -j `nproc --all`; make install -j `nproc --all`;
             install_bins: ''
             branch: esm-tools
-            git-repository: https://git.geomar.de/foci/src/echam.git 
+            git-repository: https://git.geomar.de/foci/src/echam.git
           6.3.05p2-foci_oasismct4:
             branch: esm-tools-oasis3mct4
             git-repository: https://git.geomar.de/foci/src/echam.git
@@ -183,6 +183,7 @@ adj_input_dir: "${input_dir}/${resolution}"
 forcing_dir: "${input_dir}/${resolution}"
 greenhouse_dir: "${pool_dir}"
 namelist_dir: "${general.esm_namelist_dir}/echam/${version}/${scenario_type}"
+has_namelist_streams: True  # ECHAM defines extra output streams (mvstreamctl file tags) in its namelist; esm_runscripts adds them to outdata_sources
 
 switch: 1
 value : "echam default"
@@ -585,7 +586,7 @@ choose_with_lco2_emis:
                 co2ctl:
                     lco2_emis: true # read carbon emissions; need carbon_emissions.nc in work
                     lco2_2perc: true # limit maximum co2 growth rate to 2% wrt previous time step
-        add_choose_scenario: # other loop order (1:add_forcing_files 2:add_choose_scenario) does not work 
+        add_choose_scenario: # other loop order (1:add_forcing_files 2:add_choose_scenario) does not work
             HIST:
                 add_forcing_files:
                     carbon_emissions: carbon_emissions_hist
@@ -617,7 +618,7 @@ choose_icb_code:
                         namelist.echam:
                                 submodelctl:
                                         licb: "${licb}"
-                
+
 
 choose_wiso_code:
         True:
@@ -687,7 +688,7 @@ forcing_in_work:
         sic: "unit.96"
         sst: "unit.20"
         # ok this is another crazy ECHAM6 speciality
-        # every year the background aerosol files for 1849 to 1851 
+        # every year the background aerosol files for 1849 to 1851
         # need to be linked to the same file for historical/scenario runs
         # don't blame me (seb-wahl), blame the MAC-SP developers at MPI
         # MAC-SP describes aerosol w.r.t. piControl conditions.

diff --git a/src/esm_runscripts/echam.py b/src/esm_runscripts/echam.py
@@ -0,0 +1,178 @@
+"""
+This module provides functionality specific to the ECHAM component.
+
+The functions included in this module are:
+
+- ``append_namelist_dependent_sources``: Appends namelist dependent sources to the ECHAM
+  configuration.
+
+These functions are used to manage and update the configuration settings for ECHAM,
+particularly focusing on handling namelist files and updating output data sources
+based on the configuration parameters.
+"""
+
+import f90nml
+from loguru import logger
+
+
+def _get_mvstream_tags_from_namelist(namelist):
+    """
+    Extracts mvstream tags from a given namelist.
+
+    Parameters
+    ----------
+    namelist : str or f90nml.namelist.Namelist
+        The path to the namelist file or an already parsed namelist object.
+
+    Returns
+    -------
+    list of str
+        A list of mvstream tags found in the namelist.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the namelist file specified by the path cannot be found.
+    TypeError
+        If the provided namelist cannot be converted to an f90nml.namelist.Namelist
+        object.
+
+    Examples
+    --------
+    Assuming you have a namelist file at ``tests/namelists/echam/paul_custom_namelist.echam`` with the
+    following contents:
+
+    .. code-block:: fortran
+
+    ! This is the "no output" variant of namelist.echam. It contains absolutely no
+    ! output, and can be used as a template for testing.
+    !
+    ! Extended by mvstreamctl namelist block, as an example for the mvstream tags.
+    !
+    ! P. Gierz
+    ! Alfred Wegener Institute, Helmholtz Centre for Polar and Marine Research
+    ! July 2021
+    !
+    ! P. Gierz
+    ! October 2024
+
+    &runctl
+        dt_start = 2285, 12, 31, 23, 52, 30
+        dt_stop = 6699, 12, 31, 23, 52, 30
+        putrerun = 12, 'months', 'last', 0
+        lfractional_mask = .false.
+        lresume = .true.
+        out_datapath = './'
+        out_expname = 'E280'
+        rerun_filetype = 4
+        delta_time = 450
+        putdata = 1, 'months', 'last', 0
+        nproma = 8
+        lcouple = .true.
+        getocean = 1, 'days', 'last', 0
+        putocean = 1, 'days', 'last', 0
+        lcouple_co2 = .true.
+        default_output = .false.
+    /
+
+    &parctl
+        nproca = 24
+        nprocb = 24
+    /
+
+    &submodelctl
+        lmethox = .true.
+    /
+
+    &submdiagctl
+        vphysc_lpost = .false.
+    /
+
+    &radctl
+        iaero = 3
+        io3 = 4
+        isolrad = 6
+        ich4 = 3
+        in2o = 3
+        co2vmr = 284.3169860840e-06
+        ch4vmr = 808.2490234375e-09
+        n2ovmr = 273.0210571289e-09
+        yr_perp = 1850
+    /
+    &mvstreamctl
+        filetag = 'paul_custom'
+        source = 'g3b'
+        variables = 'temp2:mean>temp2=167'
+        interval = 1, 'months', 'last', 0
+    /
+
+    The following code will extract the mvstream tags from the namelist:
+
+    >>> namelist_path = "tests/namelists/echam/paul_custom_namelist.echam"
+    >>> tags = _get_mvstream_tags_from_namelist(namelist_path)
+    >>> print(tags)
+    ['paul_custom']
+    """
+    mvstream_tags = []
+    if not isinstance(namelist, f90nml.namelist.Namelist):
+        try:
+            namelist = f90nml.read(namelist)
+        except FileNotFoundError:
+            logger.error(f"Namelist specified by {namelist} could not be found")
+        except TypeError as e:
+            logger.error(
+                f"Could not convert {namelist} to f90nml.namelist.Namelist object."
+            )
+            raise e
+
+    for chapter, contents in namelist.items():
+        if chapter == "mvstreamctl":
+            tag = contents.get("filetag")
+            if tag is not None:
+                mvstream_tags.append(tag)
+    return mvstream_tags
+
+
+def append_namelist_dependent_sources(config):
+    """
+    Append namelist dependent sources to the ECHAM configuration.
+
+    This function updates the `outdata_sources` in the ECHAM configuration
+    based on the namelist objects and other configuration parameters.
+
+    Parameters
+    ----------
+    config : dict
+        The configuration dictionary containing general, ECHAM, and JSBACH settings.
+
+    Notes
+    -----
+    - The function reads the namelist from the specified directory if not
+      already loaded.
+    - It filters out tags that are to be ignored based on the JSBACH streams
+      or specified ignore tags.
+    - The output file type is checked, and if it is NetCDF (indicated by
+      ``out_filetype`` == 2), the file extension `.nc` is appended to the tags.
+    - The function logs the updates made to the ``outdata_sources``.
+    """
+    expid = config["general"]["expid"]
+    econfig = config["echam"]
+    try:
+        namelist = econfig["namelist_objs"]
+    except KeyError:  # Namelists not yet loaded...
+        namelist = f90nml.read(f"{econfig['namelist_dir']}/namelist.echam")
+    mvstream_tags = _get_mvstream_tags_from_namelist(namelist)
+    jsbach_streams = config["jsbach"].get("streams", [])
+    ignore_these_tags = econfig.get("ignore_tags", [])
+    if econfig.get("ignore_tags_include_jsbach_tags", True):
+        ignore_these_tags.extend(jsbach_streams)
+    mvstream_tags = [tag for tag in mvstream_tags if tag not in ignore_these_tags]
+    mvstream_dict = {tag: f"{expid}*{tag}" for tag in mvstream_tags}
+    if namelist["runctl"].get("out_filetype") == 2:
+        # Using NetCDF Outputs:
+        mvstream_dict = {k: v + ".nc" for k, v in mvstream_dict.items()}
+    logger.debug("Updating outdata_sources...")
+    for k, v in mvstream_dict.items():
+        logger.debug(f"{k}: {v}")
+    econfig["outdata_sources"].update(mvstream_dict)
+    logger.debug("...done!")
diff --git a/src/esm_runscripts/filelists.py b/src/esm_runscripts/filelists.py
@@ -3,6 +3,7 @@
 import filecmp
 import glob
 import hashlib
+import importlib
 import os
 import pathlib
 import re
@@ -11,12 +12,11 @@
 
 import f90nml
 import yaml
+from loguru import logger
 
 import esm_parser
-from loguru import logger
 
-from . import helpers
-from . import jinja
+from . import helpers, jinja
 
 
 def rename_sources_to_targets(config):
@@ -225,7 +225,7 @@ def choose_needed_files(config):
             new_sources = new_targets = {}
             for category, name in config[model][filetype + "_files"].items():
                 # TODO: change with user_error()
-                if not name in config[model][filetype + "_sources"]:
+                if name not in config[model][filetype + "_sources"]:
                     logger.error(
                         "Implementation "
                         + name
@@ -1632,10 +1632,27 @@ def get_movement(config, model, category, filetype, source, target):
         sys.exit(42)
 
 
+def append_namelist_dependent_sources(config):
+    """If a model has streams defined in the one of it's namelists, append them to the sources here"""
+    for model in config["general"]["valid_model_names"] + ["general"]:
+        if config[model].get("has_namelist_streams", False):  # Something truthy
+            try:
+                model_module = importlib.import_module(f"esm_runscripts.{model}")
+                # Important: we need to define something that is called append_namelist_dependent_sources in <model>.py
+                model_module.append_namelist_dependent_sources(config)
+            except ImportError:
+                logger.error(
+                    f"Model {model} specifies that it has namelist streams, but there is module to import to handle that..."
+                )
+                # keep going...
+    return config
+
+
 def assemble(config):
     config = complete_all_file_movements(config)
     config = rename_sources_to_targets(config)
     config = choose_needed_files(config)
+    config = append_namelist_dependent_sources(config)
     config = complete_targets(config)
     config = complete_sources(config)
     config = reuse_sources(config)

diff --git a/src/esm_runscripts/jsbach.py b/src/esm_runscripts/jsbach.py
@@ -0,0 +1,115 @@
+"""
+Functionality specific for the JSBACH sub-component.
+"""
+
+import f90nml
+from loguru import logger
+
+from .echam import _get_mvstream_tags_from_namelist
+
+
+def _get_comments_for_streams(namelist, mvstream_tags, flag="ESM_TOOLS_JSBACH_STREAM"):
+    """
+        Extracts tags from a namelist file that have a specific comment flag.
+
+        Parameters
+        ----------
+        namelist : str
+            Path to the namelist file.
+        mvstream_tags : list of str
+            List of tags to search for in the namelist file.
+    flag : str, optional
+            The flag to search for in the comments (default is "ESM_TOOLS_JSBACH_STREAM").
+
+        Returns
+        -------
+        list of str
+            List of tags that have the specified comment flag.
+
+        Examples
+        --------
+        Assuming you have a namelist file with the following contents:
+
+        .. code-block:: fortran
+
+        &mvstreamctl
+            filetag = 'plants' ! ESM_TOOLS_JSBACH_STREAM
+            source = 'g3b'
+            variables = 'soilwet', 'soiltemp', 'lai', 'gpp', 'npp'
+            interval = 1, 'months', 'last', 0
+        /
+        &mvstreamctl
+            filetag = 'soil' ! ESM_TOOLS_JSBACH_STREAM
+            source = 'g3b'
+            variables = 'soilwet', 'soiltemp'
+            interval = 1, 'months', 'last', 0
+        /
+        &mvstreamctl
+            filetag = 'something'
+            source = 'g3b'
+            variables = 'temp2'
+            interval = 1, 'months', 'last', 0
+        /
+        The following will extract ['plants', 'soil']::
+
+        >>> namelist = "tests/namelists/echam/jsbach_tags_namelist.echam"
+        >>> mvstream_tags = ["plants", "soil", "something"]
+        >>> _get_comments_for_streams(namelist, mvstream_tags)
+        ['plants', 'soil']
+    """
+    with open(namelist, "r") as f:
+        lines = f.readlines()
+    jsbach_tags = []
+    for tag in mvstream_tags:
+        # Find the tag in the namlist
+        matching_lines = [line for line in lines if tag in line]
+        if not matching_lines:
+            continue
+        # Find the comment
+        for line in matching_lines:
+            if "!" not in line:
+                continue
+            comment = line.split("!")[1].strip()
+            if flag in comment:
+                jsbach_tags.append(tag)
+    return jsbach_tags
+
+
+def append_namelist_dependent_sources(config):
+    """
+    Append namelist dependent sources of JSBACH configuration.
+
+    Parameters
+    ----------
+    config : dict
+
+    Notes
+    -----
+    - The function reads the namelist and will filter mvstream tags that have
+      the comment "ESM_TOOLS_JSBACH_STREAM".
+    """
+    expid = config["general"]["expid"]
+    jconfig = config["jsbach"]
+    namelist_file = f"{jconfig['namelist_dir']}/namelist.echam"
+    try:
+        namelist = jconfig["namelist_objs"]
+    except KeyError:
+        # NOTE(PG): This is one of the reasons ECHAM/JSBACH is unfriendly...
+        #           JSBACH is controlled by the ECHAM namelist, this makes
+        #           it difficult to separate the two components.
+        namelist = f90nml.read(namelist_file)
+    mvstream_tags = _get_mvstream_tags_from_namelist(namelist)
+    # Check if any of the JSBACH streams identified in the namelist
+    # have a comment attached to them:
+    flagged_tags = _get_comments_for_streams(namelist.file, mvstream_tags)
+    mvstream_dict = {tag: f"{expid}*{tag}" for tag in flagged_tags}
+    if namelist["runctl"].get("out_filetype") == 2:
+        # Using NetCDF Outputs:
+        mvstream_dict = {k: v + ".nc" for k, v in mvstream_dict.items()}
+    logger.debug("Updating outdata_sources...")
+    for k, v in mvstream_dict.items():
+        logger.debug(f"{k}: {v}")
+    jconfig["outdata_sources"].update(mvstream_dict)
+    logger.debug("...done!")
+
+    return config