diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 2bd2b4ccb..000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,29 +0,0 @@ -# This file is a template, and might need editing before it works on your project. -# see https://docs.gitlab.com/ce/ci/yaml/README.html for all available options - - -before_script: - - echo "Before script section" - - echo "For example you might run an update here or install a build dependency" - - echo "Or perhaps you might print out some debugging details" - -after_script: - - echo "After script section" - - echo "For example you might do some cleanup here" - -install_tools: - tags: - - ubuntu - - mistral, xen - # TODO: Add ollie - stage: build - script: - - git --version - - python --version - - pip --version - - ./install.sh - - export PATH=${HOME}/.local/bin:${PATH} - - which esm_master - - which esm_runscripts - - which esm_versions - diff --git a/.readthedocs.yml b/.readthedocs.yml index 3aa8a9687..40800b3d9 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -8,7 +8,7 @@ version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py - fail_on_warning: true + fail_on_warning: false # Build documentation with MkDocs #mkdocs: diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a51bcd5a5..000000000 --- a/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -# Config file for automatic testing at travis-ci.com - -language: python -python: - - 3.8 - - 3.7 - - 3.6 - - 3.5 - -# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -install: pip install -U tox-travis - -# Command to run tests, e.g. python setup.py test -script: tox - - diff --git a/configs/components/echam/echam.yaml b/configs/components/echam/echam.yaml index f5b804ecf..a3b0a5626 100644 --- a/configs/components/echam/echam.yaml +++ b/configs/components/echam/echam.yaml @@ -102,24 +102,13 @@ metadata: Authors: Bjorn Stevens (bjorn.stevens@mpimet.mpg.de) among others at MPI-Met Publications: 'Atmosphericcomponent of the MPI-M earth system model: ECHAM6 ' - License: > + License: Please make sure you have a license to use ECHAM. Otherwise downloading ECHAM will already fail. - To use the repository on any of these locations: - - * gitlab.dkrz.de/modular_esm/echam.git - * gitlab.awi.de/paleodyn/models/echam.git - - please register for the MPI-ESM user forum at: - - https://code.mpimet.mpg.de/projects/mpi-esm-license - + To use the repository on either gitlab.dkrz.de/modular_esm/echam6.git or gitlab.awi.de/paleodyn/models/echam6.git, + please register for the MPI-ESM user forum at https://code.mpimet.mpg.de/projects/mpi-esm-license and send a screenshot of yourself logged in to the forum to either paul.gierz@awi.de, miguel.andres-martinez@awi.de, - or nadine.wieters@awi.de. - - Note also that you can otherwise ignore the instructions on that page, just the registiration and login screen shot - is important for us. - - Have fun using ECHAM! :-) + or nadine.wieters@awi.de. Note also that you can otherwise ignore the instructions on that page, just the registration + and login screen shot is the relevant part for obtaining the license. 
standalone_model: True @@ -405,11 +394,11 @@ choose_scenario: aerofin_1850: piaerofin aerofin_1851: piaerofin ozone: ozone_hist_scenario - greenhouse: histgreenh + greenhouse: greenhouse_hist_scenario volcir: histvolcir volcsw: histvolcsw swflux: histswflux - MAC-SP: MAC-SP + MAC-SP: MAC-SP_hist_scenario cmip6hist: scenario_type: cmip6 diff --git a/configs/components/fesom/fesom-2.0.yaml b/configs/components/fesom/fesom-2.0.yaml index e542a987e..212bcc386 100644 --- a/configs/components/fesom/fesom-2.0.yaml +++ b/configs/components/fesom/fesom-2.0.yaml @@ -170,8 +170,10 @@ restart_in_in_work: fesom_bin_restart_info: fesom_bin_restart/*.info fesom_bin_restart: fesom_bin_restart/np${nproc}/* restart_in_sources: - oce_restart: fesom.${parent_date!syear}.oce.restart.nc - ice_restart: fesom.${parent_date!syear}.ice.restart.nc + # TODO: When cleaning up the FESOM yamls, remove the wildcard and make this file + # be dependent on the corresponding namelist option + oce_restart: fesom.${parent_date!syear}.o*e.restart.nc + ice_restart: fesom.${parent_date!syear}.i*e.restart.nc par_oce_restart: fesom.${parent_date!syear}.oce.restart/*.nc par_ice_restart: fesom.${parent_date!syear}.ice.restart/*.nc fesom_raw_restart_info: fesom_raw_restart/*.info diff --git a/configs/components/oifs/oifs.yaml b/configs/components/oifs/oifs.yaml index 4b92218d5..e9f80b42a 100644 --- a/configs/components/oifs/oifs.yaml +++ b/configs/components/oifs/oifs.yaml @@ -93,6 +93,8 @@ compile_infos: - 43r3-master - 43r3-awicm-3.0 - 43r3-awicm-3.1 + - 43r3-awicm-3.1.1 + - 43r3-awicm-3.1.2 - 43r3-awicm-frontiers-xios - 43r3 - 40r1-foci @@ -139,6 +141,24 @@ compile_infos: install_bins: make/esm/oifs/bin/oifs destination: oifs-43r3 with_xios: true + 43r3-awicm-3.1.1: + requires: + - oasis3mct-4.0-awicm-3.1 + branch: awicm-3.1.1 + comp_command: "export OIFS_TOPLEVEL_DIR=${model_dir}; export OIFS_XIOS=enable ; export OIFS_XIOS_DIR=${model_dir}/../xios ; export OIFS_XIOS_INCLUDE=-I/${model_dir}/../xios/inc/; cd make; ../fcm/bin/fcm make -v -j8 -f oifs.fcm ; chmod -R ${source_code_permissions} .; mv esm/oifs/bin/master.exe esm/oifs/bin/oifs" + git-repository: https://gitlab.dkrz.de/ec-earth/oifs-43r3.git + install_bins: make/esm/oifs/bin/oifs + destination: oifs-43r3 + with_xios: true + 43r3-awicm-3.1.2: + requires: + - oasis3mct-4.0-awicm-3.1 + branch: awicm-3.1.2 + comp_command: "export OIFS_TOPLEVEL_DIR=${model_dir}; export OIFS_XIOS=enable ; export OIFS_XIOS_DIR=${model_dir}/../xios ; export OIFS_XIOS_INCLUDE=-I/${model_dir}/../xios/inc/; cd make; ../fcm/bin/fcm make -v -j8 -f oifs.fcm ; chmod -R ${source_code_permissions} .; mv esm/oifs/bin/master.exe esm/oifs/bin/oifs" + git-repository: https://gitlab.dkrz.de/ec-earth/oifs-43r3.git + install_bins: make/esm/oifs/bin/oifs + destination: oifs-43r3 + with_xios: true 43r3-awicm-frontiers-xios: requires: - oasis3mct-4.0-awicm-frontiers diff --git a/configs/components/tux/tux.yaml b/configs/components/tux/tux.yaml index 4726507b9..46113cb22 100644 --- a/configs/components/tux/tux.yaml +++ b/configs/components/tux/tux.yaml @@ -24,11 +24,10 @@ clean_command: "rm -fr ${install_bins}" comp_command: "display ${install_bins}" metadata: - Institute: wiki - Description: - "Tux image" - Authors: "who knows" + Institute: "" + Description: "" + Authors: "" Publications: - - "are you serious?" 
+ - " " License: GPL diff --git a/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5.yaml b/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5.yaml new file mode 100644 index 000000000..3bf104f09 --- /dev/null +++ b/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5.yaml @@ -0,0 +1,11 @@ +components: +- xios-2.5 +- rnfmap-awicm-3.1 +- oifs-43r3-awicm-3.1.1 +- fesom-2.0-awicm-3.1 +- oasis3mct-4.0-awicm-3.1 +coupling_changes: +- sed -i '/COUPLENEMOECE = /s/.TRUE./.FALSE./g' oifs-43r3/src/ifs/module/yommcc.F90 +- sed -i '/COUPLEFESOM2 = /s/.FALSE./.TRUE./g' oifs-43r3/src/ifs/module/yommcc.F90 +- sed -i '/COUPLENEMOFOCI = /s/.TRUE./.FALSE./g' oifs-43r3/src/ifs/module/yommcc.F90 + diff --git a/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5.yaml b/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5.yaml new file mode 100644 index 000000000..583e8cd9a --- /dev/null +++ b/configs/couplings/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5/fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5.yaml @@ -0,0 +1,11 @@ +components: +- xios-2.5 +- rnfmap-awicm-3.1 +- oifs-43r3-awicm-3.1.2 +- fesom-2.0-awicm-3.1 +- oasis3mct-4.0-awicm-3.1 +coupling_changes: +- sed -i '/COUPLENEMOECE = /s/.TRUE./.FALSE./g' oifs-43r3/src/ifs/module/yommcc.F90 +- sed -i '/COUPLEFESOM2 = /s/.FALSE./.TRUE./g' oifs-43r3/src/ifs/module/yommcc.F90 +- sed -i '/COUPLENEMOFOCI = /s/.TRUE./.FALSE./g' oifs-43r3/src/ifs/module/yommcc.F90 + diff --git a/configs/setups/awicm3/awicm3.yaml b/configs/setups/awicm3/awicm3.yaml index 4ca5e9c24..2ac1f8722 100644 --- a/configs/setups/awicm3/awicm3.yaml +++ b/configs/setups/awicm3/awicm3.yaml @@ -19,20 +19,41 @@ general: available_versions: - 'v3.0' - 'v3.1' + - 'v3.1.1' + - 'v3.1.2' - 'master' - 'frontiers-xios' choose_version: v3.0: + major_version: v3.0 couplings: - fesom-2.0-awicm-3.0+oifs-43r3-awicm-3.0 v3.1: + major_version: v3.1 couplings: - fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1+xios-2.5 add_include_models: - xios add_further_reading: - xios/xios.env.yaml + v3.1.1: + major_version: v3.1 + couplings: + - fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.1+xios-2.5 + add_include_models: + - xios + add_further_reading: + - xios/xios.env.yaml + v3.1.2: + major_version: v3.1 + couplings: + - fesom-2.0-awicm-3.1+oifs-43r3-awicm-3.1.2+xios-2.5 + add_include_models: + - xios + add_further_reading: + - xios/xios.env.yaml master: + major_version: master couplings: - fesom-2.0-master+oifs-43r3-master add_include_models: @@ -40,6 +61,7 @@ general: add_further_reading: - xios/xios.env.yaml frontiers-xios: + major_version: frontiers-xios couplings: - fesom-2.0-frontiers+oifs-43r3-awicm-frontiers-xios+xios-2.5 add_include_models: @@ -109,7 +131,7 @@ oifs: rtables_dir: ${input_dir}/rtables/ forcing_dir: ${input_dir}/${version}/ifsdata/ ifsdata_dir: ${input_dir}/${version}/ifsdata/ - choose_general.version: + choose_general.major_version: "v3.0": namelist_dir: ${general.esm_namelist_dir}/oifs/${version}/awicm3/v3.0 mip: "cmip6" @@ -336,7 +358,7 @@ fesom: oce_tra: surf_relax_s: "${surf_relax_s}" - choose_general.version: + choose_general.major_version: "v3.0": namelist_dir: "${esm_namelist_dir}/fesom2/2.0/awicm3/v3.0/" add_namelist_changes: @@ -547,7 +569,7 @@ oasis3mct: 
r2a_seq: 2 coupling_time_step: "${oasis3mct.time_step}" - choose_general.version: + choose_general.major_version: 'frontiers-xios': mct_version: 4.0 input_dir: ${pool_dir}/cy${oifs.version}/${oifs.resolution}-${fesom.resolution}/ @@ -706,9 +728,9 @@ oasis3mct: true: lag: "${time_step}" # If ini_restart_dir is not specified in the runscript and, therefore, - # it is not a branched off experiment add the /fesom.nproc/ to the + # it is not a branched off experiment adds the /fesom.nproc/ to the # sources when the run_number is 1 - load_restart_from_pool: "$(( '${input_dir}' == '${ini_restart_dir}' and ${general.run_number} == 1))" + load_restart_from_pool: "$(( os.path.realpath('${input_dir}') == os.path.realpath('${ini_restart_dir}') and ${general.run_number} == 1 ))" choose_load_restart_from_pool: true: add_restart_in_sources: diff --git a/docs/esm_runscripts.rst b/docs/esm_runscripts.rst index 16b05bee9..8df405d70 100644 --- a/docs/esm_runscripts.rst +++ b/docs/esm_runscripts.rst @@ -331,12 +331,12 @@ This will enable the `pdb Python debugger ' + +.. warning:: + The branch **needs to exist on GitHub** as it is cloned form there, and **not from your + local folder**. If you made any changes in your local branch make sure they are pushed before + running ``esm_runscripts`` with a virtual environment, so that your changes are included in the + virtual environment installation. + +You may also select to install esm_tools in `editable mode`, in which case +they will be installed in a folder ``src/esm_tools/`` in the root of +your experiment. Any changes made to the code in that folder **will** influence how +ESM-Tools behave. To create a virtual environment with ESM-Tools installed in +`editable` mode use: + +.. code-block:: yaml + + general: + install__editable: true/false + +.. note:: + When using a virtual environment, config files and namelists will come of the + folder .venv_esmtools listed above and **not** from your user install directory. + You should make **all** changes to the namelists and config files via your user + runscript (:ref:`yaml:Changing Namelists`). This is recommended in all cases!!! diff --git a/docs/esm_tools.pdf b/docs/esm_tools.pdf deleted file mode 100644 index 5f4ad493b..000000000 Binary files a/docs/esm_tools.pdf and /dev/null differ diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 7efb7a5cb..000000000 --- a/docs/make.bat +++ /dev/null @@ -1,36 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=python -msphinx -) -set SOURCEDIR=. -set BUILDDIR=_build -set SPHINXPROJ=esm_tools - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The Sphinx module was not found. Make sure you have Sphinx installed, - echo.then set the SPHINXBUILD environment variable to point to the full - echo.path of the 'sphinx-build' executable. Alternatively you may add the - echo.Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/docs/recipes/sbatch_flags.rst b/docs/recipes/sbatch_flags.rst index 96072abf2..4599506c7 100644 --- a/docs/recipes/sbatch_flags.rst +++ b/docs/recipes/sbatch_flags.rst @@ -28,3 +28,17 @@ your runscript: computer: additional_flags: "--qos=24h" + +Adding more than one flag +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Alternatively, you can include a list of additional flags: + +.. code-block:: yaml + + computer: + additional_flags: + - "--qos=24h" + - "--comment='My Slurm Comment'" + +See the documentation for the batch scheduler on your HPC system to see the allowed options. diff --git a/esm_tools/motd/motd.yaml b/esm_tools/motd/motd.yaml index c2a652a84..01a6c248c 100644 --- a/esm_tools/motd/motd.yaml +++ b/esm_tools/motd/motd.yaml @@ -134,9 +134,9 @@ release6.7: action: DELAY(1) announcement: true -oasis_branchoff6.21.17: +oasis_branchoff6.21.23: package: "esm_tools" - versions: "<6.21.17" + versions: "<6.21.23" message: "\x1b[1m\x1b[96mOASIS BRANCH-OFF RESTARTS FIXED!\x1b[0m\n The bug on the copying of the wrong flux files from OASIS \x1b[96mwhen running\n diff --git a/motd/motd.yaml b/motd/motd.yaml index c2a652a84..01a6c248c 100644 --- a/motd/motd.yaml +++ b/motd/motd.yaml @@ -134,9 +134,9 @@ release6.7: action: DELAY(1) announcement: true -oasis_branchoff6.21.17: +oasis_branchoff6.21.23: package: "esm_tools" - versions: "<6.21.17" + versions: "<6.21.23" message: "\x1b[1m\x1b[96mOASIS BRANCH-OFF RESTARTS FIXED!\x1b[0m\n The bug on the copying of the wrong flux files from OASIS \x1b[96mwhen running\n diff --git a/runscripts/awicm/awicm-mistral-initial-monthly.run b/runscripts/awicm/awicm-mistral-initial-monthly.run deleted file mode 100755 index 606079dbd..000000000 --- a/runscripts/awicm/awicm-mistral-initial-monthly.run +++ /dev/null @@ -1,47 +0,0 @@ -#! /bin/ksh -l -set -e - - -setup_name="awicm" -#check=1 - -account=ab0995 -compute_time="00:25:00" -############################################################################### - -INITIAL_DATE_awicm=2000-01-01 # Initial exp. date -FINAL_DATE_awicm=2000-02-01 # Final date of the experiment - -awicm_VERSION="CMIP6" -POST_PROCESSING_awicm=0 -SCENARIO_awicm="PI-CTRL" - -RES_fesom=CORE2 - -MODEL_DIR_awicm=${HOME}/esm-master/awicm-test/ - -BASE_DIR=/work/ab0995/a270058/esm_yaml_test/ - -POOL_DIR_fesom=/work/bm0944/input/ - -MESH_DIR_fesom=/pool/data/AWICM/FESOM1/MESHES/core/ - -NYEAR_awicm=0 # Number of years per run -NMONTH_awicm=1 # Number of months per run - -LRESUME_echam=0 -LRESUME_fesom=0 -LRESUME_oasis3mct=0 - -RESTART_RATE_fesom=1 -RESTART_FIRST_fesom=1 -RESTART_UNIT_fesom='m' - - - -further_reading_fesom="fesom_output_control.yaml" - - -############################################################################### -load_all_functions -general_do_it_all $@ diff --git a/runscripts/awicm/awicm-mistral-restart-LGM_BoundaryConditions.run b/runscripts/awicm/awicm-mistral-restart-LGM_BoundaryConditions.run deleted file mode 100755 index 7afc2651c..000000000 --- a/runscripts/awicm/awicm-mistral-restart-LGM_BoundaryConditions.run +++ /dev/null @@ -1,101 +0,0 @@ -#! /bin/ksh -l -set -e - -module purge - -export FUNCTION_PATH=${HOME}/esm-master/esm-runscripts/functions/all -export FPATH=$FUNCTION_PATH - -machine_name=mistral -setup_name="awicm" # mpiesm, pism_mpiesm, echam. 
mpiom, or pism -ACCOUNT=ab0246 -check=0 # run job in check mode, without submitting and / or launching - - -# the executable. Set to 0 to turn off -#compute_time="00:20:00" # this will cause walltime barriers, bu the job should start right away... -compute_time="03:00:00" -post_time="01:00:00" -ESM_USE_C_CALENDAR=1 -############################################################################### -awicm_VERSION="1.1" -MODEL_DIR_awicm=${HOME}/esm-master/awicm-test/ - - -########################## -## GENERAL SECTION ####### -########################## -expid="LGM_TEST" - -BASE_DIR=/work/ba0989/a270077/esm_tools_dev - -INITIAL_DATE=1850-01-01 # initial exp. date -FINAL_DATE=1852-01-01 # final date of the experiment - - -NYEAR_echam=1 # number of years per run -NMONTH_echam=0 - - -########################## -## ECHAM/JSBACH SECTION ## -########################## -RES_echam=T63 -SCENARIO_echam=PALEO - -echam_DATASET=r0007 -jsbach_DATASET=r0009 - -#BIN_DIR_echam=${HOME}/esm-master/echam-test/bin - -# if you want to start from echam restarts, use: -LRESUME_echam=1 -INI_RESTART_DIR_echam=/work/ab0246/a270064/esm-experiments/lgm_anm/restart/echam -INI_PARENT_DATE_echam=39281231 #, for example 24001231235640 -INI_PARENT_EXP_ID_echam=lgm_anm # of spinup experiment, for example TST - -nproca_echam=12 -nprocb_echam=24 - -INI_RESTART_DIR_jsbach=/work/ab0246/a270064/esm-experiments/lgm_anm/restart/jsbach -INI_RESTART_DIR_hdmodel=/work/ab0246/a270064/esm-experiments/lgm_anm/restart/hdmodel - -JAN_SURF_echam=/work/ab0246/a270064/esm-experiments/lgm_anm/input/echam/T63LGM_jan_surf.nc -VGRATCLIM_echam=/work/ab0246/a270064/esm-experiments/lgm_anm/input/echam/T63LGM_VGRATCLIM.nc -VLTCLIM_echam=/work/ab0246/a270064/esm-experiments/lgm_anm/input/echam/T63LGM_VLTCLIM.nc - -HDPARA_FILE_hdmodel=/work/ab0246/a270064/esm-experiments/lgm_anm/input/hdmodel/hdpara.nc -LAND_BOUNDARY_CONDITIONS_jsbach=/work/ab0246/a270064/esm-experiments/lgm_anm/input/jsbach/jsbach_T63LGM_11tiles_5layers_1850.nc -DYNVEG_jsbach=dynveg - -CO2_echam=190.0e-6 -CH4_echam=0.375e-6 -N2O_echam=0.200e-6 -CECC_echam=0.018994 -COBLD_echam=22.949 -CLONP_echam=294.42 -DISTURBED_YEARS_echam='1894' -DISTURBANCE_echam=1.00001 - -################### -## FESOM SECTION ## -################### - -RES_fesom=LGM -MESH_DIR_fesom=/mnt/lustre02/work/ba0989/a270077/FESOM_meshes/CORE2_LGM - -INI_RESTART_DIR_fesom=/work/ab0246/a270064/esm-experiments/lgm_anm/restart/fesom -INI_PARENT_DATE_fesom=39281231 #, for example 24001231235640 - -RESTART_RATE_fesom=1 -RESTART_FIRST_fesom=1 -RESTART_UNIT_fesom='y' - - - -further_reading_fesom="fesom_output_control.yaml" -############################################################################### - - -load_all_functions -general_do_it_all $@ diff --git a/runscripts/awicm/awicm-ollie-initial-monthly.run b/runscripts/awicm/awicm-ollie-initial-monthly.run deleted file mode 100755 index 21ed5b415..000000000 --- a/runscripts/awicm/awicm-ollie-initial-monthly.run +++ /dev/null @@ -1,45 +0,0 @@ -#! /bin/ksh -l -set -e - - -setup_name="awicm" -#check=1 - -account=ab0995 -compute_time="00:15:00" -############################################################################### - -INITIAL_DATE_awicm=2000-01-01 # Initial exp. 
date -FINAL_DATE_awicm=2000-02-29 # Final date of the experiment - -awicm_VERSION="CMIP6" -POST_PROCESSING_awicm=0 -SCENARIO_awicm="PI-CTRL" - -RES_fesom=CORE2 - -MODEL_DIR_awicm=/work/ollie/dbarbi/modelcodes/awicm-CMIP6/ -BASE_DIR=/work/ollie/dbarbi/esm_yaml_test/ - -POOL_DIR_fesom=/work/ollie/pool/FESOM/ -MESH_DIR_fesom=/work/ollie/pool/FESOM/meshes_default/core/ - -NYEAR_awicm=0 # Number of years per run -NMONTH_awicm=1 # Number of months per run - -LRESUME_echam=0 -LRESUME_fesom=0 -LRESUME_oasis3mct=0 - -RESTART_RATE_fesom=1 -RESTART_FIRST_fesom=1 -RESTART_UNIT_fesom='m' - - - -further_reading_fesom="fesom_output_control.yaml" - - -############################################################################### -load_all_functions -general_do_it_all $@ diff --git a/runscripts/awicm/awicm1-CMIP6-initial-daily.yaml b/runscripts/awicm/awicm1-CMIP6-initial-daily.yaml new file mode 100644 index 000000000..8abdcab29 --- /dev/null +++ b/runscripts/awicm/awicm1-CMIP6-initial-daily.yaml @@ -0,0 +1,90 @@ +general: + setup_name: 'awicm' + account: ab0995 #!ENV ${ACCOUNT} + compute_time: "00:30:00" + initial_date: "1850-01-03" + final_date: "1850-01-04" + base_dir: !ENV ${WORKCD} #${ESM_TESTING_DIR} + nyear: 0 + nmonth: 0 + nday: 1 + lresume: false + ini_restart_exp_id: i971_awi1 + ini_restart_dir: /work/ab0995/a270152/i971_awi1/restart/ + +awicm: + version: "CMIP6" + scenario: "PI-CTRL" + post_processing: 0 + model_dir: /work/ab0995/a270152/model_codes/awicm-CMIP6/ #!ENV ${MODEL_DIR} + pool_dir: /pool/data/ + resolution: T127 + +fesom: + restart_rate: 1 + restart_unit: "d" + restart_first: 1 + + choose_computer.name: + levante: + rpool_dir: /work/ab0995/a270152/testing_pool/input/ + rmesh_dir: /work/ab0995/a270152/testing_pool/AWICM/FESOM1/MESHES/core/ + rproc: 384 + pool_dir: ${fesom.rpool_dir} + mesh_dir: ${fesom.rmesh_dir} + resolution: CORE2 + time_step: 450 + nproc: ${rproc} + + lresume: ${general.lresume} + ini_restart_exp_id: ${general.ini_restart_exp_id} + ini_restart_date: "${prev_date}" + ini_restart_dir: ${general.ini_restart_dir}/fesom/ + +echam: + restart_rate: 1 + restart_unit: "days" + restart_first: 1 + + scenario: "PI-CTRL" + resolution: T127 + levels: L95 + post_processing: False + lresume: ${general.lresume} + choose_computer.name: + levante: + nproca: 32 + nprocb: 32 + "*": + nproca: 24 + nprocb: 24 + time_step: 200 + + ini_restart_exp_id: ${general.ini_restart_exp_id} + ini_restart_date: "${prev_date}" + ini_restart_dir: ${general.ini_restart_dir}/echam/ + +jsbach: + dataset: r0009 + streams: + - jsbach + - jsbid + - yasso + - surf + - veg + remove_jsbach.forcing_files: + - a_nd_file + - lightning + - popdens + - LU + + lresume: ${general.lresume} + ini_restart_exp_id: ${general.ini_restart_exp_id} + ini_restart_date: "${prev_date}" + ini_restart_dir: ${general.ini_restart_dir}/jsbach/ + +oasis3mct: + lresume: ${general.lresume} + ini_restart_exp_id: ${general.ini_restart_exp_id} + ini_restart_date: "${prev_date}" + ini_restart_dir: ${general.ini_restart_dir}/oasis3mct/ diff --git a/runscripts/awicm/echam-mistral-restart-monthly.run b/runscripts/awicm/echam-mistral-restart-monthly.run deleted file mode 100755 index 68eb3c163..000000000 --- a/runscripts/awicm/echam-mistral-restart-monthly.run +++ /dev/null @@ -1,34 +0,0 @@ -#! /bin/ksh -l -set -e - - -setup_name="echam_standalone" -#check=1 - -account=ab0995 -compute_time="00:15:00" -############################################################################### - -INITIAL_DATE=2000-01-01 # Initial exp. 
date -FINAL_DATE=2000-02-29 # Final date of the experiment - -echam_VERSION="6.3.04p1" -POST_PROCESSING_echam=0 -SCENARIO_echam="PI-CTRL" - -RES_echam=T63 - -MODEL_DIR=/work/ab0995/a270058/modelcodes/echam-6.3.04p1// -BASE_DIR=/work/ab0995/a270058/esm_yaml_test/ - -NYEAR=0 # Number of years per run -NMONTH=1 # Number of months per run - -LRESUME_echam=1 -INI_RESTART_DIR_echam=/work/ab0246/a270064/esm-experiments/lgm_anm/restart/echam -INI_PARENT_DATE_echam=37991231 -INI_PARENT_EXP_ID_echam=lgm_anm - -############################################################################### -load_all_functions -general_do_it_all $@ diff --git a/runscripts/awicm/foci-mistral-initial_monthly.run b/runscripts/awicm/foci-mistral-initial_monthly.run deleted file mode 100755 index 6da9cdcaa..000000000 --- a/runscripts/awicm/foci-mistral-initial_monthly.run +++ /dev/null @@ -1,32 +0,0 @@ - -setup_name="foci" -compute_time="00:15:00" -account=ab0995 - -############################################################################### - -INITIAL_DATE=2000-01-01 # Initial exp. date -FINAL_DATE=2000-02-29 # Final date of the experiment - -foci_VERSION="1.0" -POST_PROCESSING_foci=0 -SCENARIO="PI-CTRL" - -RES_nemo=ORCA05 -RES_echam=T63 - -MODEL_DIR=/work/ab0995/a270058/modelcodes/foci-1.0/ -BASE_DIR=/work/ab0995/a270058/esm_yaml_test/ - -NYEAR=0 # Number of years per run -NMONTH=1 # Number of months per run - -LRESUME_echam=0 -LRESUME_nemo=0 -LRESUME_oasis3mct=0 - -RESTART_RATE_nemo=1 -RESTART_FIRST_nemo=1 -RESTART_UNIT_nemo='m' - -############################################################################### diff --git a/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-albedo-TCO95L91-CORE2.yaml b/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-albedo-TCO95L91-CORE2.yaml new file mode 100644 index 000000000..64c1499b4 --- /dev/null +++ b/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-albedo-TCO95L91-CORE2.yaml @@ -0,0 +1,53 @@ +general: + user: !ENV ${USER} + setup_name: "awicm3" + version: "v3.1.1" + account: paleodyn.paleodyn + compute_time: "00:10:00" + initial_date: "2000-01-01" + final_date: "2000-02-01" + base_dir: "/albedo/work/user/${user}/runtime/${general.setup_name}-${general.version}/" + nday: 0 + nmonth: 1 + nyear: 0 + +computer: + taskset: true + additional_flags: [ --qos=30min ] + +awicm3: + postprocessing: false + model_dir: "/albedo/home/${user}/model_codes/${general.setup_name}-${general.version}//" + +fesom: + resolution: "CORE2" + pool_dir: "/albedo/work/user/mandresm/test_pool/clidyn/FESOM2/" + climate_data_dir: "${pool_dir}/hydrography/phc3.0/" + mesh_dir: "${pool_dir}/meshes/core2/" + restart_rate: 1 + restart_unit: "m" + restart_first: 1 + lresume: false + time_step: 1800 + nproc: 128 + +oifs: + resolution: "TCO95" + levels: "L91" + prepifs_expid: aack + input_expid: awi3 + wam: true + lresume: false + time_step: 3600 + nproc: 128 + omp_num_threads: 1 + +oasis3mct: + lresume: false # Set to false to generate the rst files for first leg + time_step: 7200 + +xios: + with_model: oifs + nproc: 4 + omp_num_threads: 32 + diff --git a/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-levante-TCO95L91-CORE2.yaml b/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-levante-TCO95L91-CORE2.yaml new file mode 100644 index 000000000..7d5a6b1a1 --- /dev/null +++ b/runscripts/awicm3/v3.1.1/awicm3-v3.1.1-levante-TCO95L91-CORE2.yaml @@ -0,0 +1,56 @@ +general: + user: !ENV ${USER} + setup_name: "awicm3" + version: "v3.1.1" + account: "ab0995" + compute_time: "00:20:00" + initial_date: "2000-01-01" + final_date: "2000-02-01" + base_dir: 
"/work/ab0246/${user}/runtime/${general.setup_name}-${general.version}/" + nday: 0 + nmonth: 1 + nyear: 0 + +computer: + taskset: true + +awicm3: + postprocessing: false + model_dir: "/home/a/${user}/model_codes/${general.setup_name}-${general.version}//" + +fesom: + resolution: "CORE2" + pool_dir: "/work/ab0246/a270092/input/fesom2/" + mesh_dir: "${pool_dir}/core2/" + tide_forcing_dir: "${pool_dir}/fesom/tide_forcing/" + add_namelist_changes: + namelist.config: + paths: + tideforcingpath: "${pool_dir}/fesom/tide_forcing/" + restart_rate: 1 + restart_unit: "m" + restart_first: 1 + lresume: false + time_step: 1800 + nproc: 128 + +oifs: + resolution: "TCO95" + levels: "L91" + prepifs_expid: aack + input_expid: awi3 + wam: true + lresume: false + time_step: 3600 + nproc: 128 + omp_num_threads: 1 + +oasis3mct: + lresume: false # Set to false to generate the rst files for first leg + time_step: 7200 + +xios: + with_model: oifs + nproc: 4 + omp_num_threads: 32 + diff --git a/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-albedo-TCO95L91-CORE2.yaml b/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-albedo-TCO95L91-CORE2.yaml new file mode 100644 index 000000000..f73edab15 --- /dev/null +++ b/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-albedo-TCO95L91-CORE2.yaml @@ -0,0 +1,53 @@ +general: + user: !ENV ${USER} + setup_name: "awicm3" + version: "v3.1.2" + account: paleodyn.paleodyn + compute_time: "00:10:00" + initial_date: "2000-01-01" + final_date: "2000-02-01" + base_dir: "/albedo/work/user/${user}/runtime/${general.setup_name}-${general.version}/" + nday: 0 + nmonth: 1 + nyear: 0 + +computer: + taskset: true + additional_flags: [ --qos=30min ] + +awicm3: + postprocessing: false + model_dir: "/albedo/home/${user}/model_codes/${general.setup_name}-${general.version}//" + +fesom: + resolution: "CORE2" + pool_dir: "/albedo/work/user/mandresm/test_pool/clidyn/FESOM2/" + climate_data_dir: "${pool_dir}/hydrography/phc3.0/" + mesh_dir: "${pool_dir}/meshes/core2/" + restart_rate: 1 + restart_unit: "m" + restart_first: 1 + lresume: false + time_step: 1800 + nproc: 128 + +oifs: + resolution: "TCO95" + levels: "L91" + prepifs_expid: aack + input_expid: awi3 + wam: true + lresume: false + time_step: 3600 + nproc: 128 + omp_num_threads: 1 + +oasis3mct: + lresume: false # Set to false to generate the rst files for first leg + time_step: 7200 + +xios: + with_model: oifs + nproc: 4 + omp_num_threads: 32 + diff --git a/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-levante-TCO95L91-CORE2.yaml b/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-levante-TCO95L91-CORE2.yaml new file mode 100644 index 000000000..55ced8241 --- /dev/null +++ b/runscripts/awicm3/v3.1.2/awicm3-v3.1.2-levante-TCO95L91-CORE2.yaml @@ -0,0 +1,56 @@ +general: + user: !ENV ${USER} + setup_name: "awicm3" + version: "v3.1.2" + account: "ab0995" + compute_time: "00:20:00" + initial_date: "2000-01-01" + final_date: "2000-02-01" + base_dir: "/work/ab0246/${user}/runtime/${general.setup_name}-${general.version}/" + nday: 0 + nmonth: 1 + nyear: 0 + +computer: + taskset: true + +awicm3: + postprocessing: false + model_dir: "/home/a/${user}/model_codes/${general.setup_name}-${general.version}//" + +fesom: + resolution: "CORE2" + pool_dir: "/work/ab0246/a270092/input/fesom2/" + mesh_dir: "${pool_dir}/core2/" + tide_forcing_dir: "${pool_dir}/fesom/tide_forcing/" + add_namelist_changes: + namelist.config: + paths: + tideforcingpath: "${pool_dir}/fesom/tide_forcing/" + restart_rate: 1 + restart_unit: "m" + restart_first: 1 + lresume: false + time_step: 1800 + nproc: 128 + +oifs: + resolution: 
"TCO95" + levels: "L91" + prepifs_expid: aack + input_expid: awi3 + wam: true + lresume: false + time_step: 3600 + nproc: 128 + omp_num_threads: 1 + +oasis3mct: + lresume: false # Set to false to generate the rst files for first leg + time_step: 7200 + +xios: + with_model: oifs + nproc: 4 + omp_num_threads: 32 + diff --git a/setup.cfg b/setup.cfg index 933d60e1f..9870226c5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 6.21.19 +current_version = 6.23.4 commit = True tag = True diff --git a/setup.py b/setup.py index 5c76bc54b..9b03282bf 100644 --- a/setup.py +++ b/setup.py @@ -26,9 +26,11 @@ "packaging", "pandas>=1.0", "psutil", + "pytest", "pyyaml", "pyyaml>=5.1", "questionary", + "ruamel.yaml==0.17.32", "semver", "sqlalchemy", "tabulate", @@ -93,6 +95,6 @@ test_suite="tests", tests_require=test_requirements, url="https://github.com/esm-tools/esm_tools", - version="6.21.19", + version="6.23.4", zip_safe=False, ) diff --git a/src/esm_archiving/__init__.py b/src/esm_archiving/__init__.py index c7386692d..4d45392fb 100644 --- a/src/esm_archiving/__init__.py +++ b/src/esm_archiving/__init__.py @@ -4,7 +4,7 @@ __author__ = """Paul Gierz""" __email__ = "pgierz@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_archiving import (archive_mistral, check_tar_lists, delete_original_data, determine_datestamp_location, diff --git a/src/esm_calendar/__init__.py b/src/esm_calendar/__init__.py index 064d78e64..8fbe59e02 100644 --- a/src/esm_calendar/__init__.py +++ b/src/esm_calendar/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_calendar import * diff --git a/src/esm_calendar/esm_calendar.py b/src/esm_calendar/esm_calendar.py index db786599b..885b2bd02 100644 --- a/src/esm_calendar/esm_calendar.py +++ b/src/esm_calendar/esm_calendar.py @@ -718,10 +718,10 @@ def format( """ Beautifully returns a ``Date`` object as a string. 
- Parameters: - ----------- + Parameters + ---------- form : str or int - Some cryptic that Dirk over-took from MPI-Met + Logic taken from from MPI-Met givenph : bool-ish Print hours givenpm : bool-ish @@ -729,10 +729,9 @@ def format( givenps : bool-ish Print seconds - Notes: - ------ - How to use the ``form`` argument: - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Note + ---- + **How to use the ``form`` argument** The following forms are accepted: + SELF: uses the format which was given when constructing the date + 0: A Date formatted as YYYY diff --git a/src/esm_cleanup/__init__.py b/src/esm_cleanup/__init__.py index afd6c596c..c110ed1c5 100644 --- a/src/esm_cleanup/__init__.py +++ b/src/esm_cleanup/__init__.py @@ -2,4 +2,4 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" diff --git a/src/esm_database/__init__.py b/src/esm_database/__init__.py index 2b2919991..a6f5fc310 100644 --- a/src/esm_database/__init__.py +++ b/src/esm_database/__init__.py @@ -2,4 +2,4 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" diff --git a/src/esm_environment/__init__.py b/src/esm_environment/__init__.py index 76370577a..f70408809 100644 --- a/src/esm_environment/__init__.py +++ b/src/esm_environment/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_environment import * diff --git a/src/esm_master/__init__.py b/src/esm_master/__init__.py index f1efb69bf..ab0da901e 100644 --- a/src/esm_master/__init__.py +++ b/src/esm_master/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from . 
import database diff --git a/src/esm_master/compile_info.py b/src/esm_master/compile_info.py index 2d7f00f0a..cbc42418d 100644 --- a/src/esm_master/compile_info.py +++ b/src/esm_master/compile_info.py @@ -10,7 +10,6 @@ SETUPS_DIR, DEFAULTS_DIR, ESM_SOFTWARE_DIR, - ESM_MASTER_PICKLE, ESM_MASTER_DIR, ) @@ -337,16 +336,7 @@ def get_relevant_info(relevant_entries, raw_config, merge_into_this_config=None) class setup_and_model_infos: def __init__(self, vcs, general, parsed_args): - if not os.path.isfile(ESM_MASTER_PICKLE): - self.config, self.relevant_entries = combine_components_yaml(parsed_args) - save_pickle(self.config, ESM_MASTER_PICKLE) - - elif "list_all_packages" in parsed_args: - self.config = load_pickle(ESM_MASTER_PICKLE) - - else: - self.config, self.relevant_entries = combine_components_yaml(parsed_args) - save_pickle(self.config, ESM_MASTER_PICKLE) + self.config, self.relevant_entries = combine_components_yaml(parsed_args) self.model_kinds = list(self.config.keys()) self.meta_todos = general.meta_todos diff --git a/src/esm_master/general_stuff.py b/src/esm_master/general_stuff.py index 1f30d3223..07951ba48 100644 --- a/src/esm_master/general_stuff.py +++ b/src/esm_master/general_stuff.py @@ -18,8 +18,6 @@ CONFIG_YAML = esm_tools.get_config_filepath("/esm_software/esm_master/esm_master.yaml") VCS_FOLDER = esm_tools.get_config_filepath("/other_software/vcs/") -ESM_MASTER_PICKLE = ESM_SOFTWARE_DIR + "/esm_master/esm_master.pkl" - ###################################################################################### ##################################### tab completion ################################# ###################################################################################### diff --git a/src/esm_motd/__init__.py b/src/esm_motd/__init__.py index 0aa165b0b..4cd4e97bd 100644 --- a/src/esm_motd/__init__.py +++ b/src/esm_motd/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_motd import * diff --git a/src/esm_parser/__init__.py b/src/esm_parser/__init__.py index da6ae2c80..299f79e91 100644 --- a/src/esm_parser/__init__.py +++ b/src/esm_parser/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_parser import * diff --git a/src/esm_parser/esm_parser.py b/src/esm_parser/esm_parser.py index ab5fcb887..d51bce619 100644 --- a/src/esm_parser/esm_parser.py +++ b/src/esm_parser/esm_parser.py @@ -621,6 +621,7 @@ def dict_merge(dct, merge_dct, resolve_nested_adds=False, **kwargs): """ # option to overwrite a dict value if merge_dict contains empty value. Default # is False + dont_overwrite_with_empty_value = kwargs.get( "dont_overwrite_with_empty_value", False ) @@ -653,6 +654,7 @@ def dict_merge(dct, merge_dct, resolve_nested_adds=False, **kwargs): # An idea...but I have absolutely no clue how to cleanly implement that... 
if k != "debug_info": dict_merge(dct[k], merge_dct[k], resolve_nested_adds) + # TODO: check if this can be removed else: if "debug_info" in dct: if isinstance(dct["debug_info"]["loaded_from_file"], str): @@ -1061,7 +1063,7 @@ def add_entry_to_chapter( mod_list.extend(list(flatten_nested_lists(add_entries))) # Remove duplicates - mod_list_no_dupl = [] + mod_list_no_dupl = ListWithProvenance([], None) for el in mod_list: if not isinstance(el, (dict, tuple, list)): if el not in mod_list_no_dupl: @@ -1698,6 +1700,7 @@ def add_more_important_tasks(choose_keyword, all_set_variables, task_list): return task_list +@keep_provenance_in_recursive_function def recursive_run_function(tree, right, level, func, *args, **kwargs): """Recursively runs func on all nested dicts. @@ -1796,7 +1799,10 @@ def func(tree, right, *args, **kwargs) # logger.debug("finished with do_func_for") if isinstance(right, list): - newright = [] + if isinstance(right, ListWithProvenance): + newright = ListWithProvenance([], None) + else: + newright = [] for index, item in enumerate(right): new_item = recursive_run_function( tree + [None], item, level, func, *args, **kwargs @@ -1814,10 +1820,7 @@ def func(tree, right, *args, **kwargs) newright.extend(new_item) else: newright.append(new_item) - if isinstance(right, ListWithProvenance): - right = ListWithProvenance(newright, right.get_provenance()) - else: - right = newright + right = newright elif isinstance(right, dict): keys = list(right) for key in keys: @@ -1885,7 +1888,7 @@ def determine_regex_list_match(test_str, regex_list): return any(result) -@keep_provenance +@keep_provenance_in_recursive_function def find_variable(tree, rhs, full_config, white_or_black_list, isblacklist): raw_str = rhs if not tree[-1]: @@ -2248,7 +2251,7 @@ def determine_computer_from_hostname(): # ) -@keep_provenance +@keep_provenance_in_recursive_function def do_math_in_entry(tree, rhs, config): if not tree[-1]: tree = tree[:-1] @@ -2353,7 +2356,7 @@ def do_math_in_entry(tree, rhs, config): return convert(entry.strip(), tree) -@keep_provenance +@keep_provenance_in_recursive_function def mark_dates(tree, rhs, config): """Adds the ``DATE_MARKER`` to any entry who's key ends with ``"date"``""" if not tree[-1]: @@ -2368,7 +2371,7 @@ def mark_dates(tree, rhs, config): return entry -@keep_provenance +@keep_provenance_in_recursive_function def marked_date_to_date_object(tree, rhs, config): """Transforms a marked date string into a Date object""" if not tree[-1]: @@ -2399,7 +2402,7 @@ def marked_date_to_date_object(tree, rhs, config): return entry -@keep_provenance +@keep_provenance_in_recursive_function def unmark_dates(tree, rhs, config): """Removes the ``DATE_MARKER`` to any entry who's entry contains the ``DATE_MARKER``.""" if not tree[-1]: @@ -2411,7 +2414,7 @@ def unmark_dates(tree, rhs, config): return entry -@keep_provenance +@keep_provenance_in_recursive_function def perform_actions(tree, rhs, config): if not tree[-1]: tree = tree[:-1] @@ -2462,7 +2465,7 @@ def perform_actions(tree, rhs, config): return entry -@keep_provenance +@keep_provenance_in_recursive_function def purify_booleans(tree, rhs, config): if not tree[-1]: tree = tree[:-1] diff --git a/src/esm_parser/provenance.py b/src/esm_parser/provenance.py index b885a80ce..5b64584d9 100644 --- a/src/esm_parser/provenance.py +++ b/src/esm_parser/provenance.py @@ -1,8 +1,233 @@ -import esm_parser +""" +Provenance's dark magic. 
The basic idea is that one use the following to understand +from which yaml file (line and column) a variable in ``config`` is coming from: + +.. code-block:: python + + config["fesom"]["version"].provenance + +And that will return a list of the provenance history of that variable, for example: + +.. code-block:: python + + [{'category': 'components', + 'col': 10, + 'line': 6, + 'yaml_file': '/Users/mandresm/Codes/esm_tools/configs/components/fesom/fesom-2.0.yaml'}, + {'category': 'setups', + 'col': 18, + 'extended_by': 'dict.__setitem__', + 'line': 321, + 'yaml_file': '/Users/mandresm/Codes/esm_tools/configs/setups/awicm3/awicm3.yaml'}] + +The last element in the provenance list represents the provenance of the current value +(last provenance). +This module contains: +* The provenance class, to store the provenance of values with extended functionality +* A wrapper factory to create classes and objects dynamically that subclass the value + types and append provenances to them (``WithProvenance`` classes) +* Class attributes common to all ``WithProvenance`` classes +* Classes for mappings with provenance (dictionaries and lists) to recursively put and + get provenance from nested values, and extend the standard mapping methods + (``__setitem__``, ``update``...) +* A decorator to keep provenance in ``esm_parser``'s recursive functions +* A method to clean provenance recursively and get back the data without provenance +""" + +import copy + +import esm_parser from esm_calendar import Date +# ================= +# PROVENANCE CLASS +# ================= +class Provenance(list): + """ + A subclass of list in which each element represents the provenance of the value + at a point in the key-value history. The whole point of this class is to have a + list subclass that allows us to include information about which function is + changing the list within each provenance element. + + To assign the provenance to a value, instanciate it as an attribute of that value + (i.e. ``self.provenance = Provenance(my_provenance)``). To be used from the + ``WithProvenance`` classes created by ``wrapper_with_provenance_factory``. + + The following class methods provide the extended functionality to lists: + * ``self.append_last_step_modified_by``: to duplicate the last element of the list + and add to it information about the function that is modifying the value + * ``self.extend_and_modified_by``: to extend a list while including in the + provenance the function which is responsible for extending it + """ + + def __init__(self, provenance_data): + """ + Initialize the provenance object with its subclass (``list``) giving + ``provenance_data`` as input. If ``provenance_data`` is not a ``list`` + initialize the object as a list which only element is ``provenance_data``. + + Parameters + ---------- + provenance_data : list + List of provenance elements that describes the history of a key-value, or + a single provenance element. + """ + + if isinstance(provenance_data, list): + super().__init__(provenance_data) + else: + super().__init__([provenance_data]) + + def append_last_step_modified_by(self, func): + """ + Copies the last element in the provenance history and adds the entry ``modify_by`` + with value ``func`` to the copy. 
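For orientation, here is a minimal, hedged sketch of how the ``Provenance`` list introduced above is meant to be used on its own, assuming ``esm_parser`` is importable; the file name, line/column numbers, and category are invented purely for illustration:

.. code-block:: python

    from esm_parser.provenance import Provenance

    # A single provenance step (keys follow the pattern shown in the module
    # docstring; the concrete values here are made up for the example)
    first_step = {
        "yaml_file": "configs/components/fesom/fesom-2.0.yaml",
        "line": 6,
        "col": 10,
        "category": "components",
    }
    prov = Provenance(first_step)      # non-list input is wrapped into a one-element history

    # Record that a function touched the value: the last step is duplicated
    # and labelled with ``modified_by``
    prov.append_last_step_modified_by("do_math_in_entry")

    print(len(prov))                   # 2
    print(prov[-1]["modified_by"])     # do_math_in_entry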
+ + Parameters + ---------- + func : str + Function that is modifying the variable + """ + new_provenance_step = copy.deepcopy(self[-1]) + new_provenance_step = self.add_modified_by(new_provenance_step, func) + + self.append(new_provenance_step) + + def extend_and_modified_by(self, additional_provenance, func): + """ + Extends the current provenance history with an ``additional_provenance``. This + happens when for example a variable comes originally from a file, but then the + value is overwritten by another value that comes from a file higher in the + hierarchy. This method keeps both histories, with the history of the second been + on top of the first. + + Parameters + ---------- + additional_provenance : esm_parser.Provenance + Additional provenance history to be used for extending ``self`` + func : str + Function triggering this method + """ + new_additional_provenance = additional_provenance + # If the new provenance is not identical to the current one extend the + # provenance + if new_additional_provenance is not self: + for elem in new_additional_provenance: + new_additional_provenance.add_modified_by( + elem, func, modified_by="extended_by" + ) + self.extend(new_additional_provenance) + # If the new provenance is identical just mark the variable as modified_by + # func + else: + self.append_last_step_modified_by(func) + + def add_modified_by(self, provenance_step, func, modified_by="modified_by"): + """ + Adds a variable of name defined by ``modified_by`` to the given provenance step + with value ``func``. This variable is used to label provenance steps of the + provenance history with functions that modified it. + + Parameters + ---------- + provenance_step : dict + Provenance entry of the current step + func : str + Function triggering this method + modified_by : str + Name of the key for the labelling the type of modification + + Returns + ------- + provenance_step : dict + Provenance entry of the current step with the ``modified_by`` item + """ + if provenance_step is not None: + provenance_step[modified_by] = str(func) + + return provenance_step + + +# ======================================================== +# PROVENANCE WRAPPER FACTORY CLASS METHODS AND PROPERTIES +# ======================================================== +@classmethod +def wrapper_with_provenance_new(cls, *args, **kwargs): + """ + To be used as the ``__new__`` method for WithProvenance classes. This is key for + ``copy.deepcopy``, without this ``copy.deepcopy`` breaks. + """ + return super(cls, cls).__new__(cls, args[1]) + + +def wrapper_with_provenance_init(self, value, provenance=None): + """ + To be used as the ``__init__`` method for WithProvenance classes. Adds the + ``provenance`` value as an instance of ``Provenance`` to the ``self._provenance`` + attribute, and stores the original ``value`` to the ``self.value`` attribute. + + Parameters + ---------- + value : any + Value of the object + provenance : any + The provenance information + """ + self._provenance = Provenance(provenance) + self.value = value + + +# @Paul: maybe we could add a classmethod here for including it in +# ProvenanceClassForTheUnsubclassable, Date and the dynamic classes from +# wrapper_with_provenance_factory (but not for ListWithProvenance or +# DictWithProvenance)? That might be easier than having to add_representer somewhere +# else, but no clue... you're the yaml expert :) + + +@property +def prop_provenance(self): + """ + To be used as the ``provenance`` property in WithProvenance classes. 
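As a small illustration of the ``provenance`` property defined here: the setter only accepts ``Provenance`` instances. This is a sketch with invented provenance entries, assuming ``esm_parser`` is importable:

.. code-block:: python

    from esm_parser.provenance import Provenance, wrapper_with_provenance_factory

    # A str leaf wrapped into its dynamically created StrWithProvenance class
    version = wrapper_with_provenance_factory(
        "2.0", {"yaml_file": "fesom-2.0.yaml", "line": 6, "col": 10}
    )
    print(version, version.provenance[-1]["line"])   # 2.0 6

    # Reassigning the property requires a Provenance instance ...
    version.provenance = Provenance({"yaml_file": "runscript.yaml", "line": 3, "col": 12})

    # ... anything else is rejected by the setter
    try:
        version.provenance = {"yaml_file": "runscript.yaml"}
    except ValueError as error:
        print(error)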
+ + Returns + ------- + self._provenance : esm_parser.provenance.Provenance + The provenance history stored in ``self._provenance`` + """ + return self._provenance + + +@prop_provenance.setter +def prop_provenance(self, new_provenance): + """ + Setter for the ``provenance`` property of WithProvenance classes. Makes sure that + any value assigned to this property is a ``Provenance`` object and if it is not + returns an error. + + Parameters + ---------- + new_provenance : esm_parser.provenance.Provenance + New provenance history to be set + + Raises + ------ + ValueError : + If the given ``new_provenance`` is not a ``Provenance`` object + """ + # Check if new_provenance is an instance of Provenance + if not isinstance(new_provenance, Provenance): + raise ValueError( + "Provenance must be an instance of the provenance.Provenance class!" + ) + + self._provenance = new_provenance + + +# ======================================================= +# CLASSES FOR THE UNSUBCLASSABLE CLASSES (BOOL AND NONE) +# ======================================================= class ProvenanceClassForTheUnsubclassable: """ A class to reproduce the methods of the unclassable ``bool`` and ``NoneType`` @@ -12,18 +237,6 @@ class ProvenanceClassForTheUnsubclassable: ``bool`` or a ``NoneType``) and the other is the ``provenance``. """ - def __init__(self, value, provenance): - """ - Parameters - ---------- - value : bool, None - Value of the object - provenance : any - The provenance information - """ - self.value = value - self.provenance = provenance - def __repr__(self): return f"{self.value}" @@ -40,7 +253,21 @@ def __hash__(self): return hash(self.value) +# Add the class attributes that are common to all WithProvenance classes +ProvenanceClassForTheUnsubclassable.__init__ = wrapper_with_provenance_init +ProvenanceClassForTheUnsubclassable.provenance = prop_provenance + + class BoolWithProvenance(ProvenanceClassForTheUnsubclassable): + """ + Class for emulating ``Bool`` behaviour, but with Provenance. + + Objects of this class reproduce the following ``Bool`` behaviours: + * ``isinstance(, bool)`` returns ``True`` + * `` == True`` returns ``True`` + * `` is True`` returns ``False``. This is not reproducing the behavior! + """ + @property def __class__(self): """ @@ -50,25 +277,39 @@ def __class__(self): class NoneWithProvenance(ProvenanceClassForTheUnsubclassable): + """ + Class for emulating ``None`` behaviour, but with Provenance. + + Objects of this class reproduce the following ``None`` behaviours: + * ``isinstance(, None)`` returns ``True`` + * `` == None`` returns ``True`` + * `` is None`` returns ``False``. This is not reproducing the behavior! + """ + @property def __class__(self): """ - This is here for having ``isinstance(, None)`` return ``True`` + This is here for having ``isinstance(, None)`` return + ``True`` """ return type(None) +# ================================ +# WRAPPER WITH PROVENANCE FACTORY +# ================================ def wrapper_with_provenance_factory(value, provenance=None): """ A function to subclass and instanciate all types of subclassable objects in the - ESM-Tools ``config`` and add the ``provenance`` attribute to them. It uses the - ``WrapperWithProvenance`` class defined within the function for that purpose. For classes - that are not subclassable (``bool`` and ``NoneType``) intanciates an object that - mimics their behaviour but also contains the ``provenance`` attribute. + ESM-Tools ``config`` and add the ``provenance`` attribute to them. 
It also creates + the ``{type(value)}WithProvenance`` classes globally on the fly depending on the + ``value``'s type, if it doesn't exist yet. For classes that are not subclassable + (``Date``, ``Bool`` and ``NoneType``) intanciates an object that mimics their + behaviour but also contains the ``provenance`` attribute. - Objects of type ``esm_calendari.esm_calendar.Date`` are not subclass (and the + Objects of type ``esm_calendar.esm_calendar.Date`` are not subclassed (and the ``provenance`` attribute is simply added to them, because they fail to be subclassed - with in the ``WrapperWithProvenance`` with the following error:: + with in the ``DateWithProvenance`` with the following error:: __new__ method giving error object.__new__() takes exactly one argument (the type to instantiate) @@ -82,7 +323,7 @@ def wrapper_with_provenance_factory(value, provenance=None): Returns ------- - WrapperWithProvenance, esm_calendar.esm_calendar.Date, BoolWithProvenance, + {type(value)}WithProvenance, esm_calendar.esm_calendar.Date, BoolWithProvenance, NoneWithProvenance The new instance with the ``provenance`` attribute """ @@ -94,71 +335,50 @@ def wrapper_with_provenance_factory(value, provenance=None): return NoneWithProvenance(value, provenance) elif type(value) == Date: - value.provenance = provenance - return value + value.provenance = prop_provenance + value.provenance = Provenance(provenance) + return value + elif isinstance(value, PROVENANCE_MAPPINGS): + return value else: - - class WrapperWithProvenance(type(value)): - """ - Dynamically create a subclass of the type of the given value - """ - - def __new__(cls, value, *args, **kwargs): - return super(WrapperWithProvenance, cls).__new__(cls, value) - - def __init__(self, value, provenance=None): - self.provenance = provenance + subtype = type(value) + class_name = f"{subtype}".split("'")[1] + class_name = f"{class_name[0].upper()}{class_name[1:]}WithProvenance" + + if class_name in globals(): + pass + else: + globals()[class_name] = type( + class_name, + (subtype,), + { + "_class_name": class_name, + "__new__": wrapper_with_provenance_new, + "__init__": wrapper_with_provenance_init, + "provenance": prop_provenance, + }, + ) # Instantiate the subclass with the given value and provenance - return WrapperWithProvenance(value, provenance) + return globals()[class_name](value, provenance) +# ========================= +# MAPPINGS WITH PROVENANCE +# ========================= class DictWithProvenance(dict): """ - A dictionary subclass that contains a ``provenance`` attribute. This attribute is - a ``dict`` that contains those `keys` of the original dictionary whose `values` - **are not a** ``dict`` (leaves of the dictionary tree), and a provenance value - defined during the instancing of the object. The ``provenance`` attribute is - applied recursively within the nested dictionaries during instancing or when the - ``self.set_provenance()`` is used. - - Example - ------- - After instancing the object: - - .. code-block:: python - - dict_with_provenance = DictWithProvenance(config_dict, {"file": "echam.yaml"}) - - where ``config_dict`` is defined as: - - .. code-block:: python - - config_dict = { - "echam": { - "type": "atmosphere", - "files": { - "greenhouse": {"kind": "input", "path_in_computer": "/my/path/in/computer"} - }, - } - } - - then ``config_dict["echam"].provenance`` will take the following values: - - .. 
code-block:: python - - >>> config_dict["echam"].provenance - {'type': {'file': 'echam.yaml'}} - - Note that the `key` ``"files"`` does not exist as the value for that key in the - ``config_dict`` is a dictionary (**it is not a leaf of the dictionary tree**). - - The `provenance value` can be defined to be any python object. The ``provenance`` - attribute is inherited when merging dictionaries with the ``update`` method - when merging two ``DictWithProvenance`` objects, with the same rewriting strategy - as for the keys in the dictionary, and ``provenance`` is also inherited when - redefining a `value` to contain a ``DictWithProvenance``. + A dictionary subclass that contains methods for: + * recursively transforming leaf values into provenance (``put_provenance`` and + ``set_provenance``) + * recursively retrieving provenance from nested values + * extending the ``dict.__init__`` method to recursively assign provenance to all + nested values + * extending the ``dict.__setitem__`` method to keep a record of previous history + when adding new values to a given key + * extending the ``dict.update`` method to keep a record of the previous history + when updating the dictionary Use --- @@ -166,15 +386,16 @@ class DictWithProvenance(dict): dict_with_provenance = DictWithProvenance(, ) - Redefine the provenance of an existing ``key``:: + Redefine the provenance of an existing ``DictWithProvenance`` with the same + provenance for all its nested values:: - dict_with_provenance[""].set_provenance() + dict_with_provenance.set_provenance() Set the provenace of a specific leaf within a nested dictionary:: - dict_with_provenance["key1"]["key1"].provenance["leaf_key"] = + dict_with_provenance["key1"]["key1"].provenance = - Get the ``provenance`` representation of the dictionary:: + Get the ``provenance`` representation of the whole dictionary:: provenance_dict = dict_with_provenance.get_provenance() """ @@ -182,40 +403,48 @@ class DictWithProvenance(dict): def __init__(self, dictionary, provenance): """ Instanciates the ``dictionary`` as an object of ``DictWithProvenance`` and - defines its ``provenance`` attribute recursively with ``set_provenance``. + defines its nested values as objects of WithProvenance classes, assigning them + recursively the corresponding ``provenance`` attribute with ``set_provenance``. Parameters ---------- dictionary : dict The ``dict`` that needs to be converted to a ``DictWithProvenance`` object - provenance : any + provenance : dict The provenance that will be recursively assigned to all leaves of the dictionary tree """ super().__init__(dictionary) + self.custom_setitem = False self.put_provenance(provenance) + self.custom_setitem = True def put_provenance(self, provenance): """ - Defines recursively the ``provenance`` of the ``DictWithProvenance`` object - ``self`` or it's nested ``dictionary``. + Recursively transforms every value in ``DictWithProvenance`` into its + corresponding WithProvenance object and appends its corresponding + ``provenance``. Each value has its corresponding provenance defined in the + ``provenance`` dictionary, and this method just groups them together 1-to-1. Parameters ---------- - provenance : any - dictionary : dict - Dictionary for which the ``provenance`` is to be set. When a value is not - given, the ``dictionary`` takes the value of ``self``. Only for recursion - within nested ``DictWithProvenance``, do not use it outside of this method. 
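To complement the description above, a short, hedged usage sketch of ``DictWithProvenance``; the config keys, file names, and line numbers are invented, and it only assumes ``esm_parser`` is importable:

.. code-block:: python

    from esm_parser.provenance import DictWithProvenance, wrapper_with_provenance_factory

    config = {"echam": {"type": "atmosphere", "resolution": "T63"}}
    provenance = {
        "echam": {
            "type": {"yaml_file": "echam.yaml", "line": 2, "col": 11},
            "resolution": {"yaml_file": "echam.yaml", "line": 3, "col": 17},
        }
    }

    config = DictWithProvenance(config, provenance)

    # Leaves behave like their original types but carry a provenance history
    print(config["echam"]["type"])                  # atmosphere
    print(config["echam"]["type"].provenance[-1])   # {'yaml_file': 'echam.yaml', ...}

    # Overwriting a leaf extends its history instead of replacing it
    config["echam"]["type"] = wrapper_with_provenance_factory(
        "atmosphere", {"yaml_file": "runscript.yaml", "line": 8, "col": 9}
    )
    print(config["echam"]["type"].provenance[-1]["extended_by"])   # dict.__setitem__

    # The whole tree can be dumped as a plain dict of the latest provenances
    print(config.get_provenance())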
+ provenance : dict + The provenance that will be recursively assigned to all leaves of the + dictionary tree. The provenance needs to be a ``dict`` with the same keys + as ``self`` (same structure) so that it can successfully transfer each + provenance value to its corresponding value on ``self`` (1-to-1 + conrrespondance). """ for key, val in self.items(): if isinstance(val, dict): self[key] = DictWithProvenance(val, provenance.get(key, {})) elif isinstance(val, list): - self[key] = ListWithProvenance(val, provenance.get(key, {})) + self[key] = ListWithProvenance(val, provenance.get(key, [])) + elif hasattr(val, "provenance"): + self[key].provenance.extend(provenance.get(key, {})) else: self[key] = wrapper_with_provenance_factory( val, provenance.get(key, None) @@ -223,28 +452,32 @@ def put_provenance(self, provenance): def set_provenance(self, provenance): """ - Defines recursively the ``provenance`` of the ``DictWithProvenance`` object - ``self`` or it's nested ``dictionary``. + Recursively transforms every value in ``DictWithProvenance`` into its + corresponding WithProvenance object and appends the same ``provenance`` to it. + Note that this method differs from ``put_provenance`` in that the same + ``provenance`` value is applied to the different values of ``self``. Parameters ---------- provenance : any New `provenance value` to be set - dictionary : dict - Dictionary for which the ``provenance`` is to be set. When a value is not - given, the ``dictionary`` takes the value of ``self``. Only for recursion - within nested ``DictWithProvenance``, do not use it outside of this method. """ + if not isinstance(provenance, list): + provenance = [provenance] for key, val in self.items(): if isinstance(val, dict): - self[key] = DictWithProvenance(val, provenance) - if isinstance(val, list): - self[key] = ListWithProvenance(val, provenance) + self[key] = DictWithProvenance(val, {}) + self[key].set_provenance(provenance) + elif isinstance(val, list): + self[key] = ListWithProvenance(val, []) + self[key].set_provenance(provenance) + elif hasattr(val, "provenance"): + self[key].provenance.extend(provenance) else: self[key] = wrapper_with_provenance_factory(val, provenance) - def get_provenance(self): + def get_provenance(self, index=-1): """ Returns a ``dictionary`` containing the all the nested provenance information of the current ``DictWithProvenance`` with a structure and `keys` equivalent to @@ -253,11 +486,9 @@ def get_provenance(self): Parameters ---------- - dictionary : dict - Dictionary for which the provenance needs to be extracted. When a value is - not given, the ``dictionary`` takes the value of ``self``. Only for - recursion within nested ``DictWithProvenance``, do not use it outside of - this method. + index : int + Defines the element of the provenance history to be returned. The default + is ``-1``, meaning the last provenance (the one of the current value). Returns ------- @@ -270,9 +501,9 @@ def get_provenance(self): for key, val in self.items(): if isinstance(val, PROVENANCE_MAPPINGS): - provenance_dict[key] = val.get_provenance() + provenance_dict[key] = val.get_provenance(index=index) elif hasattr(val, "provenance"): - provenance_dict[key] = val.provenance + provenance_dict[key] = val.provenance[index] else: # The DictWithProvenance object might have dictionaries inside that # are not instances of that class (i.e. 
a dictionary added in the @@ -281,96 +512,203 @@ def get_provenance(self): return provenance_dict - def set_leaf_id_provenance(self, key): + def __setitem__(self, key, val): """ - Stores the last-leaf provenance information in the class level - variable ``leaf_id_provenance``. + Any time an item in a DictWithProvenance is set, extend the old provenance of + the old value with the provenance of the new ``val`` and make that be the new + extended provenance history of the value. - This method gets the ``id`` value (unique Python object counter), which - is used as a key in the `leaf_id_provenance`. The value becomes the - provenance of that key, or defaults to ``None`` + Parameters + ---------- + key : str + Key of the item + val : any + Value of the item + """ + val_new = val + if ( + key in self + and not isinstance(self[key], (dict, list)) + and hasattr(self[key], "provenance") + and hasattr(self, "custom_setitem") + and self.custom_setitem + ): + new_provenance = self[key].provenance + if hasattr(val, "provenance"): + new_provenance.extend_and_modified_by( + val.provenance, "dict.__setitem__" + ) + val_new = copy.deepcopy(val) + val_new.provenance = new_provenance + + super().__setitem__(key, val_new) + + def update(self, dictionary, *args, **kwargs): + """ + Preserves the provenance history when using the ``update`` method Parameters ---------- - key : Any - The key of the "inner-most" leaf to store provenance information for - """ - # If it's a leaf - if not isinstance(super().__getitem__(key), DictWithProvenance): - val_id = id(super().__getitem__(key)) - # Stores the provenance in a class variable, under an id key - DictWithProvenance.leaf_id_provenance[val_id] = self.provenance.get( - key, None - ) + dictionary : dict, esm_parser.provenance.DictWithProvenance + Dictionary that will update ``self`` + """ + + new_provs = {} + + for key, val in dictionary.items(): + if ( + key in self + and not isinstance(self[key], (dict, list)) + and hasattr(self[key], "provenance") + and hasattr(self, "custom_setitem") + and self.custom_setitem + ): + new_provenance = self[key].provenance + if hasattr(val, "provenance"): + new_provenance.extend_and_modified_by(val.provenance, "dict.update") + new_provs[key] = new_provenance + + super().update(dictionary, *args, **kwargs) + + for key, val in new_provs.items(): + self[key].provenance = val class ListWithProvenance(list): + """ + A list subclass that contains methods for: + * recursively transforming leaf values into provenance (``put_provenance`` and + ``set_provenance``) + * recursively retrieving provenance from nested values + * extending the ``list.__init__`` method to recursively assign provenance to all + nested values + * extending the ``list.__setitem__`` method to keep a record of previous history + when adding new values to a given key + + Use + --- + Instance a new ``ListWithProvenance`` object:: + + list_with_provenance = ListWithProvenance(, ) + + Redefine the provenance of an existing ``ListWithProvenance`` with the same + provenance for all its nested values:: + + list_with_provenance.set_provenance() + + Set the provenace of the element 0 of a list:: + + list_with_provenance[0].provenance = + + Get the ``provenance`` representation of the whole list:: + + provenance_list = list_with_provenance.get_provenance() + """ + def __init__(self, mylist, provenance): + """ + Instanciates the ``list`` as an object of ``ListWithProvenance`` and defines + its nested values as objects of WithProvenance classes, assigning them + recursively the 
corresponding ``provenance`` attribute with ``set_provenance``. + + Parameters + ---------- + mylist : list + The ``list`` that needs to be converted to a ``ListWithProvenance`` object + provenance : list + The provenance that will be recursively assigned to all leaves of the + dictionary tree + """ + super().__init__(mylist) + self.custom_setitem = False self.put_provenance(provenance) + self.custom_setitem = True def put_provenance(self, provenance): + """ + Recursively transforms every value in ``ListWithProvenance`` into its + corresponding WithProvenance object and appends its corresponding + ``provenance``. Each value has its corresponding provenance defined in the + ``provenance`` list, and this method just groups them together 1-to-1. + + Parameters + ---------- + provenance : list + The provenance that will be recursively assigned to all elements of the + list. The provenance needs to be a ``list`` with the same number of elements + as ``self`` (same structure) so that it can successfully transfer each + provenance value to its corresponding value on ``self`` (1-to-1 + conrrespondance). + """ + + if not provenance: + provenance = [{}] * len(self) + for c, elem in enumerate(self): if isinstance(elem, dict): self[c] = DictWithProvenance(elem, provenance[c]) elif isinstance(elem, list): self[c] = ListWithProvenance(elem, provenance[c]) + elif hasattr(elem, "provenance"): + self[c].provenance.extend(provenance[c]) else: self[c] = wrapper_with_provenance_factory(elem, provenance[c]) def set_provenance(self, provenance): """ - Defines recursively the ``provenance`` of the ``DictWithProvenance`` object - ``self`` or it's nested ``dictionary``. + Recursively transforms every value in ``ListWithProvenance`` into its + corresponding WithProvenance object and appends the same ``provenance`` to it. + Note that this method differs from ``put_provenance`` in that the same + ``provenance`` value is applied to the different values of ``self``. Parameters ---------- provenance : any New `provenance value` to be set - dictionary : dict - Dictionary for which the ``provenance`` is to be set. When a value is not - given, the ``dictionary`` takes the value of ``self``. Only for recursion - within nested ``DictWithProvenance``, do not use it outside of this method. """ + if not isinstance(provenance, list): + provenance = [provenance] - for c, elem in enummerate(self): + for c, elem in enumerate(self): if isinstance(elem, dict): - self[c] = DictWithProvenance(elem, provenance) - if isinstance(elem, list): - self[c] = ListWithProvenance(elem, provenance) + self[c] = DictWithProvenance(elem, {}) + self[c].set_provenance(provenance) + elif isinstance(elem, list): + self[c] = ListWithProvenance(elem, []) + self[c].set_provenance(provenance) + elif hasattr(elem, "provenance"): + self[c].provenance.extend(provenance) else: self[c] = wrapper_with_provenance_factory(elem, provenance) - def get_provenance(self): + def get_provenance(self, index=-1): """ - Returns a ``dictionary`` containing the all the nested provenance information - of the current ``DictWithProvenance`` with a structure and `keys` equivalent to - the ``self`` dictionary, but with `values` of the `key` leaves those of the - provenance. + Returns a ``list`` containing the all the nested provenance information + of the current ``ListWithProvenance`` with a structure equivalent to the + ``self`` list, but with list elements been provenance values. 
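The list counterpart works analogously; a short sketch under the same import assumption, with one (invented) provenance entry per element:

.. code-block:: python

    from esm_parser.provenance import ListWithProvenance

    forcing_files = ["greenhouse.nc", "ozone.nc"]
    provenance = [
        {"yaml_file": "echam.yaml", "line": 12},
        {"yaml_file": "echam.yaml", "line": 13},
    ]

    forcing_with_prov = ListWithProvenance(forcing_files, provenance)

    forcing_with_prov[0]                # "greenhouse.nc"
    forcing_with_prov[0].provenance     # provenance history of element 0
    forcing_with_prov.get_provenance()  # latest provenance of each element, as a list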
Parameters ---------- - dictionary : dict - Dictionary for which the provenance needs to be extracted. When a value is - not given, the ``dictionary`` takes the value of ``self``. Only for - recursion within nested ``DictWithProvenance``, do not use it outside of - this method. + index : int + Defines the element of the provenance history to be returned. The default + is ``-1``, meaning the last provenance (the one of the current value). Returns ------- - provenance_list : dict - A dictionary with a structure and `keys` equivalent to the ``self`` - dictionary, but with `values` of the `key` leaves those of the provenance + provenance_list : list + A list with a structure equivalent to that of the ``self`` list, but with + the `values` of the provenance of each element """ provenance_list = [] for elem in self: if isinstance(elem, PROVENANCE_MAPPINGS): - provenance_list.append(elem.get_provenance()) + provenance_list.append(elem.get_provenance(index=index)) elif hasattr(elem, "provenance"): - provenance_list.append(elem.provenance) + provenance_list.append(elem.provenance[index]) else: # The DictWithProvenance object might have dictionaries inside that # are not instances of that class (i.e. a dictionary added in the @@ -379,56 +717,128 @@ def get_provenance(self): return provenance_list + def __setitem__(self, indx, val): + """ + Any time an item in a ListWithProvenance is set, extend the old provenance of + the old value with the provenance of the new ``val`` and make that be the new + extended provenance history of the value. + + Parameters + ---------- + indx : int + Index of the element + val : any + Value of the item + """ + val_new = val + if ( + indx in self + and not isinstance(self[indx], (dict, list)) + and hasattr(self[indx], "provenance") + and hasattr(self, "custom_setitem") + and self.custom_setitem + ): + new_provenance = self[indx].provenance + if hasattr(val, "provenance"): + new_provenance.extend_and_modified_by( + val.provenance, "dict.__setitem__" + ) + val_new = copy.deepcopy(val) + val_new.provenance = new_provenance + + super().__setitem__(indx, val_new) + +# Define the global variable PROVENANCE_MAPPINGS for operations such as ``isinstance`` PROVENANCE_MAPPINGS = (DictWithProvenance, ListWithProvenance) -def keep_provenance(func): +# ============================================== +# DECORATORS FOR ESM_PARSER RECURSIVE FUNCTIONS +# e.g. find_variable, purify_booleans... +# ============================================== +def keep_provenance_in_recursive_function(func): """ Decorator for recursive functions in ``esm_parser`` to preserve provenance. - Recursive run functions in ``esm_parser`` are generally called on the innermost - leaf. Here, we still run the function, but additionally store the output of the - function into the `leaf_id_provenance` container so that provenance can be added - to the result of the function call. 
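To make the decorator's effect concrete, a rough sketch follows. The callback ``add_suffix`` and its provenance payload are invented for illustration; the real callers are the recursive functions of ``esm_parser``, which use the same ``(tree, rhs, ...)`` signature.

.. code-block:: python

    from esm_parser.provenance import (
        keep_provenance_in_recursive_function,
        wrapper_with_provenance_factory,
    )

    @keep_provenance_in_recursive_function
    def add_suffix(tree, rhs):
        # Hypothetical callback that modifies the right-hand-side value
        return f"{rhs}_resolved"

    value = wrapper_with_provenance_factory(
        "${model_dir}", {"yaml_file": "echam.yaml", "line": 3}
    )
    new_value = add_suffix([], value)
    # ``new_value`` equals "${model_dir}_resolved" and keeps the provenance
    # history of ``value``, with the last step marked as modified by ``add_suffix``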
- Parameters ---------- func : Callable The function to decorate """ + does_not_modify_prov = ["find_variable", "recursive_run_function"] + modify_prov = func.__name__ not in does_not_modify_prov + def inner(tree, rhs, *args, **kwargs): + custom_setitem_was_turned_off_in_this_instance = False + if hasattr(rhs, "custom_setitem") and rhs.custom_setitem: + rhs.custom_setitem = False + custom_setitem_was_turned_off_in_this_instance = True + output = func(tree, rhs, *args, **kwargs) if hasattr(rhs, "provenance"): - provenance = rhs.provenance + provenance = copy.deepcopy(rhs.provenance) # Value was modified if type(rhs) != type(output) or rhs != output: + output = copy.deepcopy(output) # If the new value has an inherited provenance, keep it (i.e. variable # was called: rhs = ${fesom.namelist_dir}, output = # /actual/path/with/provenance/to/be/kept}) if hasattr(output, "provenance"): - append_modified_by_to_provenance(output.provenance, func) + if modify_prov: + provenance.extend_and_modified_by(output.provenance, func) + output.provenance = provenance # If the rhs.provenance is not None and output has no provenance, keep - # the old proveance - elif provenance != None: - provenance = append_modified_by_to_provenance(provenance, func) + # the old provenance + elif provenance is not None: + if modify_prov: + provenance.append_last_step_modified_by(func) output = wrapper_with_provenance_factory(output, provenance) + if custom_setitem_was_turned_off_in_this_instance: + rhs.custom_setitem = True + return output return inner -def append_modified_by_to_provenance(provenance, func): - if "modified_by" not in provenance: - provenance["modified_by"] = [str(func)] - else: - provenance["modified_by"].append(str(func)) +# ======== +# HELPERS +# ======== +def clean_provenance(data): + """ + Returns the values of provenance mappings in their original classes (without the + provenance). Recurses through mappings. Make sure you ``copy.deepcopy`` the data + mapping before running this function if you do not want the provenance information + of the original ``data`` mapping to be lost. + + Parameters + ---------- + data : any + Mapping or values with provenance. - return provenance + Returns + ------- + value : any + Values in their original format, or lists and dictionaries containing the cleaned + values.
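A rough usage sketch for ``clean_provenance``, reusing the hypothetical ``config_with_prov`` from the ``DictWithProvenance`` example above:

.. code-block:: python

    import copy

    from esm_parser.provenance import clean_provenance

    # Work on a copy so the provenance of the original mapping is preserved
    plain_config = clean_provenance(copy.deepcopy(config_with_prov))
    # ``plain_config`` is a regular nested dict whose leaves are plain
    # ``str``/``int``/... values again, without ``provenance`` attributes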
+ """ + if hasattr(data, "value"): + assert ( + data == data.value + ), "The provenance object's value and the original value do not match!" + return data.value + elif isinstance(data, list): + return [clean_provenance(item) for item in data] + elif isinstance(data, dict): + return {key: clean_provenance(value) for key, value in data.items()} + else: + return data if __name__ == "__main__": diff --git a/src/esm_parser/yaml_to_dict.py b/src/esm_parser/yaml_to_dict.py index 70296c672..b86945c79 100644 --- a/src/esm_parser/yaml_to_dict.py +++ b/src/esm_parser/yaml_to_dict.py @@ -18,9 +18,16 @@ CONFIG_PATH = esm_tools.get_config_filepath() -class CommentedYamlDumper(yaml.SafeDumper): +def provenance_representer(dumper, provenance): + return dumper.represent_str("provenance") + +#class CommentedYamlDumper(ruamel.yaml.SafeDumper): +class CommentedYamlDumper(yaml.Dumper): pass +CommentedYamlDumper.add_representer( + str, provenance_representer +) class EsmConfigFileError(Exception): """ diff --git a/src/esm_plugin_manager/__init__.py b/src/esm_plugin_manager/__init__.py index f6993e728..d0703886e 100644 --- a/src/esm_plugin_manager/__init__.py +++ b/src/esm_plugin_manager/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi, Paul Gierz, Sebastian Wahl""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_plugin_manager import * diff --git a/src/esm_profile/__init__.py b/src/esm_profile/__init__.py index 04a178ecf..e236cd795 100644 --- a/src/esm_profile/__init__.py +++ b/src/esm_profile/__init__.py @@ -2,6 +2,6 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .esm_profile import * diff --git a/src/esm_runscripts/__init__.py b/src/esm_runscripts/__init__.py index cc742b840..98fdd1fb1 100644 --- a/src/esm_runscripts/__init__.py +++ b/src/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .batch_system import * from .chunky_parts import * diff --git a/src/esm_runscripts/cli.py b/src/esm_runscripts/cli.py index eaae9006a..4498c92f6 100644 --- a/src/esm_runscripts/cli.py +++ b/src/esm_runscripts/cli.py @@ -18,6 +18,7 @@ from .helpers import SmartSink from .sim_objects import * +from esm_parser import user_error def parse_shargs(): """The arg parser for interactive use""" @@ -247,6 +248,12 @@ def main(): runscript_full_path = os.path.realpath(ARGS.runscript) runscript_dir, runscript = os.path.split(runscript_full_path) runscript_dir += "/" + if not os.path.exists(runscript_full_path): + user_error( + "runscript not found", + f"The runscript ``{ARGS.runscript}`` does not exist in the folder ``{runscript_dir}``. ", + dsymbols=["``", "'"], + ) # this might contain the relative path but it will be taken care of later command_line_config["original_command"] = original_command.strip() diff --git a/src/esm_runscripts/filedicts.py b/src/esm_runscripts/filedicts.py new file mode 100644 index 000000000..6c8d332ef --- /dev/null +++ b/src/esm_runscripts/filedicts.py @@ -0,0 +1,1260 @@ +""" +The file-dictionary implementation + +Developer Notes +--------------- +* Internal functions, decorators, and methods are prefixed with _. They should + only be used inside of this file. +* Decorators should have names that map to an attribute of the object. See the + example in ``_allowed_to_be_missing``.
+""" +import copy +import functools +import glob +import inspect +import os +import pathlib +import shutil +import sys +from enum import Enum, auto +from typing import Any, AnyStr, Dict, Iterator + +import dpath.util +import yaml +from loguru import logger + +from esm_calendar import Date +# These should be relative +from esm_parser import ConfigSetup, user_error + +# Set up the logger: +logger.remove() +LEVEL = "ERROR" +LOGGING_FORMAT = "[{time:HH:mm:ss DD/MM/YYYY}] |{level}| [{file} -> {function}() line:{line: >3}] >> {message}" +logger.add(sys.stderr, level=LEVEL, format=LOGGING_FORMAT) + + +class DatestampFormatError(Exception): + """Raise this error when the Datestamp formatter is incorrectly used""" + + +class DotDict(dict): + """ + A dictionary subclass that allows accessing data via dot-attributes and keeps changes between dictionary + keys and dot-attributes in sync. + + This class inherits from the built-in `dict` class and overrides the `__getattr__` and `__setattr__` methods + to provide dot-attribute access to dictionary items. When an attribute is accessed using dot notation, the + corresponding dictionary key is returned. Similarly, when an attribute is set using dot notation, the corresponding + dictionary key is updated. Changes made using dictionary keys are also reflected in the dot-attributes. + + Note that this implementation assumes that the keys in the dictionary are strings, since dot-attributes can + only be strings in Python. + """ + + def __getattr__(self, attr): + try: + return self[attr] + except KeyError: + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{attr}'" + ) + + def __setattr__(self, attr, value): + self[attr] = value + + +class NameIterEnum(Enum): + def __iter__(self) -> Iterator[str]: + """Returns list of names of the iteration, guarentted to be lower-case""" + return iter(str(name).lower() for name in self.__members__) + + +class FileTypes(NameIterEnum): + """Describes which type a file might belong to, e.g. input, outdata, forcing""" + + ANALYSIS = auto() + CONFIG = auto() + COUPLE = auto() + FORCING = auto() + IGNORE = auto() + INPUT = auto() + LOG = auto() + MON = auto() + OUTDATA = auto() + RESTART = auto() + VIZ = auto() + + +class FileLocations(NameIterEnum): + """Posibile locations for a file""" + + COMPUTER = auto() + EXP_TREE = auto() + RUN_TREE = auto() + WORK = auto() + + +class FileStatus(NameIterEnum): + """Describes which status a particular file might have, e.g. ``FILE``, ``NOT_EXISTS``, ``BROKEN_LINK``.""" + + FILE = auto() # ordinary file + DIR = auto() # directory + LINK = auto() # symbolic link + EXISTS = auto() # object exists in the system + NOT_EXISTS = auto() # file does not exist + BROKEN_LINK = auto() # target of the symbolic link does not exist + + +# NOTE(PG): Comment can be removed later. Here I prefix with an underscore as +# this decorator should **only** be used inside of this file. +def _allowed_to_be_missing(method): + """Allows to decorate a method with ``_allowed_to_be_missing``, causing it + to always return ``None``. + + If a method is decorated with ``@_allowed_to_be_missing``, it will return + ``None`` instead of executing if the file has a attribute of + ``_allowed_to_be_missing`` set to ``True``. You get a warning via the logger + giving the full method name that was decorated and a representation of the + file that was attempted to be moved, linked, or copied. 
+ + Usage Example + ------------- + Given you have an instantiated simulation file under ``sim_file`` with + the following property in YAML:: + + echam: + files: + sim_file: + allowed_to_be_missing: True + ...other properties... + + >>> sim_file.allowed_to_be_missing # doctest: +SKIP + True + + And given that you have a decorated method foo, that would act on the file:: + >>> rvalue = sim_file.foo(*args, **kwargs) # doctest: +SKIP + >>> rvalue is None # doctest: +SKIP + True + >>> print(rvalue) # doctest: +SKIP + None + + Programming Example + ------------------- + class MyCoolClass: + def __init__(self): + self.allowed_to_be_missing = True + + @_allowed_to_be_missing + def foo(self, *args, **kwargs): + # This method will always return None, the return below is never + # reached: + return 123 + + Notes + ----- + Why does this thing have an underscore and the attribute does not? + + Because this is a decorator to enable the functionality, and I do not want + anyone to use this decorator outside of this file, so, we start with ``_`` + to denote that ("Private", even though Python does not formally have that). + + And, the attribute might be interesting for the end-user, not the programmer. + + That's why. + """ + + @functools.wraps(method) + def inner_method(self, *args, **kwargs): + if self.allowed_to_be_missing: + try: + return method(self, *args, **kwargs) + except (FileNotFoundError, IOError): + logger.warning( + f"Skipping {method.__qualname__} as this file ({self}) is allowed to be missing!" + ) + # None is the default return, but let us be explicit here, as it is a bit confusing + return None + else: + return method(self, *args, **kwargs) + + return inner_method + + +def _fname_has_date_stamp_info(fname, date, reqs=["%Y", "%m", "%d"]): + """ + Checks if a particular file has all elements of a particular date in its name. + + Parameters + ---------- + fname : str + The name of the file to check + date : esm_calendar.Date + The date to be checked against + reqs : list of str + A list of ``strftime`` compliant strings to determine which elements of + the date to check. Compatible with %Y %m %d %H %M %S (year, month, day, + hour, minute, second) + + Returns + ------- + bool : + True if all elements appear in the filename, False otherwise. + + """ + date_attrs = { + "%Y": "syear", + "%m": "smonth", + "%d": "sday", + "%H": "shour", + "%M": "sminute", + "%S": "ssecond", + } + required_attrs = [getattr(date, v) for k, v in date_attrs.items() if k in reqs] + # all(attr in fname for attr in required_attrs) + for attr in required_attrs: + if attr in fname: + fname = fname.replace(attr, "checked", 1) + return fname.count("checked") == len(reqs) + + +def _globbing(method): + """ + Decorator method for ``SimulationFile``'s methods ``cp``, ``mv``, ``ln``, that + enables globbing. If a ``*`` is found on the ``source`` or ``target`` the globbing + logic is activated, and consist of: + - run checks for globbing syntax + - check if any file matches the globbing pattern + - construct one instance of ``SimulationFile`` for each file matching the globbing + - run the ``method`` for that particular file + + Parameters + ---------- + method : method + The decorated method (``cp``, ``mv``, ``ln``) + + Returns + ------- + method : method + If no globbing is needed, returns the method as it was given originally. 
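To make the wildcard handling more tangible, a hypothetical file entry could look like the one sketched below (names are invented; the ``from_config`` call follows the pattern used elsewhere in this module):

.. code-block:: python

    # Hypothetical config entry with one wildcard in both names:
    #
    #   fesom:
    #       files:
    #           oce_restarts:
    #               kind: restart
    #               path_in_computer: /pool/fesom/restarts
    #               name_in_computer: "fesom.2000.*.restart.nc"
    #               name_in_work: "fesom_restart_*.nc"
    #
    sim_file = SimulationFile.from_config(config, "fesom.files.oce_restarts")  # doctest: +SKIP

    # Both names contain one ``*``, so the patterns match. ``cp`` then expands
    # the source pattern with ``glob`` and copies every match, carrying the
    # wildcard part over into the target name, e.g.:
    #   fesom.2000.oce.restart.nc -> fesom_restart_oce.nc
    sim_file.cp("computer", "work")  # doctest: +SKIP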
+ """ + + @functools.wraps(method) + def inner_method(self, source, target, *args, **kwargs): + method_name = method.__name__ + source_name = self[f"name_in_{source}"] + target_name = self[f"name_in_{target}"] + + if "*" in source_name or "*" in target_name: + # Get wildcard patterns + source_pattern = source_name.split("*") + target_pattern = target_name.split("*") + + # Check wild cards syntax + self._wild_card_check(source_pattern, target_pattern) + + # Obtain source files + glob_source_paths = self._find_globbing_files(source) + + # Extract globbing source names + glob_source_names = [ + pathlib.Path(glob_source_path).name + for glob_source_path in glob_source_paths + ] + + # Solve the globbing target names + glob_target_names = [] + for glob_source_name in glob_source_names: + glob_target_name = glob_source_name + for sp, tp in zip(source_pattern, target_pattern): + glob_target_name = glob_target_name.replace(sp, tp) + glob_target_names.append(glob_target_name) + + # Loop through source files + for glob_source_name, glob_target_name in zip( + glob_source_names, glob_target_names + ): + # Create a new simulation file object for this specific glob file's config + glob_dict = dict(self) + glob_dict[f"name_in_{source}"] = glob_source_name + glob_dict[f"name_in_{target}"] = glob_target_name + glob_file = SimulationFile(**glob_dict) + # Use method + this_method = getattr(glob_file, method_name) + return this_method(source, target, *args, **kwargs) + else: + return method(self, source, target, *args, **kwargs) + + return inner_method + + +class SimulationFile(DotDict): + """ + Describes a file used within a ESM Simulation. + + A ``SimulationFile`` object describes one particular file used within an + ``esm-tools`` run. This description is similar to a standard Python + dictionary. Beyond the standard dictionary methods and attributes, there + are a variety of attributes that describe how the file should behave, as + well as a few additional methods you can use to relocate the file around on + the system. Please see the detailed documentation on each of the methods + for more specifics, but in summary, a ``SimulationFile`` has the following + additional functions:: + + >>> sim_file = SimulationFile(...) # doctest: +SKIP + >>> sim_file.mv("computer", "work") # doctest: +SKIP + >>> sim_file.ln("work", "run_tree") # doctest: +SKIP + >>> sim_file.cp("run_tree", "exp_tree") # doctest: +SKIP + + You get extra functions for moving, copying, or linking a file from one + location to another. Location keys are desccribed in detail in the Notes + section. + + Furthermore, there are a few attributes that you should be aware of. These + include: + + * ``name`` : A human readable name for the file. + * ``allowed_to_be_missing`` : A ``bool`` value to set a certain file as + allowed to be missing or not. In case it is, the cp/ln/mv command will not + fail if the original file is not found. + * ``datestamp_method`` : Sets how a datestamp should be added. See + ``_allowed_datestamp_methods`` for more information. + * ``datestamp_format`` : Sets how a datestamp should be formatted. See + ``_allowed_datestamp_methods`` for more information. 
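For instance, the guards on these attributes reject unsupported values (sketch only; ``sim_file`` instantiated as in the example below):

.. code-block:: python

    sim_file["datestamp_method"] = "avoid_overwrite"   # accepted  # doctest: +SKIP
    sim_file.datestamp_format = "check_from_filename"  # accepted  # doctest: +SKIP
    sim_file["datestamp_method"] = "sometimes"         # raises ValueError  # doctest: +SKIP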
+ + Example + ------- + Given a config, you should be able to use this in YAML:: + + $ cat dummy_config.yaml + + echam: + files: + jan_surf: + name_in_computer: T63CORE2_jan_surf.nc + name_in_work: unit.24 + filetype: NetCDF + allowed_to_be_missing: True + description: > + Initial values used for the simulation, including + properties such as geopotential, temperature, pressure + + And, assuming config is as described above:: + + >>> sim_file = SimulationFile.from_config(config, 'echam.files.jan_surf') # doctest: +SKIP + + You could then copy the file to the experiment folder:: + + >>> sim_file.cp("pool", "work") # doctest: +SKIP + + Notes + ----- + A file can be located in one of these categories (``LOCATION_KEYS``): + - computer: pool/source directory (for input files) + - exp_tree: file in the category directory in experiment directory (eg. input, output, ...) + - run_tree: file in the experiment/run_// directory + - work: file in the current work directory. Eg. experiment/run_/work/ + + LOCATION_KEY is one of the strings defined in LOCATION_KEY list + - name_in_ : file name (without path) in the LOCATION_KEY + - eg. name_in_computer: T63CORE2_jan_surf.nc + - eg. name_in_work: unit.24 + - absolute_path_in_ : absolute path in the LOCATION_KEY + - eg. absolute_path_in_run_tree: /work/ollie/pgierz/some_exp/run_20010101-20010101/input/echam/T63CORE2_jan_surf.nc + """ + + # Should all be replaced by Enums: + input_file_kinds = [ + "config", + "forcing", + "input", + ] + output_file_kinds = [ + "analysis", + "couple", + "log", + "mon", + "outdata", + "restart", + "viz", + "ignore", + ] + all_model_filekinds = ( + input_file_kinds + output_file_kinds + ["src"] + ) # FIXME: In review, someone should check this + + def __init__( + self, + name="", + component="", + paths={}, + kind=None, + allowed_to_be_missing=False, + description="", + filetype="", + datestamp_method="avoid_overwrite", + **kwargs, + ): + # self.name = name + # self.paths = paths + # self.kind = kind + # self.allowed_to_be_missing = allowed_to_be_missing + # self.description = description + # self.filetype = filetype + # self._datestamp_method = datestamp_method + # self.locations = {k: v.parent for k, v in self.paths.items()} + + super().__init__( + name=name, + component=component, + paths={k: pathlib.Path(v) for k, v in paths.items()}, + kind=kind, + allowed_to_be_missing=allowed_to_be_missing, + description=description, + filetype=filetype, + datestamp_method=datestamp_method, + locations={k: pathlib.Path(v).parent for k, v in paths.items()}, + **kwargs, + ) + + for location, path in paths.items(): + for attr_name, attr_value in { + f"absolute_path_in_{location}": path.resolve(), + f"name_in_{location}": path.name, + }.items(): + if attr_name not in self: + self[attr_name] = attr_value + + # possible paths for files: + + # location_keys = ["computer", "exp_tree", "run_tree", "work"] + # initialize the locations and complete paths for all possible locations + # self.locations = dict.fromkeys(location_keys, None) + + # Current Attributes: + # {'absolute_path_in_computer': PosixPath('/work/ollie/pool/ECHAM/T63/T63CORE2_jan_surf.nc'), + # 'absolute_path_in_exp_tree': PosixPath('/work/ollie/pgierz/some_exp/input/echam/T63CORE2_jan_surf.nc'), + # 'absolute_path_in_run_tree': PosixPath('/work/ollie/pgierz/some_exp/run_20000101-20000101/input/echam/T63CORE2_jan_surf.nc'), + # 'absolute_path_in_work': PosixPath('/work/ollie/pgierz/some_exp/run_20010101-20010101/work/unit.24'), + # 'allowed_to_be_missing': False, + # 'description': 
'Initial values used for the simulation, including properties such as geopotential, temperature, pressure\n', + # 'filetype': 'NetCDF', + # 'name_in_computer': 'T63CORE2_jan_surf.nc', + # 'name_in_exp_tree': 'T63CORE2_jan_surf.nc', + # 'name_in_run_tree': 'T63CORE2_jan_surf.nc', + # 'name_in_work': 'unit.24', + # 'path_in_computer': '/work/ollie/pool/ECHAM/T63', + # 'type': 'input'} + + ############################################################################################## + # Initialize from esm-tools config + ############################################################################################## + + @classmethod + def from_config(cls, full_config: dict, attrs_address: str): + """ + - Initiates the properties of the object + - Triggers basic checks + + Parameters + ---------- + full_config : dict + The full simulation configuration + attrs_address : str + The address of this specific file in the full config, separated by dots. + + Note + ---- + A file can be located in one of these categories (``LOCATION_KEYS``): + - computer: pool/source directory (for input files) + - exp_tree: file in the category directory in experiment directory (eg. input, output, ...) + - run_tree: file in the experiment/run_// directory + - work: file in the current work directory. Eg. experiment/run_/work/ + + LOCATION_KEY is one of the strings defined in LOCATION_KEY list + - name_in : file name (without path) in the LOCATION_KEY + - eg. name_in_computer: T63CORE2_jan_surf.nc + - eg. name_in_work: unit.24 + - absolute_path_in_ : absolute path in the LOCATION_KEY + - eg. absolute_path_in_run_tree: + - /work/ollie/pgierz/some_exp/run_20010101-20010101/input/echam/T63CORE2_jan_surf.nc + """ + attrs_dict = dpath.util.get( + full_config, attrs_address, separator=".", default={} + ) + # _original_filedict = copy.deepcopy(attrs_dict) + + name = attrs_address.split(".")[-1] + component = attrs_address.split(".")[0] + # Check if attr dict gives a sufficient representation of a file + cls._check_config_syntax(attrs_dict, name) + kind = attrs_dict.get("kind") + # Complete tree names if not defined by the user + names = cls._complete_file_names(attrs_dict) + paths = cls._resolve_abs_paths(full_config, component, attrs_dict, names, kind) + obj = cls(name=name, paths=paths, **attrs_dict) + + # Verbose set to true by default, for now at least + obj._verbose = full_config.get("general", {}).get("verbose", True) + + # Checks + obj._check_path_in_computer_is_abs(paths, component, name) + return obj + + @classmethod + def _check_config_syntax(cls, cfg, name) -> None: + """ + Checks for missing variables: + - ``kind`` + - ``path_in_computer`` if the file it an input for the experiment + - ``name_in_computer`` if the file it an input for the experiment + - ``name_in_work`` if the file it an output of the experiment + + It also checks whether ``kind``'s value is correct. + + It notifies the user about these errors in the syntax using + ``esm_parser.error``. + """ + error_text = "" + missing_vars = "" + kinds_text = ", ".join(cls.all_model_filekinds) + this_filedict = copy.deepcopy(cfg) + + if "kind" not in cfg.keys(): + error_text = ( + f"{error_text}" + f"- the ``kind`` variable is missing. 
Please define a ``kind`` " + f"({kinds_text})\n" + ) + missing_vars = ( + f"{missing_vars} ``kind``: forcing/input/restart/outdata/...\n" + ) + elif cfg["kind"] not in cls.all_model_filekinds: + error_text = ( + f"{error_text}" + f"- ``{cfg['kind']}`` is not a supported ``kind`` " + f"(``files.{name}.kind``), please choose one of the following " + f"kinds: {kinds_text}\n" + ) + this_filedict["kind"] = f"``{this_filedict['kind']}``" + + if ( + "path_in_computer" not in cfg.keys() + and cfg.get("kind") in cls.input_file_kinds + ): + error_text = ( + f"{error_text}" + f"- the ``path_in_computer`` variable is missing. Please define a " + f"``path_in_computer`` (i.e. the path to the file excluding its name)." + f" NOTE: this is only required for {', '.join(cls.input_file_kinds)} file " + f"kinds\n" + ) + missing_vars = ( + f"{missing_vars} ``path_in_computer``: \n" + ) + + if ( + "name_in_computer" not in cfg.keys() + and cfg.get("kind") in cls.input_file_kinds + ): + error_text = ( + f"{error_text}" + f"- the ``name_in_computer`` variable is missing. Please define a ``name_in_computer`` " + f"(i.e. name of the file in the work folder). NOTE: this is only required for " + f"{', '.join(cls.input_file_kinds)} file kinds\n" + ) + missing_vars = f"{missing_vars} ``name_in_computer``: \n" + + if ( + "name_in_work" not in cfg.keys() + and cfg.get("kind") in cls.output_file_kinds + ): + error_text = ( + f"{error_text}" + f"- the ``name_in_work`` variable is missing. Please define a ``name_in_work`` " + f"(i.e. name of the file in the work folder). NOTE: this is only required for " + f"{', '.join(cls.output_file_kinds)} file kinds\n" + ) + missing_vars = ( + f"{missing_vars} ``name_in_work``: \n" + ) + + missing_vars = ( + f"Please, complete/correct the following vars for your file:\n\n" + f"{_pretty_filedict(name, this_filedict)}" + f"{missing_vars}" + ) + + if error_text: + error_text = ( + f"The file dictionary ``{name}`` is missing relevant information " + f"or is incorrect:\n{error_text}" + ) + user_error("File Dictionaries", f"{error_text}\n{missing_vars}") + + @classmethod + def _complete_file_names(cls, cfg): + """ + Complete missing names in the file with the default name, depending whether + the file is of kind ``input`` or ``output``. + """ + if cfg["kind"] in cls.input_file_kinds: + default_name = cfg["name_in_computer"] + elif cfg["kind"] in cls.output_file_kinds: + default_name = cfg["name_in_work"] + else: + raise TypeError(f"Unknown file kind: {cfg['kind']}") + names = {} + names["computer"] = cfg.get("name_in_computer", default_name) + names["run_tree"] = cfg.get("name_in_run_tree", default_name) + names["exp_tree"] = cfg.get("name_in_exp_tree", default_name) + names["work"] = cfg.get("name_in_work", default_name) + return names + + @staticmethod + def _resolve_abs_paths(config, component, attrs_dict, names, kind) -> Dict: + # NOTE(PG): I....hate this! :-( + """ + Builds the absolute paths of the file for the different locations + (``computer``, ``work``, ``exp_tree``, ``run_tree``) using the information + about the experiment paths in ``config`` and the + ``self["path_in_computer"]``. 
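As a worked illustration of that path resolution (all paths and config values below are invented):

.. code-block:: python

    from esm_runscripts.filedicts import SimulationFile

    config = {
        "general": {"thisrun_work_dir": "/exp/run_20000101-20000101/work"},
        "echam": {
            "experiment_input_dir": "/exp/input/echam",
            "thisrun_input_dir": "/exp/run_20000101-20000101/input/echam",
        },
    }
    attrs = {"path_in_computer": "/pool/ECHAM/T63"}
    names = {
        "computer": "T63CORE2_jan_surf.nc",
        "exp_tree": "T63CORE2_jan_surf.nc",
        "run_tree": "T63CORE2_jan_surf.nc",
        "work": "unit.24",
    }

    paths = SimulationFile._resolve_abs_paths(config, "echam", attrs, names, "input")
    # paths["computer"] -> PosixPath("/pool/ECHAM/T63/T63CORE2_jan_surf.nc")
    # paths["work"]     -> PosixPath("/exp/run_20000101-20000101/work/unit.24")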
+ + It defines these new variables in the ``SimulationFile`` dictionary: + - ``self["absolute_path_in_work"]`` + - ``self["absolute_path_in_computer"]`` + - ``self["absolute_path_in_run_tree"]`` + - ``self["absolute_path_in_exp_tree"]`` + """ + locations = { + "work": pathlib.Path(config["general"]["thisrun_work_dir"]), + "computer": pathlib.Path(attrs_dict.get("path_in_computer", "/dev/null")), + "exp_tree": pathlib.Path(config[component][f"experiment_{kind}_dir"]), + "run_tree": pathlib.Path(config[component][f"thisrun_{kind}_dir"]), + } + + return {key: path.joinpath(names[key]) for key, path in locations.items()} + + @staticmethod + def _check_path_in_computer_is_abs(paths, component, name): + if paths["computer"] is not None and not paths["computer"].is_absolute(): + user_error( + "File Dictionaries", + "The path defined for " + f"``{component}.files.{name}.path_in_computer`` is not " + f"absolute (``{paths['computer']}``). Please, always define an " + "absolute path for the ``path_in_computer`` variable.", + ) + + ############################################################################################## + # Overrides of standard dict methods + ############################################################################################## + + def __setattr__(self, name: str, value: Any) -> None: + """Checks when changing dot attributes for disallowed values""" + if name == "datestamp_format": + self._check_datestamp_format_is_allowed(value) + if name == "datestamp_method": + self._check_datestamp_method_is_allowed(value) + return super().__setattr__(name, value) + + def __setitem__(self, key: Any, value: Any) -> None: + """Checks for changing with sim_file['my_key'] = 'new_value'""" + if key == "datestamp_format": + self._check_datestamp_format_is_allowed(value) + if key == "datestamp_method": + self._check_datestamp_method_is_allowed(value) + return super().__setitem__(key, value) + + def update(self, *args, **kwargs): + """ + Standard dictionary update method, enhanced by additional safe-guards + for particular values. + """ + for k, v in dict(*args, **kwargs).items(): + if k == "datestamp_format": + self._check_datestamp_format_is_allowed(v) + if k == "datestamp_method": + self._check_datestamp_method_is_allowed(v) + self[k] = v + + ############################################################################################## + + ############################################################################################## + # Object Properties + ############################################################################################## + + @property + def datestamp_method(self): + """ + Defines which datestamp_method shall be used when possibly including + date stamps to the file. Valid choices are "never", "always", + "avoid_overwrite". + """ + return self._datestamp_method + + @datestamp_method.setter + def datestamp_method(self, new_attr_value): + """ + Sets a new value for datestamp method. + """ + # NOTE(PG): The checks could go here + self._datestamp_method = new_attr_value + + @property + def datestamp_format(self): + """ + Defines which datestamp_format shall be used when possibly including + date stamps to the file. Valid choices are "check_from_filename" and + "append". 
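Roughly, the two formats differ in when the date stamp is attached to a target name. The sketch below uses the module-level helper that backs ``check_from_filename``; it assumes the usual string date attributes of ``esm_calendar.Date`` and invented file names.

.. code-block:: python

    from esm_calendar import Date

    from esm_runscripts.filedicts import _fname_has_date_stamp_info

    date = Date("2000-01-01")
    _fname_has_date_stamp_info("outdata_2000-01-01.nc", date)  # True: year, month, day found
    _fname_has_date_stamp_info("outdata.nc", date)             # False: a stamp would be appended

    # ``append`` skips this check and always adds ``_<sim_date>`` to the target name.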
+ """ + datestamp_format = self.get( + "datestamp_format", "append" + ) # This is the old default behaviour + return datestamp_format + + ############################################################################################## + # Main Methods + ############################################################################################## + @_globbing + @_allowed_to_be_missing + def cp(self, source: str, target: str) -> None: + """ + Copies the source file or folder to the target path. It changes the name of the + target if ``self["name_in_"]`` differs from ``self["name_in_"]. + + Parameters + ---------- + source : str + String specifying one of the following options: ``"computer"``, ``"work"``, + ``"exp_tree"``, ``run_tree`` + target : str + String specifying one of the following options: ``"computer"``, ``"work"``, + ``"exp_tree"``, ``run_tree`` + """ + if source not in self.locations: + raise ValueError( + f"Source is incorrectly defined, and needs to be in {self.locations}" + ) + if target not in self.locations: + raise ValueError( + f"Target is incorrectly defined, and needs to be in {self.locations}" + ) + source_path = self[f"absolute_path_in_{source}"] + target_path = self[f"absolute_path_in_{target}"] + + # Datestamps + if self.datestamp_method == "always": + target_path = self._always_datestamp(target_path) + if self.datestamp_method == "avoid_overwrite": + target_path = self._avoid_override_datestamp(target_path) + + # General Checks + # TODO (deniz): need to add higher level exception handler (eg. user_error) + self._check_source_and_target(source_path, target_path) + + # Actual copy + source_path_type = self._path_type(source_path) + if source_path_type == FileStatus.DIR: + copy_func = shutil.copytree + else: + copy_func = shutil.copy2 + try: + copy_func(source_path, target_path) + logger.success(f"Copied {source_path} --> {target_path}") + except IOError as error: + raise IOError( + f"Unable to copy {source_path} to {target_path}\n\n" + f"Exception details:\n{error}" + ) + + @_globbing + @_allowed_to_be_missing + def ln(self, source: AnyStr, target: AnyStr) -> None: + """creates symbolic links from the path retrieved by ``source`` to the one by ``target``. + + Parameters + ---------- + source : str + key to retrieve the source from the file dictionary. Possible options: ``computer``, ``work``, ``exp_tree``, ``run_tree`` + + target : str + key to retrieve the target from the file dictionary. Possible options: ``computer``, ``work``, ``exp_tree``, ``run_tree`` + + Returns + ------- + None + + Raises + ------ + FileNotFoundError + - Source path does not exist + OSError + - Target path is a directory + - Symbolic link is trying to link to itself + - Target path does not exist + FileExistsError + - Target path already exists + """ + if source not in self.locations: + raise ValueError( + f"Source is incorrectly defined, and needs to be in {self.locations}" + ) + if target not in self.locations: + raise ValueError( + f"Target is incorrectly defined, and needs to be in {self.locations}" + ) + # full paths: directory path / file name + source_path = self[f"absolute_path_in_{source}"] + target_path = self[f"absolute_path_in_{target}"] + + # Datestamps + if self.datestamp_method == "always": + target_path = self._always_datestamp(target_path) + if self.datestamp_method == "avoid_overwrite": + target_path = self._avoid_override_datestamp(target_path) + # General Checks + # TODO (deniz): need to add higher level exception handler (eg. 
user_error) + self._check_source_and_target(source_path, target_path) + + try: + os.symlink(source_path, target_path) + except IOError as error: + raise IOError( + f"Unable to link {source_path} to {target_path}\n\n" + f"Exception details:\n{error}" + ) + + @_globbing + @_allowed_to_be_missing + def mv(self, source: str, target: str) -> None: + """ + Moves (renames) the SimulationFile from it's location in ``source`` to + it's location in ``target``. + + Parameters + ---------- + source : str + One of ``"computer"``, ``"work"``, ``"exp_tree"``, "``run_tree``" + target : str + One of ``"computer"``, ``"work"``, ``"exp_tree"``, "``run_tree``" + """ + if source not in self.locations: + raise ValueError( + f"Source is incorrectly defined, and needs to be in {self.locations}" + ) + if target not in self.locations: + raise ValueError( + f"Target is incorrectly defined, and needs to be in {self.locations}" + ) + source_path = self[f"absolute_path_in_{source}"] + target_path = self[f"absolute_path_in_{target}"] + + # Datestamps + if self.datestamp_method == "always": + target_path = self._always_datestamp(target_path) + if self.datestamp_method == "avoid_overwrite": + target_path = self._avoid_override_datestamp(target_path) + # General Checks + # TODO (deniz): need to add higher level exception handler (eg. user_error) + self._check_source_and_target(source_path, target_path) + + # Perform the movement: + try: + source_path.rename(target_path) + logger.success(f"Moved {source_path} --> {target_path}") + except IOError as error: + raise IOError( + f"Unable to move {source_path} to {target_path}\n\n" + f"Exception details:\n{error}" + ) + + _allowed_datestamp_methods = {"never", "always", "avoid_overwrite"} + """ + Set containing the allowed datestamp methods which can be chosen from. + + Notes on possible datestamp methods + ----------------------------------- + never : str + This will never add a datestamp to a file. **WARNING** this will + cause you to possibly overwrite files. + always : str + This will always add a datestamp to a file, even if the canonical + target name would not suggest one. + avoid_overwrite : str + This will add a datestamp at the end of the file, if the during the + mv/cp/ln operation the file would be identically named. + """ + + _allowed_datestamp_formats = {"check_from_filename", "append"} + """ + Set containing the allowed datestamp formats which can be chosen from. + + Notes on possible datestamp formats + ----------------------------------- + check_from_filename : str + This option will add a datestamp to a file, if the year, month, and day + cannot be extracted from the standard declared filename. + append : str + This will add a datestamp at the end of the file, regardless of if it + can be extracted from the file or not. + """ + + def _check_datestamp_method_is_allowed(self, datestamp_method): + """ + Ensures that the datestamp method is in the defined valid set. + """ + if datestamp_method not in self._allowed_datestamp_methods: + raise ValueError( + "The datestamp_method must be defined as one of never, always, or avoid_overwrite" + ) + + def _check_datestamp_format_is_allowed(self, datestamp_format): + """ + Ensures that the datestamp format is in the defined valid set. + """ + if datestamp_format not in self._allowed_datestamp_formats: + raise ValueError( + "The datestamp_format must be defined as one of check_from_filename or append" + ) + + def _path_type(self, path: pathlib.Path) -> FileStatus: + """ + Checks if the given ``path`` exists. 
If it does returns it's type, if it + doesn't, returns ``None``. + + Parameters + ---------- + path : pathlib.Path + Path to be checked. + + Returns + ------- + Enum value + One of the values from FileType enumeration + + Raises + ------ + TypeError + - when ``path`` has incompatible type + - when ``path`` is not identified + """ + if not isinstance(path, (str, pathlib.Path)): + datatype = type(path).__name__ + raise TypeError( + f"Path ``{path}`` has an incompatible datatype ``{datatype}``. str or pathlib.Path is expected" + ) + + path = pathlib.Path(path) + + # NOTE: is_symlink() needs to come first because it is also a is_file() + # NOTE: pathlib.Path().exists() also checks is the target of a symbolic link exists or not + if path.is_symlink() and not path.exists(): + return FileStatus.BROKEN_LINK + elif not path.exists(): + return FileStatus.NOT_EXISTS + elif path.is_symlink(): + return FileStatus.LINK + elif path.is_file(): + return FileStatus.FILE + elif path.is_dir(): + return FileStatus.DIR + else: + # probably, this will not happen + raise TypeError(f"{path} can not be identified") + + def _always_datestamp(self, fname) -> pathlib.Path: + """ + Method called when ``always`` is the ``datestamp_method. + + Appends the datestamp in any case if ``datestamp_format`` is + ``append``. Appends the datestamp only if it is not obviously in the + filename if the ``datestamp_format`` is ``check_from_filename``. Only + appends to files or links, not directories. + + Parameters + ---------- + fname : pathlib.Path + The file who's name should be modified. + + Returns + ------- + pathlib.Path + A modified file with an added date stamp. + """ + if fname.is_dir(): + return fname + if self.datestamp_format == "append": + return pathlib.Path(f"{fname}_{self._sim_date}") + if self.datestamp_format == "check_from_filename": + if _fname_has_date_stamp_info(fname, self._sim_date): + return fname + else: + return pathlib.Path(f"{fname}_{self._sim_date}") + raise DatestampFormatError( + "Unknown Datestamp formatting type, please use `append` or `check_from_filename`" + ) + + def _avoid_override_datestamp(self, target: pathlib.Path) -> pathlib.Path: + """ + If source and target are identical, adds the date stamp to the target. + + This method is used in the case that the object's attribute + ``datestamp_method`` is set to ``avoid_overwrite``, and is called + before the checks of each of ln, cp, and mv. + + Parameters + ---------- + target : pathlib.Path + + Returns + ------- + pathlib.Path : + The new target that can be used + """ + if target.exists() and not target.is_dir(): + if self.datestamp_format == "append": + target = pathlib.Path(f"{target}_{self._sim_date}") + # The other case ("check_from_filename") is meaningless? + return target + + @staticmethod + def _wild_card_check(source_pattern: list, target_pattern: list) -> bool: + """ + Checks for syntax mistakes. If any were found, it notifies the user about these + errors in the syntax using ``esm_parser.error``. 
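Concretely, the check only compares the number of ``*`` pieces on both sides (illustrative patterns):

.. code-block:: python

    from esm_runscripts.filedicts import SimulationFile

    source_pattern = "fesom.2000.*.restart.*.nc".split("*")  # 3 pieces, i.e. two wildcards
    good_target = "restart_*_*.nc".split("*")                # 3 pieces -> patterns match
    bad_target = "restart_*.nc".split("*")                   # 2 pieces -> user_error is raised

    SimulationFile._wild_card_check(source_pattern, good_target)  # True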
+ + Parameters + ---------- + source_pattern : list + A list including the different pieces of the source name pattern + target_pattern : list + A list including the different pieces of the target name pattern + + Returns + ------- + bool : + If no issues were found + """ + target_and_source_patterns_match = len(target_pattern) == len(source_pattern) + if not target_and_source_patterns_match: + user_error( + "Wild card", + ( + "The wild card pattern of the source " + + f"``{source_pattern}`` does not match with the " + + f"target ``{target_pattern}``. Make sure the " + + f"that the number of ``*`` are the same in both " + + f"sources and targets." + ), + ) + + return target_and_source_patterns_match + + def _find_globbing_files(self, location: str) -> list: + """ + Lists the files matching the globbing path of the given ``location``, and + notifies the user if none were found, via ``esm_parser.user_error``. + + Parameters + ---------- + location : str + The location string (``work``, ``computer``, ``exp_tree``, ``run_tree``) + + Returns + ------- + glob_paths : list + List of paths found matching the globbing case for the ``location`` pattern + """ + absolute_path_in_location = str(self[f"absolute_path_in_{location}"]) + glob_paths = glob.glob(absolute_path_in_location) + + # Check that there are any source files available + if len(glob_paths) == 0: + user_error( + "Globbing", + f"No files found for the globbing pattern " + f"``{absolute_path_in_location}``.", + ) + + return glob_paths + + def _check_source_and_target( + self, source_path: pathlib.Path, target_path: pathlib.Path + ) -> None: + """ + Performs common checks for file movements + + Parameters + ---------- + source_path : pathlib.Path + path of the file to be copied / linked / moved + + target_path : pathlib.Path + path of the file to be generated + + Returns + ------- + True + + Raises + ------ + Exception + - If the ``source_path`` does not exist + - If the ``target_path`` exists + - If the parent dir of the ``target_path`` does not exist + """ + # Types. Eg. file, dir, link, or None + source_path_type = self._path_type(source_path) + target_path_type = self._path_type(target_path) + + # Checks + # ------ + # Source does not exist + if source_path_type == FileStatus.NOT_EXISTS: + err_msg = f"Unable to perform file operation. Source ``{source_path}`` does not exist!" + raise FileNotFoundError(err_msg) + + # Target already exists + target_exists = ( + os.path.exists(target_path) or target_path_type == FileStatus.LINK + ) + if target_exists: + err_msg = f"Unable to perform file operation. Target ``{target_path}`` already exists" + raise FileExistsError(err_msg) + + # Target parent directory does not exist + if not target_path.parent.exists(): + # TODO: we might consider creating it (Miguel) + err_msg = f"Unable to perform file operation. Parent directory of the target ``{target_path}`` does not exist" + raise FileNotFoundError(err_msg) + + # if source is a broken link. Ie. 
pointing to a non-existing file + if source_path_type == FileStatus.BROKEN_LINK: + err_msg = f"Unable to create symbolic link: ``{source_path}`` points to a broken path: {source_path.resolve()}" + raise FileNotFoundError(err_msg) + + +class DatedSimulationFile(SimulationFile): + """A SimultionFile which also needs to know about dates""" + + def __init__( + self, + date=Date("2000-01-01"), + **kwargs, + ): + super().__init__(**kwargs) + self._sim_date = date + + @classmethod + def from_config(cls, full_config: dict, attrs_address: str, date: Date): + obj = super().from_config(full_config, attrs_address) + obj._sim_date = date + return obj + + +def _pretty_filedict(name, filedict): + """ + Returns a string in yaml format of the given file dictionary. + + Parameters + ---------- + dict + A file dictionary + + Returns + ------- + str + A string in yaml format of the given file dictionary + """ + return yaml.dump({"files": {name: filedict}}) + + +def copy_files(config): + """Copies files""" + # PG: No. We do not want this kind of general function. This is just to + # demonstrate how the test would work + return config + + +class SimulationFileCollection(dict): + """ + Once instanciated, searches in the ``config`` dictionary for the ``files`` keys. + This class contains the methods to: 1) instanciate each of the files defined in + ``files`` as ``SimulationFile`` objects and 2) loop through these objects + triggering the desire file movement. + """ + + def __init__(self): + pass + + # PG: Not sure I need this... + @property + def _defined_from(self): + stack = inspect.stack() + caller_frame = stack[1] # Get the frame of the caller + caller_name = caller_frame.function + return caller_name + + @classmethod + def from_config(cls, config: dict): + sim_files = cls() + for component in config["general"]["valid_model_names"]: + config_address = f"{component}.files" + for file_key in dpath.util.get( + config, config_address, separator="." + ).keys(): + sim_files[file_key] = SimulationFile.from_config( + config, f"{config_address}.{file_key}" + ) + return sim_files + + def _gather_file_movements(self) -> None: + """Puts the methods for each file movement into the dictionary as callable values behind the `_filesystem_op` key""" + for sim_file_id, sim_file_obj in self.items(): + movement_type = sim_file_obj.get("movement_type", "cp") + if movement_type == "mv": + self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "mv") + elif movement_type == "cp": + self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "cp") + elif movement_type == "ln": + self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "ln") + else: + raise ValueError( + f"Movement Type is not defined correctly, please use `mv`, `cp` or `ln` for {sim_file_id}" + ) + + def execute_filesystem_operation( + self, config: ConfigSetup + ) -> ConfigSetup: # , from: pathlib.Path | str, to: pathlib.Path | str) -> None: + self._gather_file_movements() + for sim_file_id, sim_file_obj in self.items(): + logger.info(f"Processing {sim_file_id}") + if config["general"]["jobtype"] == "prepcompute": + src, dest = "pool", "work" + elif config["general"]["jobtype"] == "tidy": + src, dest = "work", "exp_tree" + else: + raise ValueError(f"Incorrect jobtype specified for {sim_file_obj}") + sim_file_obj["_filesystem_op"](src, dest) + return config + + +def resolve_file_movements(config: ConfigSetup) -> ConfigSetup: + """ + Runs all methods required to get files into their correct locations. This will + instantiate the ``SimulationFiles`` class. 
It's called by the recipe manager. + + Parameters + ---------- + config : ConfigSetup + The complete simulation configuration. + + Returns + ------- + config : ConfigSetup + The complete simulation configuration, potentially modified. + """ + sim_file_collection = SimulationFileCollection.from_config(config) + config = sim_file_collection.execute_filesystem_operation(config) + return config diff --git a/src/esm_runscripts/filelists.py b/src/esm_runscripts/filelists.py index d516d80d3..ed2aca675 100644 --- a/src/esm_runscripts/filelists.py +++ b/src/esm_runscripts/filelists.py @@ -891,7 +891,7 @@ def check_for_unknown_files(config): return config -def resolve_symlinks(file_source): +def resolve_symlinks(config, file_source): if os.path.islink(file_source): points_to = os.path.realpath(file_source) @@ -904,7 +904,7 @@ def resolve_symlinks(file_source): return file_source # recursively find the file that the link is pointing to - return resolve_symlinks(points_to) + return resolve_symlinks(config, points_to) else: return file_source @@ -955,7 +955,7 @@ def copy_files(config, filetypes, source, target): helpers.print_datetime(config) continue dest_dir = os.path.dirname(file_target) - file_source = resolve_symlinks(file_source) + file_source = resolve_symlinks(config, file_source) if not os.path.isdir(file_source): try: if not os.path.isdir(dest_dir): diff --git a/src/esm_runscripts/namelists.py b/src/esm_runscripts/namelists.py index 1f9c85f6a..6089fa573 100644 --- a/src/esm_runscripts/namelists.py +++ b/src/esm_runscripts/namelists.py @@ -90,6 +90,72 @@ def nmls_load(mconfig): mconfig["namelists"][nml].uppercase = True return mconfig + @staticmethod + def nmls_check_changes(namelist_changes): + """ + Checks if namelist changes are given in correct syntax. + If not, a user_error will be raised and stop the execution. + + Programmer Information + ---------------------- + + Parameters + ---------- + namelist_changes : nested dict + + If the syntax is correct, namelist_changes should be a nested dict of the following form:: + + {'namelist1.nml': {'namelist1': {'variable1': 'value1', 'variable2': 'value2', 'variable3': 'value3'}, 'namelist2': {'variable1': value1}}} + + Returns + ------- + None + + """ + + error_message = ( + "There is a syntax error, probably in your runscript (but potentially in other yaml files), " + "regarding namelist changes (e.g. in a 'add_namelist_changes' block). " + "It seems that either 'namelist_file' or " + "'namelist_group' or 'both' are missing.\n" + "Please make sure that namelist changes are specified in the correct syntax (see example below)" + " and rerun your runscript.\n" + ) + example = ( + "\nExample of a ``correct syntax`` for [add_]namelist_changes:\n" + "\t [add_]namelist_changes:\n" + "\t '':\n" + "\t '':\n" + "\t : " + ) + + nml_syntax_error = False + + for namelist in list(namelist_changes): + changes = namelist_changes[namelist] + # Check if namelist_changes are specified in correct syntax (e.g. in runscript) + # If correct syntax, changes is always a dict. + if not isinstance(changes, dict): + nml_syntax_error = True + this_is_wrong = (f"There is a syntax error in the following lines:\n\n[add_]namelist_changes:\n '{namelist}: {changes}'\n...") + break + else: + for change_chapter in list(changes): + change_entries = changes[change_chapter] + # Check if namelist_changes are specified in correct syntax (e.g. in runscript) + # If correct syntax, change_entries is always a dict. 
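# Illustration only (derived from the docstring above, not part of the patch):
# ``changes`` maps namelist groups to dicts of variable/value pairs, and
# ``change_entries`` is one of those inner dicts.
#
#     namelist_changes:
#         "namelist1.nml":             # namelist file  -> ``namelist``
#             "namelist1":             # namelist group -> keys of ``changes``
#                 "variable1": value1  # variables      -> ``change_entries``
#
# A runscript entry that skips the group level, e.g.
# ``{"namelist1.nml": {"variable1": "value1"}}``, makes ``change_entries`` a
# plain string and is rejected by the isinstance check below.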
+ if not isinstance(change_entries, dict): + nml_syntax_error = True + this_is_wrong = ( + f"There is a syntax error in the following lines:\n\n[add_]namelist_changes:\n '{namelist}':\n '{change_chapter}: {change_entries}'\n...") + break + if nml_syntax_error: + user_error( + "Syntax error in namelist changes", + f"{error_message}\n{this_is_wrong}\n{example}", + dsymbols=["``", "'"], + ) + @staticmethod def nmls_remove(mconfig): """ @@ -122,9 +188,15 @@ def nmls_remove(mconfig): ------- mconfig : dict The modified configuration. + + Calls to other methods + ---------------------- + nmls_check_changes """ namelist_changes = mconfig.get("namelist_changes", {}) + # Check if namelist_changes have correct syntax + Namelist.nmls_check_changes(namelist_changes) namelist_removes = [] for namelist in list(namelist_changes): changes = namelist_changes[namelist] @@ -136,7 +208,6 @@ def nmls_remove(mconfig): value = change_entries[key] if value == "remove_from_namelist": namelist_removes.append((namelist, change_chapter, key)) - # the key is probably coming from esm_tools config # files or from a user runscript. It can contain lower # case, but the original Fortran namelist could be in @@ -145,7 +216,6 @@ def nmls_remove(mconfig): # `key` is the processed variable from f90nml module and # is lowercase. remove_original_key = False - # traverse the namelist chapter and see if a mixed case # variable is also found for key2 in namelist_changes[namelist][change_chapter]: @@ -156,12 +226,10 @@ def nmls_remove(mconfig): namelist_removes.append( (namelist, change_chapter, original_key) ) - # remove both lowercase and mixed case variables del namelist_changes[namelist][change_chapter][key] if remove_original_key: del namelist_changes[namelist][change_chapter][original_key] - # mconfig instead of config, Grrrrr print( f"- NOTE: removing the variable: {key} from the namelist: {namelist}" @@ -219,8 +287,15 @@ def nmls_modify(mconfig): ------- mconfig : dict The modified configuration. + + Calls to other methods: + ---------------------- + nmls_check_changes """ namelist_changes = mconfig.get("namelist_changes", {}) + # Check if namelist_changes have correct syntax + Namelist.nmls_check_changes(namelist_changes) + for namelist, changes in namelist_changes.items(): mconfig["namelists"][namelist].patch(changes) return mconfig diff --git a/src/esm_runscripts/oasis.py b/src/esm_runscripts/oasis.py index 8767a82ee..32b7f4706 100644 --- a/src/esm_runscripts/oasis.py +++ b/src/esm_runscripts/oasis.py @@ -404,11 +404,35 @@ def add_output_file(self, lefts, rights, leftmodel, rightmodel, config): config["outdata_in_work"][thisfile] = thisfile config["outdata_sources"][thisfile] = thisfile - def add_restart_files(self, restart_file, fconfig): + def add_restart_files(self, restart_file_label, fconfig): + """ + Handles the special restart case of the coupling fields. + + Cases + ----- + 1. If this run is a restart but not a branch-off experiment, set the source to + be the same as defined by the user in ``restart_in_sources`` or the same + as the name coming from ``coupling__fields``, if the first is + missing (the normal case). + 2. Same as case 1 but with the time stamp added to the name of the restart file + to make sure the correct file (and not a link to the last restart file made) + is loaded for the branch-off experiment. This option uses the + non-timestamped version of the file when only one file is found (e.g. 
the + parent simulation only has one run, or the files are taken for the first + run from the pool, as in AWICM3). + + Parameters + ---------- + restart_file_label : str + The file's label (not the file name itself!). Used to retrieve the + file's source and target path. As defined in the keys of + ``coupling_target_fields`` or ``coupling_input_fields`` in the yamls + fconfig : ConfigSetup + The complete simulation configuration. + """ config = fconfig[self.name] gconfig = fconfig["general"] - restart_file_label = restart_file is_runtime = gconfig["run_or_compile"] == "runtime" enddate = "_" + gconfig["end_date"].format( form=9, givenph=False, givenpm=False, givenps=False @@ -431,23 +455,32 @@ def add_restart_files(self, restart_file, fconfig): if "restart_in_sources" not in config: config["restart_in_sources"] = {} - config["restart_out_files"][restart_file] = restart_file - config["restart_out_files"][restart_file + "_recv"] = restart_file + "_recv" + # Find the actual path of the restart + restart_file_path = config["restart_in_sources"].get(restart_file_label, None) + # Find the actual name of the restart: if a path is given in restart_in_sources + # get the basename of that path, otherwise assign the file label also as name + # of the file (coming from ``coupling__fields``) + if restart_file_path: + restart_file = os.path.basename(restart_file_label) + else: + restart_file = restart_file_label + + config["restart_out_files"][restart_file_label] = restart_file + config["restart_out_files"][restart_file_label + "_recv"] = restart_file + "_recv" - config["restart_out_in_work"][restart_file] = restart_file # + enddate - config["restart_out_in_work"][restart_file + "_recv"] = ( + config["restart_out_in_work"][restart_file_label] = restart_file # + enddate + config["restart_out_in_work"][restart_file_label + "_recv"] = ( restart_file + "_recv" ) # + enddate - config["restart_out_sources"][restart_file] = restart_file - config["restart_out_sources"][restart_file + "_recv"] = restart_file + "_recv" + config["restart_out_sources"][restart_file_label] = restart_file + config["restart_out_sources"][restart_file_label + "_recv"] = restart_file + "_recv" - config["restart_in_files"][restart_file] = restart_file - config["restart_in_in_work"][restart_file] = restart_file + config["restart_in_files"][restart_file_label] = restart_file + config["restart_in_in_work"][restart_file_label] = restart_file # In case of a branch-off experiment -> use the correct oasis restart files: - # Not the rstas.nc soft link to the last, but the actual one for the - # branch-off date + # Not the soft link to the last, but the actual one for the branch-off date if gconfig["run_number"] == 1 and config["lresume"] and gconfig["jobtype"] == "prepcompute": # If they do not exist, define ``ini_restart_date`` and ``ini_restart_dir`` # based on ``ini_parent_date`` and ``ini_parent_dir`` @@ -455,46 +488,56 @@ def add_restart_files(self, restart_file, fconfig): config["ini_restart_date"] = config["ini_parent_date"] if "ini_parent_dir" in config and "ini_restart_dir" not in config: config["ini_restart_dir"] = config["ini_parent_dir"] + # If the restart file path is not defined, or it's not an absolute path to + # the file, set it to be the same as the ini_restart_dir + if not restart_file_path or restart_file_path == restart_file: + restart_file_path = f"{config['ini_restart_dir']}/{restart_file}" # If set in config (oasis): if "ini_restart_dir" in config and "ini_restart_date" in config: # check if restart file with 
ini_restart_date in filename is in the restart # folder of the parent experiment to be branched off from: glob_search_file = ( - f"{config['ini_restart_dir']}{restart_file}_????????-" + f"{restart_file_path}_????????-" f"{config['ini_restart_date'].year}" f"{config['ini_restart_date'].month:02}" f"{config['ini_restart_date'].day:02}" ) - glob_restart_file = glob.glob(glob_search_file) - glob_restart_file.sort() - if restart_file and is_runtime: - # If there are more than one file found let the user decide which one to take - if len(glob_restart_file) == 1: - restart_file = os.path.basename(glob_restart_file[0]) - elif len(glob_restart_file) == 0: + else: + glob_search_file = restart_file_path + + glob_restart_file = glob.glob(glob_search_file) + glob_restart_file.sort() + if restart_file and is_runtime: + # If there are more than one file found let the user decide which one to take + if len(glob_restart_file) == 1: + restart_file = os.path.basename(glob_restart_file[0]) + elif len(glob_restart_file) == 0: + restart_file = restart_file_path + if not os.path.isfile(restart_file): user_error( "Restart file missing", - f"No OASIS restart file for ``{restart_file}`` found " - f"matching the pattern ``{glob_search_file}``" + f"No OASIS restart file for ``{restart_file_label}`` found " + f"matching the pattern ``{glob_search_file}`` nor " + f"``{restart_file}``" + ) + else: + if not gconfig["isinteractive"]: + # If more than one restart file found that matches ini_restart_date, + # ask the user to select from the result list: + message = ( + "More than one OASIS restart file was found for " + "your branchoff experiment that matches the " + "ini_restart_date you selected. Please select " + "one of the following OASIS restart files:" ) - else: - if not gconfig["isinteractive"]: - # If more than one restart file found that matches ini_restart_date, - # ask the user to select from the result list: - message = ( - "More than one OASIS restart file was found for " - "your branchoff experiment that matches the " - "ini_restart_date you selected. Please select " - "one of the following OASIS restart files:" - ) - answers = questionary.form( - restarts = questionary.select(message, choices=glob_restart_file) - ).ask() - restart_file = os.path.basename(answers["restarts"]) - - config["restart_in_sources"][restart_file_label] = restart_file - - if restart_file not in config["restart_in_sources"]: + answers = questionary.form( + restarts = questionary.select(message, choices=glob_restart_file) + ).ask() + restart_file = answers["restarts"] + + config["restart_in_sources"][restart_file_label] = restart_file + + if restart_file_label not in config["restart_in_sources"]: config["restart_in_sources"][restart_file_label] = restart_file diff --git a/src/esm_runscripts/prepcompute.py b/src/esm_runscripts/prepcompute.py index 040a3e63a..62ef37c97 100644 --- a/src/esm_runscripts/prepcompute.py +++ b/src/esm_runscripts/prepcompute.py @@ -280,6 +280,17 @@ def oasis_representer(dumper, oasis): def namelist_representer(dumper, f90nml): return dumper.represent_str(f"f90nml.name") + def listwithprov_representer(dumper, listwithprov): + return dumper.represent_sequence("tag:yaml.org,2002:seq", listwithprov) + + def dictwithprov_representer(dumper, dictwithprov): + return dumper.represent_mapping("tag:yaml.org,2002:map", dictwithprov) + + # @Paul: this is me just playing around with things, this should be included maybe + # somewhere else and generalized for Str, Int, Bool... 
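# A minimal sketch of that generalization (assumption: each provenance wrapper
# subclasses the builtin it wraps, as StrWithProvenance does for str): cast the
# wrapper back to its builtin and let PyYAML's normal dispatch pick the stock
# representer.
def plain_value_representer(builtin_type):
    def represent(dumper, value):
        # represent_data() re-dispatches on the plain builtin (str, int, bool, ...)
        return dumper.represent_data(builtin_type(value))
    return represent
# It could then be registered once per wrapper type, e.g.
# EsmConfigDumper.add_representer(StrWithProvenance, plain_value_representer(str)).
# The single-type representer actually added by this patch follows: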
+ def strwithprov_representer(dumper, strwithprov): + return dumper.represent_str(strwithprov) + # dumper object for the ESM-Tools configuration class EsmConfigDumper(yaml.dumper.Dumper): pass @@ -313,6 +324,19 @@ class EsmConfigDumper(yaml.dumper.Dumper): f90nml.namelist.Namelist, namelist_representer ) + # Provenance representers + EsmConfigDumper.add_representer( + esm_parser.provenance.ListWithProvenance, listwithprov_representer + ) + EsmConfigDumper.add_representer( + esm_parser.provenance.DictWithProvenance, dictwithprov_representer + ) + # @Paul: this is me just playing around with things, this should be included maybe + # somewhere else and generalized for Str, Int, Bool... + EsmConfigDumper.add_representer( + esm_parser.provenance.StrWithProvenance, strwithprov_representer + ) + if "oasis3mct" in config: EsmConfigDumper.add_representer(esm_runscripts.oasis.oasis, oasis_representer) @@ -328,6 +352,8 @@ class EsmConfigDumper(yaml.dumper.Dumper): config_final = copy.deepcopy(config) # PrevRunInfo del config_final["prev_run"] # PrevRunInfo + config_final = esm_parser.provenance.clean_provenance(config_final) + out = yaml.dump( config_final, Dumper=EsmConfigDumper, width=10000, indent=4 ) # PrevRunInfo diff --git a/src/esm_runscripts/tidy.py b/src/esm_runscripts/tidy.py index aeeb5109a..ee2d8857b 100644 --- a/src/esm_runscripts/tidy.py +++ b/src/esm_runscripts/tidy.py @@ -362,12 +362,12 @@ def copy_all_results_to_exp(config): + destination ) else: - linkdest = resolve_symlinks(source) + linkdest = resolve_symlinks(config, source) # newlinkdest = ( # destination.rsplit("/", 1)[0] + "/" + linkdest.rsplit("/", 1)[-1] # ) if os.path.islink(destination): - destdest = resolve_symlinks(source) + destdest = resolve_symlinks(config, source) if linkdest == destdest: # both links are identical, skip continue diff --git a/src/esm_tests/__init__.py b/src/esm_tests/__init__.py index 5a8e87a46..5d96e50c7 100644 --- a/src/esm_tests/__init__.py +++ b/src/esm_tests/__init__.py @@ -2,7 +2,7 @@ __author__ = """Miguel Andres-Martinez""" __email__ = "miguel.andres-martinez@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .initialization import * from .read_shipped_data import * diff --git a/src/esm_tools/__init__.py b/src/esm_tools/__init__.py index 08e86c3ef..809a31bd7 100644 --- a/src/esm_tools/__init__.py +++ b/src/esm_tools/__init__.py @@ -23,7 +23,7 @@ __author__ = """Dirk Barbi, Paul Gierz""" __email__ = "dirk.barbi@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" import functools import inspect diff --git a/src/esm_tools/motd/motd.yaml b/src/esm_tools/motd/motd.yaml index c2a652a84..01a6c248c 100644 --- a/src/esm_tools/motd/motd.yaml +++ b/src/esm_tools/motd/motd.yaml @@ -134,9 +134,9 @@ release6.7: action: DELAY(1) announcement: true -oasis_branchoff6.21.17: +oasis_branchoff6.21.23: package: "esm_tools" - versions: "<6.21.17" + versions: "<6.21.23" message: "\x1b[1m\x1b[96mOASIS BRANCH-OFF RESTARTS FIXED!\x1b[0m\n The bug on the copying of the wrong flux files from OASIS \x1b[96mwhen running\n diff --git a/src/esm_utilities/__init__.py b/src/esm_utilities/__init__.py index 04a4297cc..6d6b45c26 100644 --- a/src/esm_utilities/__init__.py +++ b/src/esm_utilities/__init__.py @@ -2,6 +2,6 @@ __author__ = """Paul Gierz""" __email__ = "pgierz@awi.de" -__version__ = "6.21.19" +__version__ = "6.23.4" from .utils import * diff --git a/tests/test_esm_parser/example.yaml b/tests/test_esm_parser/example.yaml index c5cc69584..9b96bc7cf 100644 --- a/tests/test_esm_parser/example.yaml +++ 
b/tests/test_esm_parser/example.yaml @@ -1,6 +1,6 @@ person: name: Paul Gierz - username: !ENV USER + username: !ENV ${USER} a_string: " hello world I am here to make your life impossible diff --git a/tests/test_esm_parser/example2.yaml b/tests/test_esm_parser/example2.yaml new file mode 100644 index 000000000..aa6c19cc0 --- /dev/null +++ b/tests/test_esm_parser/example2.yaml @@ -0,0 +1,10 @@ +echam: + type: "atmosphere" + files: + greenhouse: + kind: "input" + path_in_computer: "/my/path/in/computer" + a_list: + - 1 + - 2 + - 3 diff --git a/tests/test_esm_parser/test_provenance.py b/tests/test_esm_parser/test_provenance.py index 9c8d55c6f..b252a0beb 100644 --- a/tests/test_esm_parser/test_provenance.py +++ b/tests/test_esm_parser/test_provenance.py @@ -3,164 +3,268 @@ """ import os import pathlib -import sys - import pytest -from ruamel.yaml import YAML -import esm_parser import esm_parser.provenance as provenance +import esm_parser + from esm_parser import yaml_to_dict -@pytest.fixture -def yaml(): - yaml = YAML() - return yaml +@pytest.fixture() +def example_path1(): + return f"{os.path.dirname(__file__)}/example.yaml" -@pytest.fixture -def config(): - config_dict = { - "echam": { - "type": "atmosphere", - "files": { - "greenhouse": { - "kind": "input", - "path_in_computer": "/my/path/in/computer", - } - }, - } - } +@pytest.fixture() +def example_path2(): + return f"{os.path.dirname(__file__)}/example2.yaml" - my_provenance = { - "from_file": None, - "type": None, - } - config = provenance.DictWithProvenance(config_dict, my_provenance) - return config +@pytest.fixture() +def config(example_path2): + return yaml_to_dict.yaml_file_to_dict(example_path2) -# Test 1 (should give you a provenance of None for the key ["fesom"]["asd"]) -def test_get_provenance_1(config): - config["fesom"] = {"asd": 0} + +@pytest.fixture() +def check_provenance(example_path2): check_provenance = { "echam": { - "type": {"from_file": None, "type": None}, + "type": { + "line": 2, + "col": 11, + "yaml_file": example_path2, + "category": "runscript", + }, "files": { "greenhouse": { - "kind": {"from_file": None, "type": None}, - "path_in_computer": {"from_file": None, "type": None}, + "kind": { + "line": 5, + "col": 19, + "yaml_file": example_path2, + "category": "runscript", + }, + "path_in_computer": { + "line": 6, + "col": 31, + "yaml_file": example_path2, + "category": "runscript", + }, + "a_list": [ + { + "line": 8, + "col": 19, + "yaml_file": example_path2, + "category": "runscript", + }, + { + "line": 9, + "col": 19, + "yaml_file": example_path2, + "category": "runscript", + }, + { + "line": 10, + "col": 19, + "yaml_file": example_path2, + "category": "runscript", + }, + ], } }, }, - "fesom": {"asd": None}, + "debug_info": {"loaded_from_file": None}, } + + return check_provenance + + +def test_get_provenance_from_yaml_to_dict(config, check_provenance): + """ + Test 1: Checks for correct provenance entries from example2.yaml file. 
+ """ + assert config.get_provenance() == check_provenance -# Test 2 (should give you a provenance of None for the key "computer") -def test_get_provenance_2(config): - config["computer"] = 0 - check_provenance = { - "echam": { - "type": {"from_file": None, "type": None}, - "files": { - "greenhouse": { - "kind": {"from_file": None, "type": None}, - "path_in_computer": {"from_file": None, "type": None}, - } - }, - }, - "fesom": {"asd": None}, - "computer": None, - } +def test_get_provenance_of_added_entry(config, check_provenance): + """ + Test 2: Checks whether the provenance of an added config entry is None. + """ + + config["fesom"] = True + check_provenance["fesom"] = None assert config.get_provenance() == check_provenance -# Test 3 (should give you a provenance of 2 for the leaf keys inside "fesom") -def test_get_provenance_3(config): - config_fesom = provenance.DictWithProvenance( - {"fesom": {"asd": 0, "model": "ocean"}}, 2 - ) - config.update(config_fesom) - check_provenance = { - "echam": { - "type": {"from_file": None, "type": None}, - "files": { - "greenhouse": { - "kind": {"from_file": None, "type": None}, - "path_in_computer": {"from_file": None, "type": None}, - } - }, - }, - "fesom": {"asd": 2, "model": 2}, - "computer": None, +def test_get_provenance_of_added_nested_entry(config, check_provenance): + """ + Test 3: Checks whether the provenance of an added nested config entry is None. + """ + + config["fesom"] = {"asd": 0} + check_provenance["fesom"] = None + assert config.get_provenance() == check_provenance + + +def test_get_provenance_of_added_nested_entry_2(config, check_provenance): + """ + Test 4: Checks whether the provenance of an added nested config entry is None. + """ + + config["echam"]["test1"] = 17.0 + check_provenance["echam"]["test1"] = None + assert config.get_provenance() == check_provenance + + +def test_set_provenance_for_leaf(config, check_provenance): + """ + Test 5: Reset the provenance of an ``echam`` leave. + """ + + new_prov = { + "line": 2, + "col": 11, + "yaml_file": "someother.yaml", + "category": "userdefined", } + print(config["echam"].get_provenance()) + config["echam"].set_provenance(new_prov) + print(config) + check_provenance["echam"]["type"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["kind"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["path_in_computer"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["a_list"] = [ + new_prov, + new_prov, + new_prov, + ] assert config.get_provenance() == check_provenance -# Test 4 (should give you a provenance of None for the key True) -def test_get_provenance_4(config): - config[True] = "boolean" - check_provenance = { - "echam": { - "type": {"from_file": None, "type": None}, - "files": { - "greenhouse": { - "kind": {"from_file": None, "type": None}, - "path_in_computer": {"from_file": None, "type": None}, - } - }, - }, - "fesom": {"asd": 2, "model": 2}, - "computer": None, - True: None, +def test_set_provenance_for_leaf_of_new_branch(config, check_provenance): + """ + Test 6: Reset the provenance of leaves for an later added branch ``debug_info``. 
+ """ + + new_prov = { + "line": 2, + "col": 11, + "yaml_file": "someother.yaml", + "category": "debuginfo", } + config["debug_info"].set_provenance(new_prov) + check_provenance["debug_info"]["loaded_from_file"] = new_prov assert config.get_provenance() == check_provenance -# Test 5 (reset the provenance of all ``echam`` leaves to "a_string") -def test_get_provenance_5(config): - config["echam"].set_provenance("a_string") - check_provenance = { - "echam": { - "type": "a_string", - "files": { - "greenhouse": {"kind": "a_string", "path_in_computer": "a_string"} - }, - }, - "fesom": {"asd": 2, "model": 2}, - "computer": None, - True: None, +def test_set_provenance_for_leaf_to_a_string(config, check_provenance): + """ + Test 7: Reset the provenance of all ``echam`` leaves to "a_string") + """ + + new_prov = "a_string" + config["echam"].set_provenance(new_prov) + check_provenance["echam"]["type"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["kind"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["path_in_computer"] = new_prov + check_provenance["echam"]["files"]["greenhouse"]["a_list"] = [ + new_prov, + new_prov, + new_prov, + ] + assert config.get_provenance() == check_provenance + + +def test_set_provenance_for_a_new_leaf(config, check_provenance): + """ + Test 8: Rest the provenanve of fesom entry. + """ + + config["fesom"] = {"asd": 0} + new_prov = { + "line": 2, + "col": 11, + "yaml_file": "someother.yaml", + "category": "set_for_unknown_leaf", } + config["fesom"] = provenance.DictWithProvenance(config["fesom"], {}) + config["fesom"].set_provenance(new_prov) + check_provenance["fesom"] = {"asd": None} + check_provenance["fesom"]["asd"] = new_prov assert config.get_provenance() == check_provenance -# Test 6 (reset the provenance of a leaf) -def test_get_provenance_6(config): - config["echam"]["files"]["greenhouse"].provenance["kind"] = "a_new_string" - check_provenance = { - "echam": { - "type": "a_string", - "files": { - "greenhouse": {"kind": "a_new_string", "path_in_computer": "a_string"} - }, +def test_provenance_update(config, check_provenance): + """ + Test 9: Checks that the method update preserves the provenance history + """ + + new_prov = { + "line": 2, + "col": 11, + "yaml_file": "someother.yaml", + "category": "set_for_unknown_leaf", + } + new_config = { + "echam": provenance.DictWithProvenance({"type": "mpi_atmosphere"}, {}) + } + new_config["echam"].set_provenance(new_prov) + + config["echam"].update(new_config["echam"]) + check_provenance["echam"]["type"] = new_prov + assert config.get_provenance() == check_provenance + # Checks that update preserves provenance history + assert config["echam"]["type"].provenance == [ + { + "line": 2, + "col": 11, + "yaml_file": "/Users/mandresm/Codes/esm_tools/tests/test_esm_parser/example2.yaml", + "category": "runscript", }, - "fesom": {"asd": 2, "model": 2}, - "computer": None, - True: None, + None, + { + "line": 2, + "col": 11, + "extended_by": "dict.update", + "yaml_file": "someother.yaml", + "category": "set_for_unknown_leaf", + }, + ] + + +def test_set_provenance_for_a_list_leaf(config, check_provenance): + """ + Test 9: Reset the provenance of a list") + """ + + new_prov = { + "line": 2, + "col": 11, + "yaml_file": "someother.yaml", + "category": "this_is_for_a_list", } + config["fesom"] = {"asd": 0} + config["fesom"]["list"] = [30, 19] + config["fesom"] = provenance.DictWithProvenance(config["fesom"], {}) + config["fesom"]["list"].set_provenance(new_prov) + check_provenance["fesom"] = {} + 
check_provenance["fesom"]["list"] = [new_prov, new_prov] + check_provenance["fesom"]["asd"] = None assert config.get_provenance() == check_provenance -def test_extract_dict(): +def test_extract_dict_config(example_path1): + """ + Test 10: Test the extraction of config for all allowed variable types. + """ + esm_tools_loader = yaml_to_dict.EsmToolsLoader() - file_path = pathlib.Path("example.yaml") + os.environ["USER"] = "some_user" config = { "person": { "name": "Paul Gierz", - "username": "nwieters", + "username": os.environ["USER"], "a_string": " hello world I am here to make your life impossible ", "my_var": "MY_VAR", "my_other_var": ["a", "b", "c"], @@ -176,132 +280,160 @@ def test_extract_dict(): } } - with open(file_path, "r") as file: - esm_tools_loader.set_filename(file_path) + with open(example_path1, "r") as file: + esm_tools_loader.set_filename(example_path1) data, data2 = esm_tools_loader.load(file) assert data == config -def test_extract_provenance(): +def test_check_provenance_list(example_path1): + """ + Test 11: Check provenance of a list entry + """ + + os.environ["USER"] = "some_user" esm_tools_loader = yaml_to_dict.EsmToolsLoader() - file_path = pathlib.Path("example.yaml") - file_path = os.path.abspath(file_path) - provenance = { - "person": { - "name": {"line": 1, "col": 8, "yaml_file": file_path, "category": "None"}, - "username": { - "line": 2, - "col": 12, - "yaml_file": file_path, - "category": "None", - }, - "a_string": { - "line": 3, - "col": 12, - "yaml_file": file_path, - "category": "None", - }, - "my_var": { - "line": 8, - "col": 10, - "yaml_file": file_path, - "category": "None", - }, - "my_other_var": [ - {"line": 10, "col": 8, "yaml_file": file_path, "category": "None"}, - {"line": 12, "col": 8, "yaml_file": file_path, "category": "None"}, - {"line": 13, "col": 8, "yaml_file": file_path, "category": "None"}, - ], - "my_other_list": [ - {"line": 14, "col": 18, "yaml_file": file_path, "category": "None"}, - {"line": 14, "col": 21, "yaml_file": file_path, "category": "None"}, - {"line": 14, "col": 24, "yaml_file": file_path, "category": "None"}, - ], - "my_bolean": { - "line": 16, - "col": 13, - "yaml_file": file_path, - "category": "None", - }, - "my_int": { - "line": 17, - "col": 10, - "yaml_file": file_path, - "category": "None", - }, - "my_int2": { - "line": 18, - "col": 11, - "yaml_file": file_path, - "category": "None", - }, - "list_with_dict_inside": [ - {"line": 20, "col": 4, "yaml_file": file_path, "category": "None"}, - {"line": 21, "col": 4, "yaml_file": file_path, "category": "None"}, - { - "my_dict": { - "foo": [ - { - "line": 24, - "col": 10, - "yaml_file": file_path, - "category": "None", - }, - { - "line": 25, - "col": 10, - "yaml_file": file_path, - "category": "None", - }, - { - "my_dict": { - "foo": { - "line": 27, - "col": 17, - "yaml_file": file_path, - "category": "None", - } - } - }, - ] - } - }, - ], - } + check_prov = [ + {"line": 15, "col": 19, "yaml_file": example_path1, "category": "runscript"}, + {"line": 15, "col": 22, "yaml_file": example_path1, "category": "runscript"}, + {"line": 15, "col": 25, "yaml_file": example_path1, "category": "runscript"}, + ] + + with open(example_path1, "r") as file: + esm_tools_loader.set_filename(file) + data, data2 = esm_tools_loader.load(file) + + config = provenance.DictWithProvenance(data, data2) + assert config["person"]["my_other_list"].get_provenance() == check_prov + + +def test_check_set_provenance_list(example_path1): + """ + Test 12: Check set_provenance of a list entry + """ + + 
os.environ["USER"] = "some_user" + esm_tools_loader = yaml_to_dict.EsmToolsLoader() + new_prov = { + "line": 15, + "col": 25, + "yaml_file": "example.yaml", + "category": "from_a_list", } + check_prov = [new_prov, new_prov, new_prov] - with open(file_path, "r") as file: - esm_tools_loader.set_filename(file_path) + with open(example_path1, "r") as file: + esm_tools_loader.set_filename(file) data, data2 = esm_tools_loader.load(file) - assert data2 == provenance + config = provenance.DictWithProvenance(data, data2) + config["person"]["my_other_list"].set_provenance(new_prov) + assert config["person"]["my_other_list"].get_provenance() == check_prov + +def test_check_set_provenance_of_single_list_entry(example_path1): + """ + Test 13: Check set_provenance of a single list entry + """ -# Test 7 (reset the provenance of a leaf) -def test_get_provenance_7(config): - config_fesom = provenance.DictWithProvenance( - {"fesom": {"update_test": True}}, "new_provenance" - ) + os.environ["USER"] = "some_user" + esm_tools_loader = yaml_to_dict.EsmToolsLoader() + old_prov1 = { + "line": 15, + "col": 19, + "yaml_file": example_path1, + "category": "runscript", + } + old_prov2 = { + "line": 15, + "col": 22, + "yaml_file": example_path1, + "category": "runscript", + } + new_prov = { + "line": 15, + "col": 25, + "yaml_file": "example.yaml", + "category": "from_a_second_list", + } + check_prov = [old_prov1, old_prov2, new_prov] - config["fesom"].update(config_fesom["fesom"]) + with open(example_path1, "r") as file: + esm_tools_loader.set_filename(file) + data, data2 = esm_tools_loader.load(file) - check_provenance = { - "echam": { - "type": "a_string", - "files": { - "greenhouse": {"kind": "a_new_string", "path_in_computer": "a_string"} - }, - }, - "fesom": {"asd": 2, "model": 2, "update_test": "new_provenance"}, - "computer": None, - True: None, + config = provenance.DictWithProvenance(data, data2) + config["person"]["my_other_list"][2].provenance = provenance.Provenance(new_prov) + assert config["person"]["my_other_list"].get_provenance() == check_prov + + +def test_check_set_provenance_of_single_list_entry(example_path1): + """ + Test 14: Check get_provenance raises the correct error when the provenance of an + item is not of the type provenance.Provenance + """ + + os.environ["USER"] = "some_user" + esm_tools_loader = yaml_to_dict.EsmToolsLoader() + new_prov = { + "line": 15, + "col": 25, + "yaml_file": "example.yaml", + "category": "from_a_list", } - assert config.get_provenance() == check_provenance + with open(example_path1, "r") as file: + esm_tools_loader.set_filename(file) + data, data2 = esm_tools_loader.load(file) + + config = provenance.DictWithProvenance(data, data2) + + with pytest.raises( + ValueError, + match="Provenance must be an instance of the provenance.Provenance class!", + ): + config["person"]["my_other_list"][2].provenance = new_prov + + +def test_keep_provenance_in_recursive_function(config): + """ + Test 15: Test that provenance is not modified in a method when it has the + keep_provenance_in_recursive_function decorator + """ + @provenance.keep_provenance_in_recursive_function + def change_elem(tree, rhs): + return provenance.wrapper_with_provenance_factory("new_val", {"modified": True}) + + tree = [] + + check_provenance1 = [ + { + "line": 2, + "col": 11, + "yaml_file": "/Users/mandresm/Codes/esm_tools/tests/test_esm_parser/example2.yaml", + "category": "runscript", + }, + { + "modified": True, + "extended_by": ".change_elem at 0x11433f8b0>", + }, + ] + check_provenance2 = [ + { + 
"line": 9, + "col": 19, + "yaml_file": "/Users/mandresm/Codes/esm_tools/tests/test_esm_parser/example2.yaml", + "category": "runscript", + }, + { + "modified": True, + "extended_by": ".change_elem at 0x11433f8b0>", + }, + ] -def test_dump_config_with_provenance(config, yaml): - """Prints out the dictionary with comments""" - breakpoint() + rhs1 = change_elem(tree, config["echam"]["type"]) + rhs2 = change_elem(tree, config["echam"]["files"]["greenhouse"]["a_list"][1]) - print(yaml.dump(config.get_provenance(), sys.stdout)) + assert rhs1 == "new_val" and rhs1.provenance[0] == check_provenance1[0] + assert rhs2 == "new_val" and rhs2.provenance[0] == check_provenance2[0] diff --git a/utils/environment.yaml b/utils/environment.yaml index 48fe558a1..d4c3ca285 100644 --- a/utils/environment.yaml +++ b/utils/environment.yaml @@ -30,9 +30,11 @@ dependencies: - packaging - pandas>=1.0 - psutil + - pytest - pyyaml - pyyaml>=5.1 - questionary + - ruamel.yaml==0.17.32 - semver - sqlalchemy - tabulate