diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000..61a3dd3975
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,27 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    # Only update major versions
+    ignore:
+      - dependency-name: "*"
+        update-types:
+          - "version-update:semver-minor"
+          - "version-update:semver-patch"
+    # Below config mirrors the example at
+    # https://github.com/dependabot/dependabot-core/blob/main/.github/dependabot.yml
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "sunday"
+      time: "16:00"
+    groups:
+      all-actions:
+        patterns: [ "*" ]
+    assignees:
+      - "glatterf42"
+      - "khaeru"
+    labels:
+      - "dependencies"
+    reviewers:
+      - "glatterf42"
+      - "khaeru"
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
index 86aef3c2c9..c2eb20af2c 100644
--- a/.github/workflows/pytest.yaml
+++ b/.github/workflows/pytest.yaml
@@ -1,8 +1,6 @@
 name: Test

 on:
-  push:
-    branches: [ main ]
   pull_request:
     branches: [ main , "migrate-*"]
   schedule:
@@ -18,42 +16,65 @@ jobs:
   pytest:
     strategy:
       matrix:
-        # One job per OS; latest python version testable on GitHub actions.
-        # These should match the versions used in the "pytest" workflows of both
-        # ixmp and message_ix.
-        version:
-          - { os: macos-latest, python: "3.11" }
-          - { os: ubuntu-latest, python: "3.11" }
-          - { os: windows-latest, python: "3.11" }
-        # Versions of both ixmp and message_ix to use
+        os:
+          - macos-latest
+          - ubuntu-latest
+          - windows-latest
         upstream:
-          - { version: v3.4.0, extra-deps: '"pandas<2.0"' } # Minimum version given in setup.cfg
-          - { version: v3.5.0, extra-deps: '"pandas<2.0"' }
-          - { version: v3.6.0, extra-deps: '"pandas<2.0"' }
-          - { version: v3.7.0, extra-deps: "" } # Latest released version
-          - { version: main, extra-deps: "" } # Development version
+          # In each group:
+          # - Versions of ixmp and message_ix to test.
+          # - Latest supported Python version for those or other dependencies.
+          # - Extra dependencies, in particular fixed/maximum versions to resolve conflicts.
+          #   - dask[dataframe] >= 2024.3.0 requires dask-expr and in turn pandas >= 2.0.
+          #     https://github.com/iiasa/message-ix-models/pull/156#issuecomment-2020152360
+          #   - genno: upstream versions < 3.8.0 import genno.computations, removed in genno 1.25.0.
+          #     https://github.com/iiasa/message-ix-models/pull/156
+          #   - pytest: upstream versions < 3.9.0 use a hook argument removed in pytest 8.1.0.
+          #     https://github.com/iiasa/message-ix-models/pull/155
+          #
+          # Minimum version given in pyproject.toml
+          - version: v3.4.0
+            python-version: "3.11"
+            extra-deps: '"dask < 2024.3.0" "genno < 1.25" "pandas < 2.0" "pytest == 8.0.0"' #
+          - version: v3.5.0
+            python-version: "3.11"
+            extra-deps: '"dask < 2024.3.0" "genno < 1.25" "pandas < 2.0" "pytest == 8.0.0"' #
+          - version: v3.6.0
+            python-version: "3.11"
+            extra-deps: '"dask < 2024.3.0" "genno < 1.25" "pandas < 2.0" "pytest == 8.0.0"' #
+          - version: v3.7.0
+            python-version: "3.11"
+            extra-deps: 'dask[dataframe] "genno < 1.25" "pytest == 8.0.0"' #
+          # Latest released version
+          - version: v3.8.0
+            python-version: "3.12"
+            extra-deps: 'dask[dataframe] "pytest == 8.0.0"' #
+          # Development version
+          - version: main
+            python-version: "3.12"
+            extra-deps: 'dask[dataframe]' #

     fail-fast: false

-    runs-on: ${{ matrix.version.os }}
-    name: ${{ matrix.version.os }}-py${{ matrix.version.python }}-upstream-${{ matrix.upstream.version }}
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.os }}-py${{ matrix.upstream.python-version }}-upstream-${{ matrix.upstream.version }}

     steps:
     - name: Cache test data
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
         path: message-local-data
-        key: ${{ matrix.version.os }}
+        key: ${{ matrix.os }}

     - name: Check out message-ix-models
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         lfs: true
         fetch-depth: ${{ env.depth }}

-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
-        python-version: ${{ matrix.version.python }}
+        python-version: ${{ matrix.upstream.python-version }}
         cache: pip
         cache-dependency-path: "**/pyproject.toml"
@@ -62,7 +83,7 @@ jobs:
         version: 25.1.1
         license: ${{ secrets.GAMS_LICENSE }}

-    - uses: ts-graphviz/setup-graphviz@v1
+    - uses: ts-graphviz/setup-graphviz@v2
       with:
         macos-skip-brew-update: true
@@ -98,7 +119,9 @@ jobs:
       shell: bash

     - name: Upload test coverage to Codecov.io
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }} # required

   pre-commit:
     name: Code quality

     runs-on: ubuntu-latest

     steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with: { python-version: "3.11" }
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
+      with: { python-version: "3.12" }
     - name: Force recreation of pre-commit virtual environment for mypy
       if: github.event_name == 'schedule' # Comment this line to run on a PR
       run: gh cache list -L 999 | cut -f2 | grep pre-commit | xargs -I{} gh cache delete "{}" || true
       env: { GH_TOKEN: "${{ github.token }}" }

-    - uses: pre-commit/action@v3.0.0
+    - uses: pre-commit/action@v3.0.1
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5b5f01abaf..8c5cf505ab 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,9 +8,9 @@ repos:
       pass_filenames: false
       language: python
-      entry: bash -c ". ${PRE_COMMIT_MYPY_VENV:-/dev/null}/bin/activate 2>/dev/null; mypy $0 $@"
+      entry: bash -c ". ${PRE_COMMIT_MYPY_VENV:-/dev/null}/bin/activate 2>/dev/null; mypy $0 $@; python -m pip list"
       additional_dependencies:
-        - mypy >= 1.8.0
+        - mypy >= 1.9.0
         - plotnine
         - pytest
         - sdmx1
@@ -20,7 +20,7 @@ repos:
         - "message-ix @ git+https://github.com/iiasa/message_ix.git@main"
       args: ["."]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.13
+    rev: v0.3.2
     hooks:
     - id: ruff
     - id: ruff-format
diff --git a/MANIFEST.in b/MANIFEST.in
index ae5bffc24a..46b272c504 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,4 +3,8 @@ prune message_ix_models/data/test/iea
 prune message_ix_models/data/test/MESSAGEix-GLOBIOM_1.1_R11_no-policy_baseline
 prune message_ix_models/data/test/ssp
 prune message_ix_models/data/water/*
+# Larger package data
+# - Not distributed on PyPI.
+# - Should be fetched with Pooch from GitHub.
+exclude message_ix_models/data/ssp/*.gz
 exclude message_ix_models/data/water/*.tar.xz
diff --git a/doc/api/util.rst b/doc/api/util.rst
index 87800c156a..3fe5aaeeea 100644
--- a/doc/api/util.rst
+++ b/doc/api/util.rst
@@ -34,6 +34,7 @@ Commonly used:
    check_support
    convert_units
    copy_column
+   datetime_now_with_tz
    ffill
    identify_nodes
    iter_keys
@@ -52,6 +53,7 @@ Commonly used:
    same_node
    same_time
    series_of_pint_quantity
+   show_versions

 .. automodule:: message_ix_models.util
    :members:
diff --git a/doc/cli.rst b/doc/cli.rst
index 7a1393a859..7341da56a8 100644
--- a/doc/cli.rst
+++ b/doc/cli.rst
@@ -8,7 +8,6 @@ Our goal is that the *semantics* of all commands are similar, so that interactin
 .. contents::
    :local:

-
 Controlling CLI behaviour
 =========================
@@ -16,8 +15,9 @@ To support a variety of complex use-cases, the MESSAGEix stack takes configurati
 :mod:`ixmp` configuration file: :file:`config.json`
 ---------------------------------------------------
+
 :mod:`ixmp` keeps track of named Platforms and their associated databases, and stores information in its :file:`config.json` file.
-See :mod:`ixmp.config`.
+See :ref:`ixmp:configuration` in the documentation.

 List existing platforms::

    $ ixmp platform list
@@ -27,15 +27,29 @@ To add a specific database, you can use the ixmp CLI [1]_::

    $ ixmp platform add [PLATFORMNAME] jdbc oracle [COMPUTER]:[PORT]/[SERVICENAME] [USERNAME] [PASSWORD]

 You may also want to make this the *default* platform.
-Unless told otherwise, :mod:`message_ix_models` creates :class:`~ixmp.Platform` objects without any arguments (``mp = ixmp.Platform()``); this loads the default platform.
+Unless told otherwise, :mod:`message_ix_models` creates :class:`~ixmp.Platform` objects without any arguments (:py:`mp = ixmp.Platform()`); this loads the default platform.
 Set the default::

    $ ixmp platform add default [PLATFORMNAME]

-:mod:`message_ix` stores only one configuration value in :file:`config.json`: ``'message model dir'``, the path to the GAMS model files.
-MESSAGEix-GLOBIOM uses the GAMS model files from the current :mod:`message_ix` ``master`` branch, so you should not set this, or unset it when using :mod:`message_ix_models`.
+:mod:`message_ix` recognizes the following :file:`config.json` value:
+
+``message_model_dir``
+   Path to the GAMS model files.
+   Most code in MESSAGEix-GLOBIOM expects the GAMS model files from the current :mod:`message_ix` ``main`` branch, so you should not set this, or unset it when using :mod:`message_ix_models`.
+
+:mod:`message_ix_models` recognizes the following 2 :file:`config.json` values:
+
+``message_local_data``
+   Path to local data, if it is set and not overridden.
+``no_message_data``
+   If not set or :any:`False`, then the CLI displays a warning message if the private :mod:`message_data` package is not installed::
-:mod:`message_ix_models` will use the :file:`config.json` value ``"message_local_data"`` for local data, if it is set and not overridden.
+
+      Warning: message_data is not installed or cannot be imported; see the documentation via --help
+
+   If set to :any:`True`, then the message is suppressed::
+
+      $ mix-models config set no_message_data true

 .. [1] ``[COMPUTER]`` is in this case either the hostname or the IP address.
@@ -46,7 +60,6 @@ For example, ixmp responds to ``IXMP_DATA``, which tells it where to find the fi
 :mod:`message_ix_models` responds to ``MESSAGE_LOCAL_DATA``; see :ref:`the discussion of local data `.

-
 CLI parameters (arguments and options)
 --------------------------------------
@@ -63,12 +76,12 @@ Consider the following examples::

 In these examples:

-- ``--opt0`` is an option that (potentially) affects **any** command, including the subcommands ``cmd2`` or ``cmd3``.
-- ``--opt1`` and ``arg1`` are an option and mandatory argument to the command ``cmd1``.
-  They might not have any relevance to other ``mix-data`` commands.
-- ``cmd2`` and ``cmd3`` are distinct subcommands of ``cmd1``.
+- :program:`--opt0` is an option that (potentially) affects **any** command, including the subcommands :program:`cmd2` or :program:`cmd3`.
+- :program:`--opt1` and :program:`arg1` are an option and mandatory argument to the command :program:`cmd1`.
+  They might not have any relevance to other :program:`mix-models` commands.
+- :program:`cmd2` and :program:`cmd3` are distinct subcommands of :program:`cmd1`.

-  - They *may* respond to ``--opt1`` and ``arg1``, and to ``--opt0``; at least, they *must* not contradict them.
+  - They *may* respond to :program:`--opt1` and :program:`arg1`, and to :program:`--opt0`; at least, they *must* not contradict them.
   - They each may have their own options and arguments, which can be distinct.

 .. tip:: Use ``--help`` for any (sub)command to read about its behaviour.
@@ -80,7 +93,7 @@ For some features of the code, the default behaviour is very elaborate and serve
 This default behaviour or optional behaviour is defined by reading an input file.
 These are stored in the :ref:`package data ` directory.

-For example, ``mix-models report`` loads reporting configuration from :file:`message_ix_models/data/report/global.yaml`, a YAML file with hundreds of lines.
+For example, :program:`mix-models report` loads reporting configuration from :file:`message_ix_models/data/report/global.yaml`, a YAML file with hundreds of lines.
 Optionally, a different file can be used::

    $ mix-models report --config other
@@ -99,77 +112,86 @@ Important CLI options and commands
 Top-level options and commands
 ------------------------------

-``mix-models --help`` describes these::
+:program:`mix-models --help` describes these::

-    $ mix-models --help
     Usage: mix-models [OPTIONS] COMMAND [ARGS]...

       Command-line interface for MESSAGEix-GLOBIOM model tools.

       Every tool and script in this repository is accessible through this CLI.
       Scripts are grouped into commands and sub-commands. For help on specific
-      (sub)commands, use --help, e.g.:
+      (sub)commands, use --help, for instance:

-          mix-models cd-links --help
-          mix-models cd-links run --help
+          mix-models report --help
+          mix-models ssp gen-structures --help

       The top-level options --platform, --model, and --scenario are used by
-      commands that access specific message_ix scenarios; these can also be
-      specified with --url.
+      commands that access specific MESSAGEix scenarios in a specific ixmp
+      platform/database; these can also be specified with --url.

-      For more information, see
-      https://docs.messageix.org/projects/models2/en/latest/cli.html
+      For complete documentation, see
+      https://docs.messageix.org/projects/models/en/latest/cli.html

     Options:
       --url ixmp://PLATFORM/MODEL/SCENARIO[#VERSION]
                            Scenario URL.
-      --platform PLATFORM  Configured platform name.
+      --platform PLATFORM  ixmp platform name.
       --model MODEL        Model name for some commands.
       --scenario SCENARIO  Scenario name for some commands.
-      --version INTEGER    Scenario version.
+      --version INTEGER    Scenario version for some commands.
       --local-data PATH    Base path for local data.
       -v, --verbose        Print DEBUG-level log messages.
       --help               Show this message and exit.

     Commands:
-      cd-links         CD-LINKS project.
-      dl               Retrieve data from primary sources.
-      engage           ENGAGE project.
-      iiasapp          Import power plant capacity.
-      material         Model with materials accounting.
-      prep-submission  Prepare scenarios for submission to database.
-      report           Postprocess results.
-      res              MESSAGE-GLOBIOM reference energy system (RES).
-      techs            Export data from data/technology.yaml to CSV.
-      transport        MESSAGEix-Transport variant.
-
-To explain further:
-
-``--platform PLATFORM`` or ``--url``
+      buildings         MESSAGEix-Buildings model.
+      cd-links          CD-LINKS project.
+      config            Get and set configuration keys.
+      covid             COVID project.
+      engage            ENGAGE project.
+      export-test-data  Prepare data for testing.
+      fetch             Retrieve data from primary sources.
+      iiasapp           Import power plant capacity.
+      last-log          Show the location of the last log file, if any.
+      material          Model with materials accounting.
+      model             MESSAGEix-GLOBIOM reference energy system (RES).
+      navigate          NAVIGATE project.
+      prep-submission   Prepare scenarios for submission to an IIASA Scenario...
+      report            Postprocess results.
+      res               MESSAGEix-GLOBIOM reference energy system (RES).
+      ssp               Shared Socioeconomic Pathways (SSP) project.
+      techs             Export metadata to technology.csv.
+      testing           Manipulate test data.
+      transport         MESSAGEix-Transport variant.
+      water-ix          MESSAGEix-Water and Nexus variant.
+
+Further information about the top-level options:
+
+:program:`--platform PLATFORM` or :program:`--url`
   By default, message_data connects to the default ixmp Platform.
   These options direct it to work with a different Platform.

-``--model MODEL --scenario SCENARIO`` or ``--url``
+:program:`--model MODEL --scenario SCENARIO` or :program:`--url`
   Many commands use an *existing* |Scenario| as a starting point, and begin by cloning that Scenario to a new (model name, scenario name).
   For any such command, these top-level options define the starting point/initial Scenario to clone/'baseline'.
-  In contrast, see ``--output-model``, below.
-
+  In contrast, see :program:`--output-model`, below.

 Common options
 --------------
+
 Since :mod:`message_ix_models.model` and :mod:`message_ix_models.project` codes often perform similar tasks, their CLI options and arguments are provided in :mod:`.util.click` for easy re-use.
 These include:

-``ssp`` argument
+:program:`SSP` argument
   This takes one of the values 'SSP1', 'SSP2', or 'SSP3'.
   Commands that will not work for one or more of the SSPs should check the argument value given by the user and raise :class:`NotImplementedError`.
-``--output-model NAME`` option
-  This option is a counterpart to the top-level ``--url/--model/--scenario`` options.
+:program:`--output-model NAME` option
+  This option is a counterpart to the top-level :program:`--url`, :program:`--model`, or :program:`--scenario` options.
   A command that starts from one Scenario, and builds one or more Scenarios from it will clone *to* a new (model name, scenario name);
-  ``--output-model`` gives the model name.
+  :program:`--output-model` gives the model name.
   Current code generates a variety of fixed (non-configurable) scenario names; use ``--help`` for each command to see which.
diff --git a/doc/conf.py b/doc/conf.py
index cf4c958948..5bcb32c841 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -143,6 +143,7 @@ def local_inv(name: str, *parts: str) -> Optional[str]:
     ),
     "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
     "pint": ("https://pint.readthedocs.io/en/stable/", None),
+    "platformdirs": ("https://platformdirs.readthedocs.io/en/latest", None),
     "pooch": ("https://www.fatiando.org/pooch/latest/", None),
     "pytest": ("https://docs.pytest.org/en/stable/", None),
     "python": ("https://docs.python.org/3/", None),
diff --git a/doc/pkg-data/codelists.rst b/doc/pkg-data/codelists.rst
index 95188bc151..114e1734b3 100644
--- a/doc/pkg-data/codelists.rst
+++ b/doc/pkg-data/codelists.rst
@@ -11,12 +11,14 @@ These codelists correspond to :doc:`sets in the generic MESSAGE IAM formulation
 Commodities (``commodity.yaml``)
 ================================

-These codes hav the following annotations:
+These codes have the following annotations:

 ``level`` (mandatory)
    Level where this commodity typically (not exclusively) occurs.
 ``units`` (mandatory)
    Units typically associated with this commodity.
+``iea-eweb-flow`` (optional)
+   List of ``FLOW`` codes from the IEA :ref:`tools-iea-web` associated with this MESSAGEix-GLOBIOM commodity.
 ``iea-eweb-product`` (optional)
    List of ``PRODUCT`` codes from the IEA :ref:`tools-iea-web` associated with this MESSAGEix-GLOBIOM commodity.
diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst
index 79b30867bd..36f8ababa3 100644
--- a/doc/whatsnew.rst
+++ b/doc/whatsnew.rst
@@ -4,6 +4,29 @@ What's new
 .. Next release
 .. ============

+v2024.4.2
+=========
+
+- The :class:`.SSPUpdate` data provider pulls data from the SSP 2024 "Release 3.0" data files, and handles both the earlier and current structures (:pull:`156`).
+- Improve :class:`.ExoDataSource` with :meth:`.raise_on_extra_kw` utility method, automatic copy of source keyword arguments (:pull:`156`).
+- Expose :func:`.node.nodes_ex_world` for use as a genno (reporting) operator.
+- Raise DeprecationWarning from :func:`.util.sdmx.eval_anno`; remove internal usage of this deprecated function (:pull:`156`).
+- Reduce verbosity when using the :program:`mix-models` CLI when :mod:`message_data` is not installed (:issue:`37`, :pull:`156`).
+- Improve logging (:pull:`156`).
+
+  - Use multi-threaded logging for better performance.
+    Logging to stdout and file is on a separate thread and does not block operations on the main thread.
+  - Add automatic file logging.
+    Log versions of packages to file when using :func:`.workflow.make_click_command`.
+  - New CLI command :program:`mix-models last-log` to retrieve the location of the latest log file.
+
+- Update :doc:`cli` (:pull:`156`).
+- Improve performance in :func:`.disutility.data_conversion` (:pull:`156`).
+- Use :func:`platformdirs.user_cache_path` in more places; remove cache-path handling code (:pull:`156`).
+- Add :func:`.util.datetime_now_with_tz` (:pull:`156`).
+- Add :func:`.util.show_versions`, wrapping :func:`ixmp.util.show_versions` and returning its output as :class:`str` (:pull:`156`).
+- :func:`.util.private_data_path` returns an alternate, local data path if :mod:`message_data` is not installed (:pull:`156`).
+- Annotate :py:`c="transport"` in :ref:`the commodity code list ` with associated :ref:`IEA (E)WEB ` flows (:pull:`153`).
+
 v2024.1.29
 ==========
diff --git a/message_ix_models/__init__.py b/message_ix_models/__init__.py
index 9c120879ef..ad8863248c 100644
--- a/message_ix_models/__init__.py
+++ b/message_ix_models/__init__.py
@@ -20,8 +20,8 @@
     # Package is not installed
     __version__ = "999"

-# No logging to stdout (console) by default
-setup_logging(console=False)
+# By default, no logging to console/stdout or to file
+setup_logging(console=False, file=False)

 # Use iam_units.registry as the default pint.UnitsRegistry
 pint.set_application_registry(registry)
diff --git a/message_ix_models/cli.py b/message_ix_models/cli.py
index 508696dbcc..a80791e4de 100644
--- a/message_ix_models/cli.py
+++ b/message_ix_models/cli.py
@@ -2,17 +2,20 @@
 Every tool and script in this repository is accessible through this CLI. Scripts are
 grouped into commands and sub-commands. For help on specific (sub)commands, use --help,
-e.g.:
+for instance:

 \b
-    mix-models cd-links --help
-    mix-models cd-links run --help
+    mix-models report --help
+    mix-models ssp gen-structures --help

 The top-level options --platform, --model, and --scenario are used by commands that
-access specific message_ix scenarios; these can also be specified with --url.
+access specific MESSAGEix scenarios in a specific ixmp platform/database; these can also
+be specified with --url.

-For more information, see https://docs.messageix.org/projects/models2/en/latest/cli.html
+For complete documentation, see
+https://docs.messageix.org/projects/models/en/latest/cli.html
 """
+
 import logging
 import sys
 from pathlib import Path
@@ -20,7 +23,7 @@
 import click
 from ixmp.cli import main as ixmp_cli

-from message_ix_models.util._logging import mark_time
+from message_ix_models.util._logging import flush, mark_time
 from message_ix_models.util._logging import setup as setup_logging
 from message_ix_models.util.click import common_params
 from message_ix_models.util.context import Context
@@ -34,7 +37,7 @@
 @click.option(
     "--url", metavar="ixmp://PLATFORM/MODEL/SCENARIO[#VERSION]", help="Scenario URL."
 )
-@click.option("--platform", metavar="PLATFORM", help="Configured platform name.")
+@click.option("--platform", metavar="PLATFORM", help="ixmp platform name.")
 @click.option(
     "--model", "model_name", metavar="MODEL", help="Model name for some commands."
 )
@@ -44,7 +47,7 @@
     metavar="SCENARIO",
     help="Scenario name for some commands.",
 )
-@click.option("--version", type=int, help="Scenario version.")
+@click.option("--version", type=int, help="Scenario version for some commands.")
 @click.option("--local-data", type=Path, help="Base path for local data.")
 @common_params("verbose")
 @click.pass_context
@@ -52,8 +55,19 @@
 def main(click_ctx, **kwargs):
     # Start timer
     mark_time(quiet=True)

-    # Log to console
-    setup_logging(level="DEBUG" if kwargs.pop("verbose") else "INFO", console=True)
+    # Check for a non-trivial execution of the CLI
+    non_trivial = (
+        not any(s in sys.argv for s in {"last-log", "--help"})
+        and click_ctx.invoked_subcommand != "_test"
+        and "pytest" not in sys.argv[0]
+    )
+
+    # Log to console: either DEBUG or INFO.
+    # Only start file logging for a non-trivial execution.
+    setup_logging(level="DEBUG" if kwargs.pop("verbose") else "INFO", file=non_trivial)
+
+    if "pytest" not in sys.argv[0]:
+        log.debug("CLI invoked with:\n" + "\n ".join(sys.argv))

     # Store the most recently created instance of message_ix_models.Context. click
     # carries this object to any subcommand decorated with @click.pass_obj.
@@ -68,6 +82,9 @@ def main(click_ctx, **kwargs):
     # Close any database connections when the CLI exits
     click_ctx.call_on_close(click_ctx.obj.close_db)

+    # Ensure all log messages are handled
+    click_ctx.call_on_close(flush)
+

 @main.command("export-test-data")
 @click.option("--exclude", default="", help="Sheets to exclude.")
@@ -94,6 +111,17 @@ def export_test_data_cmd(ctx, exclude, nodes, techs):
     mark_time()


+@main.command("last-log")
+@click.pass_obj
+def last_log(ctx):
+    """Show the location of the last log file, if any."""
+    from platformdirs import user_log_path
+
+    log_dir = user_log_path("message-ix-models")
+    if log_files := sorted(log_dir.glob("*T*")):
+        print(log_files[-1])
+
+
 @main.command(hidden=True)
 @click.pass_obj
 def debug(ctx):
@@ -102,6 +130,26 @@ def debug(ctx):
     log.debug(ctx.local_data)


+@main.group("_test", hidden=True)
+def cli_test_group():
+    """Hidden group of CLI commands.
+
+    Other code which needs to test CLI behaviour **may** attach temporary/throw-away
+    commands to this group and then invoke them using :func:`mix_models_cli`. This
+    avoids the need to expose additional commands for testing purposes only.
+    """
+
+
+@cli_test_group.command("log-threads")
+@click.argument("k", type=int)
+@click.argument("N", type=int)
+def _log_threads(k: int, n: int):
+    # Emit many log records
+    log = logging.getLogger("message_ix_models")
+    for i in range(n):
+        log.info(f"{k = } {i = }")
+
+
 # Attach the ixmp "config" CLI
 main.add_command(ixmp_cli.commands["config"])
@@ -123,11 +171,23 @@
     import message_data.cli
 except ImportError:
     # message_data is not installed or contains some ImportError of its own
-    from traceback import format_exception
-
-    # Display information for debugging
-    etype, value, tb = sys.exc_info()
-    print("", *format_exception(etype, value, tb, limit=-1, chain=False)[1:], sep="\n")
+    import ixmp
+
+    if ixmp.config.get("no message_data") is not True:
+        print(
+            "Warning: message_data is not installed or cannot be imported; see the "
+            "documentation via --help"
+        )
+
+    # commented: Display verbose information for debugging
+    # from traceback import format_exception
+    #
+    # etype, value, tb = sys.exc_info()
+    # print(
+    #     "",
+    #     *format_exception(etype, value, tb, limit=-1, chain=False)[1:],
+    #     sep="\n",
+    # )
 else:  # pragma: no cover (needs message_data)
     # Also add message_data submodules
     submodules.extend(
@@ -145,3 +205,7 @@
             continue

         main.add_command(cmd)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/message_ix_models/data/commodity.yaml b/message_ix_models/data/commodity.yaml
index 0c41b45d31..bf257ccbd2 100644
--- a/message_ix_models/data/commodity.yaml
+++ b/message_ix_models/data/commodity.yaml
@@ -141,6 +141,7 @@ transport:
     duty vehicles, civil aviation, freight transport, etc.)
   level: useful
   units: GWa
+  iea-eweb-flow: [DOMESAIR, DOMESNAV, RAIL, ROAD, TRNONSPE]

 # The following codes also appear in a recent (2020-02-28) SSP2 scenario, but
 # are not currently used by model.bare.create_res.
diff --git a/message_ix_models/data/ssp/1706548837040-ssp_basic_drivers_release_3.0_full.csv.gz b/message_ix_models/data/ssp/1706548837040-ssp_basic_drivers_release_3.0_full.csv.gz new file mode 100644 index 0000000000..c7dece1260 --- /dev/null +++ b/message_ix_models/data/ssp/1706548837040-ssp_basic_drivers_release_3.0_full.csv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fffa5c7f87155d4476cd581771a496048e4e96cf4b59a7496cbb4f4fd79394f +size 20847285 diff --git a/message_ix_models/model/__init__.py b/message_ix_models/model/__init__.py index a96809062c..08625fb96b 100644 --- a/message_ix_models/model/__init__.py +++ b/message_ix_models/model/__init__.py @@ -1,4 +1,5 @@ """Code for constructing models/scenarios in the MESSAGEix-GLOBIOM model family.""" + from .config import Config __all__ = ["Config"] diff --git a/message_ix_models/model/bare.py b/message_ix_models/model/bare.py index c5020341dc..9da3d4c348 100644 --- a/message_ix_models/model/bare.py +++ b/message_ix_models/model/bare.py @@ -8,7 +8,6 @@ import message_ix_models from message_ix_models import ScenarioInfo, Spec -from message_ix_models.util import eval_anno from .build import apply_spec from .config import Config @@ -134,7 +133,9 @@ def get_spec(context) -> Spec: add.set["commodity"] = get_codes("commodity") # Add units, associated with commodities - units = set(eval_anno(commodity, "unit") for commodity in add.set["commodity"]) + units = set( + commodity.eval_annotation(id="unit") for commodity in add.set["commodity"] + ) # Deduplicate by converting to a set and then back; not strictly necessary, # but reduces duplicate log entries add.set["unit"] = sorted(filter(None, units)) diff --git a/message_ix_models/model/disutility.py b/message_ix_models/model/disutility.py index 7ff71580f3..f4e8d2a267 100644 --- a/message_ix_models/model/disutility.py +++ b/message_ix_models/model/disutility.py @@ -3,17 +3,16 @@ from copy import copy from functools import partial from itertools import product -from typing import List, Mapping, MutableMapping, Sequence, Union +from typing import List, Mapping, MutableMapping, Sequence import message_ix import pandas as pd -from sdmx.model.v21 import Annotation, Code +from sdmx.model.common import Annotation, Code from message_ix_models import ScenarioInfo, Spec from message_ix_models.model.build import apply_spec from message_ix_models.util import ( broadcast, - eval_anno, make_io, make_matched_dfs, make_source_tech, @@ -24,8 +23,6 @@ log = logging.getLogger(__name__) -CodeLike = Union[str, Code] - def add( scenario: message_ix.Scenario, @@ -81,9 +78,11 @@ def get_spec( fmt = dict(technology=t, group=g) # Format each field in the "input" and "output" annotations - input = {k: v.format(**fmt) for k, v in eval_anno(template, id="input").items()} + input = { + k: v.format(**fmt) for k, v in template.eval_annotation(id="input").items() + } output = { - k: v.format(**fmt) for k, v in eval_anno(template, id="output").items() + k: v.format(**fmt) for k, v in template.eval_annotation(id="output").items() } # - Format the ID string from the template @@ -154,7 +153,7 @@ def func(df): return func -def data_conversion(info, spec) -> MutableMapping[str, pd.DataFrame]: +def data_conversion(info, spec: Spec) -> MutableMapping[str, pd.DataFrame]: """Generate input and output data for disutility conversion technologies.""" common = dict( mode="all", @@ -167,7 +166,7 @@ def data_conversion(info, spec) -> MutableMapping[str, pd.DataFrame]: ) # Use the spec to retrieve information - 
technology = spec["add"].set["technology"] + technology: List[Code] = spec.add.set["technology"] # Data to return data0: Mapping[str, List[pd.DataFrame]] = defaultdict(list) @@ -176,8 +175,8 @@ def data_conversion(info, spec) -> MutableMapping[str, pd.DataFrame]: for t in technology: # Use the annotations on the technology Code to get information about the # commodity, level, and unit - input = eval_anno(t, "input") - output = eval_anno(t, "output") + input = t.eval_annotation(id="input") + output = t.eval_annotation(id="output") if None in (input, output): if t.id == "disutility source": continue # Data for this tech is from data_source() @@ -194,9 +193,6 @@ def data_conversion(info, spec) -> MutableMapping[str, pd.DataFrame]: **common, ) for par, df in i_o.items(): - # Broadcast across nodes - df = df.pipe(broadcast, node_loc=nodes_ex_world(info.N)).pipe(same_node) - if par == "input": # Add input of disutility df = pd.concat( @@ -205,8 +201,14 @@ def data_conversion(info, spec) -> MutableMapping[str, pd.DataFrame]: data0[par].append(df) - # Concatenate to a single data frame per parameter - data = {par: pd.concat(dfs, ignore_index=True) for par, dfs in data0.items()} + # - Concatenate to a single data frame per parameter + # - Broadcast across nodes + data = { + par: pd.concat(dfs, ignore_index=True) + .pipe(broadcast, node_loc=nodes_ex_world(info.N)) + .pipe(same_node) + for par, dfs in data0.items() + } # Create data for capacity_factor data.update(make_matched_dfs(base=data["input"], capacity_factor=1.0)) @@ -219,7 +221,7 @@ def data_source(info, spec) -> Mapping[str, pd.DataFrame]: # List of input levels where disutility commodity must exist levels = set() for t in spec["add"].set["technology"]: - input = eval_anno(t, "input") + input = t.eval_annotation(id="input") if input: levels.add(input["level"]) diff --git a/message_ix_models/model/macro.py b/message_ix_models/model/macro.py index 57419a9610..37ac1177f9 100644 --- a/message_ix_models/model/macro.py +++ b/message_ix_models/model/macro.py @@ -3,6 +3,7 @@ See :doc:`message-ix:macro` for *general* documentation on MACRO and MESSAGE-MACRO. This module contains tools specifically for using these models with MESSAGEix-GLOBIOM. """ + import logging from functools import lru_cache from itertools import product diff --git a/message_ix_models/model/snapshot.py b/message_ix_models/model/snapshot.py index f1afb820bd..60600c4cb9 100644 --- a/message_ix_models/model/snapshot.py +++ b/message_ix_models/model/snapshot.py @@ -1,4 +1,5 @@ """Prepare base models from snapshot data.""" + import logging from pathlib import Path diff --git a/message_ix_models/model/structure.py b/message_ix_models/model/structure.py index 1539672a87..20d9d492f9 100644 --- a/message_ix_models/model/structure.py +++ b/message_ix_models/model/structure.py @@ -13,7 +13,7 @@ from iam_units import registry from sdmx.model.v21 import Annotation, Code, Codelist -from message_ix_models.util import eval_anno, load_package_data, package_data_path +from message_ix_models.util import load_package_data, package_data_path from message_ix_models.util.sdmx import as_codes log = logging.getLogger(__name__) @@ -126,7 +126,7 @@ def generate_product( attributes. 
""" # eval() and remove the original annotation - dims = eval_anno(template, "_generate") + dims = template.eval_annotation(id="_generate") template.pop_annotation(id="_generate") def _base(dim, match): @@ -191,7 +191,7 @@ def generate_set_elements(data: MutableMapping, name) -> None: if name in {"commodity", "technology"}: process_units_anno(name, code, quiet=True) - if eval_anno(code, "_generate"): + if code.eval_annotation(id="_generate"): # Requires a call to generate_product(); do these last deferred.append(code) continue diff --git a/message_ix_models/model/water/build.py b/message_ix_models/model/water/build.py index b32f836106..7b2ffc6e23 100644 --- a/message_ix_models/model/water/build.py +++ b/message_ix_models/model/water/build.py @@ -97,7 +97,7 @@ def get_spec(context: Context) -> Mapping[str, ScenarioInfo]: df_share2 = pd.DataFrame(data=d2) - df_share = df_share.append(df_share2) + df_share = pd.concat([df_share, df_share2]) df_list = df_share.values.tolist() results["map_shares_commodity_total"] = df_list @@ -147,7 +147,7 @@ def get_spec(context: Context) -> Mapping[str, ScenarioInfo]: df_share2 = pd.DataFrame(data=d2) - df_share = df_share.append(df_share2) + df_share = pd.concat([df_share, df_share2]) df_list = df_share.values.tolist() results["map_shares_commodity_total"] = df_list diff --git a/message_ix_models/model/water/cli.py b/message_ix_models/model/water/cli.py index 63fbe8b68a..5c60207423 100644 --- a/message_ix_models/model/water/cli.py +++ b/message_ix_models/model/water/cli.py @@ -263,7 +263,7 @@ def cooling(context, regions, rcps, rels): help="Default running legacy and water (full) otherwise only water, if specified", ) @common_params("output_model") -def report_cli(context, output_model, sdgs, water=False): +def report_cli(context: "Context", output_model, sdgs, water=False): """function to run the water report_full from cli to the scenario defined by the user with --url @@ -276,7 +276,7 @@ def report_cli(context, output_model, sdgs, water=False): SDG : Str Defines if and what water SDG measures are activated """ - reg = context.regions + reg = context.model.regions sc = context.get_scenario() if water: from message_ix_models.model.water.reporting import report diff --git a/message_ix_models/model/water/data/demands.py b/message_ix_models/model/water/data/demands.py index 821d678b77..8ec97fad16 100644 --- a/message_ix_models/model/water/data/demands.py +++ b/message_ix_models/model/water/data/demands.py @@ -388,17 +388,20 @@ def add_sectoral_demands(context: "Context"): ) urban_dis["value"] = (1e-3 * urban_dis["value"]) * (1 - urban_dis["rate"]) - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + urban_dis["node"], - commodity="urban_disconnected", - level="final", - year=urban_dis["year"], - time=urban_dis["time"], - value=urban_dis["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + urban_dis["node"], + commodity="urban_disconnected", + level="final", + year=urban_dis["year"], + time=urban_dis["time"], + value=urban_dis["value"], + unit="km3/year", + ), + ] ) # rural water demand and return rural_mw = rual_withdrawal_df.reset_index(drop=True) @@ -409,17 +412,20 @@ def add_sectoral_demands(context: "Context"): ) rural_mw["value"] = (1e-3 * rural_mw["value"]) * rural_mw["rate"] - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + rural_mw["node"], - commodity="rural_mw", - level="final", - year=rural_mw["year"], - time=rural_mw["time"], - value=rural_mw["value"], - unit="km3/year", - ) + 
dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + rural_mw["node"], + commodity="rural_mw", + level="final", + year=rural_mw["year"], + time=rural_mw["time"], + value=rural_mw["value"], + unit="km3/year", + ), + ] ) rural_dis = rual_withdrawal_df.reset_index(drop=True) @@ -430,50 +436,59 @@ def add_sectoral_demands(context: "Context"): ) rural_dis["value"] = (1e-3 * rural_dis["value"]) * (1 - rural_dis["rate"]) - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + rural_dis["node"], - commodity="rural_disconnected", - level="final", - year=rural_dis["year"], - time=rural_dis["time"], - value=rural_dis["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + rural_dis["node"], + commodity="rural_disconnected", + level="final", + year=rural_dis["year"], + time=rural_dis["time"], + value=rural_dis["value"], + unit="km3/year", + ), + ] ) # manufactury/ industry water demand and return manuf_mw = industrial_withdrawals_df.reset_index(drop=True) manuf_mw["value"] = 1e-3 * manuf_mw["value"] - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + manuf_mw["node"], - commodity="industry_mw", - level="final", - year=manuf_mw["year"], - time=manuf_mw["time"], - value=manuf_mw["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + manuf_mw["node"], + commodity="industry_mw", + level="final", + year=manuf_mw["year"], + time=manuf_mw["time"], + value=manuf_mw["value"], + unit="km3/year", + ), + ] ) manuf_uncollected_wst = industrial_return_df.reset_index(drop=True) manuf_uncollected_wst["value"] = 1e-3 * manuf_uncollected_wst["value"] - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + manuf_uncollected_wst["node"], - commodity="industry_uncollected_wst", - level="final", - year=manuf_uncollected_wst["year"], - time=manuf_uncollected_wst["time"], - value=-manuf_uncollected_wst["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + manuf_uncollected_wst["node"], + commodity="industry_uncollected_wst", + level="final", + year=manuf_uncollected_wst["year"], + time=manuf_uncollected_wst["time"], + value=-manuf_uncollected_wst["value"], + unit="km3/year", + ), + ] ) urban_collected_wst = urban_return_df.reset_index(drop=True) @@ -486,17 +501,20 @@ def add_sectoral_demands(context: "Context"): 1e-3 * urban_collected_wst["value"] ) * urban_collected_wst["rate"] - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + urban_collected_wst["node"], - commodity="urban_collected_wst", - level="final", - year=urban_collected_wst["year"], - time=urban_collected_wst["time"], - value=-urban_collected_wst["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + urban_collected_wst["node"], + commodity="urban_collected_wst", + level="final", + year=urban_collected_wst["year"], + time=urban_collected_wst["time"], + value=-urban_collected_wst["value"], + unit="km3/year", + ), + ] ) rural_collected_wst = rural_return_df.reset_index(drop=True) @@ -509,17 +527,20 @@ def add_sectoral_demands(context: "Context"): 1e-3 * rural_collected_wst["value"] ) * rural_collected_wst["rate"] - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + rural_collected_wst["node"], - commodity="rural_collected_wst", - level="final", - year=rural_collected_wst["year"], - time=rural_collected_wst["time"], - value=-rural_collected_wst["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, 
+ make_df( + "demand", + node="B" + rural_collected_wst["node"], + commodity="rural_collected_wst", + level="final", + year=rural_collected_wst["year"], + time=rural_collected_wst["time"], + value=-rural_collected_wst["value"], + unit="km3/year", + ), + ] ) urban_uncollected_wst = urban_return_df.reset_index(drop=True) urban_uncollected_wst = urban_uncollected_wst.merge( @@ -531,17 +552,20 @@ def add_sectoral_demands(context: "Context"): 1 - urban_uncollected_wst["rate"] ) - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + urban_uncollected_wst["node"], - commodity="urban_uncollected_wst", - level="final", - year=urban_uncollected_wst["year"], - time=urban_uncollected_wst["time"], - value=-urban_uncollected_wst["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + urban_uncollected_wst["node"], + commodity="urban_uncollected_wst", + level="final", + year=urban_uncollected_wst["year"], + time=urban_uncollected_wst["time"], + value=-urban_uncollected_wst["value"], + unit="km3/year", + ), + ] ) rural_uncollected_wst = rural_return_df.reset_index(drop=True) @@ -554,17 +578,20 @@ def add_sectoral_demands(context: "Context"): 1 - rural_uncollected_wst["rate"] ) - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + rural_uncollected_wst["node"], - commodity="rural_uncollected_wst", - level="final", - year=rural_uncollected_wst["year"], - time=rural_uncollected_wst["time"], - value=-rural_uncollected_wst["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + rural_uncollected_wst["node"], + commodity="rural_uncollected_wst", + level="final", + year=rural_uncollected_wst["year"], + time=rural_uncollected_wst["time"], + value=-rural_uncollected_wst["value"], + unit="km3/year", + ), + ] ) # Add 2010 & 2015 values as historical activities to corresponding technologies h_act = dmd_df[dmd_df["year"].isin([2010, 2015])] @@ -859,17 +886,20 @@ def add_water_availability(context: "Context"): unit="km3/year", ) - dmd_df = dmd_df.append( - make_df( - "demand", - node="B" + df_gw["Region"].astype(str), - commodity="groundwater_basin", - level="water_avail_basin", - year=df_gw["year"], - time=df_gw["time"], - value=-df_gw["value"], - unit="km3/year", - ) + dmd_df = pd.concat( + [ + dmd_df, + make_df( + "demand", + node="B" + df_gw["Region"].astype(str), + commodity="groundwater_basin", + level="water_avail_basin", + year=df_gw["year"], + time=df_gw["time"], + value=-df_gw["value"], + unit="km3/year", + ), + ] ) dmd_df["value"] = dmd_df["value"].apply(lambda x: x if x <= 0 else 0) diff --git a/message_ix_models/model/water/data/infrastructure.py b/message_ix_models/model/water/data/infrastructure.py index 68e87752a7..4c09ff839d 100644 --- a/message_ix_models/model/water/data/infrastructure.py +++ b/message_ix_models/model/water/data/infrastructure.py @@ -1,8 +1,8 @@ """Prepare data for adding techs related to water distribution, - treatment in urban & rural""" +treatment in urban & rural""" from collections import defaultdict -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pandas as pd from message_ix import make_df @@ -20,66 +20,17 @@ from message_ix_models import Context -def add_infrastructure_techs(context: "Context"): - """Process water distribution data for a scenario instance. - Parameters - ---------- - context : .Context - Returns - ------- - data : dict of (str -> pandas.DataFrame) - Keys are MESSAGE parameter names such as 'input', 'fix_cost'. 
- Values are data frames ready for :meth:`~.Scenario.add_par`. - Years in the data include the model horizon indicated by - ``context["water build info"]``, plus the additional year 2010. - """ - # TODO reduce complexity of this function from 18 to 15 or less - # Reference to the water configuration - info = context["water build info"] - - # define an empty dictionary - results = {} - sub_time = context.time - # load the scenario from context - scen = context.get_scenario() - - year_wat = (2010, 2015, *info.Y) - - # first activity year for all water technologies is 2020 - first_year = scen.firstmodelyear - - # reading basin_delineation - FILE2 = f"basins_by_region_simpl_{context.regions}.csv" - PATH = package_data_path("water", "delineation", FILE2) - - df_node = pd.read_csv(PATH) - # Assigning proper nomenclature - df_node["node"] = "B" + df_node["BCU_name"].astype(str) - df_node["mode"] = "M" + df_node["BCU_name"].astype(str) - if context.type_reg == "country": - df_node["region"] = context.map_ISO_c[context.regions] - else: - df_node["region"] = f"{context.regions}_" + df_node["REGION"].astype(str) - - # Reading water distribution mapping from csv - path = package_data_path("water", "infrastructure", "water_distribution.xlsx") - df = pd.read_excel(path) - - techs = [ - "urban_t_d", - "urban_unconnected", - "industry_unconnected", - "rural_t_d", - "rural_unconnected", - ] - - df_non_elec = df[df["incmd"] != "electr"].reset_index() - df_dist = df_non_elec[df_non_elec["tec"].isin(techs)] - df_non_elec = df_non_elec[~df_non_elec["tec"].isin(techs)] - df_elec = df[df["incmd"] == "electr"].reset_index() - +def start_creating_input_dataframe( + sdg: str, + df_node: pd.DataFrame, + df_non_elec: pd.DataFrame, + df_dist: pd.DataFrame, + year_wat: tuple, + first_year: int, + sub_time, +) -> pd.DataFrame: + """Creates an input pd.DataFrame and adds some data to it.""" inp_df = pd.DataFrame([]) - # Input Dataframe for non elec commodities for index, rows in df_non_elec.iterrows(): inp_df = pd.concat( @@ -108,10 +59,9 @@ def add_infrastructure_techs(context: "Context"): ), ] ) - - if context.SDG != "baseline": + if sdg != "baseline": for index, rows in df_dist.iterrows(): - inp_df = pd.concat( + return pd.concat( [ inp_df, ( @@ -166,16 +116,138 @@ def add_infrastructure_techs(context: "Context"): ] ) - inp_df = inp_df.append( + return pd.concat( + [ + inp_df, + ( + make_df( + "input", + technology=rows["tec"], + value=rows["value_high"], + unit="-", + level=rows["inlvl"], + commodity=rows["incmd"], + mode="Mf", + ) + .pipe( + broadcast, + map_yv_ya_lt( + year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ) + .pipe(same_node) + .pipe(same_time) + ), + ] + ) + + +def add_infrastructure_techs(context: "Context"): + """Process water distribution data for a scenario instance. + Parameters + ---------- + context : .Context + Returns + ------- + data : dict of (str -> pandas.DataFrame) + Keys are MESSAGE parameter names such as 'input', 'fix_cost'. + Values are data frames ready for :meth:`~.Scenario.add_par`. + Years in the data include the model horizon indicated by + ``context["water build info"]``, plus the additional year 2010. 
+ """ + # TODO reduce complexity of this function from 18 to 15 or less + # Reference to the water configuration + info = context["water build info"] + + # define an empty dictionary + results = {} + sub_time = context.time + # load the scenario from context + scen = context.get_scenario() + + year_wat = (2010, 2015, *info.Y) + + # first activity year for all water technologies is 2020 + first_year = scen.firstmodelyear + + # reading basin_delineation + FILE2 = f"basins_by_region_simpl_{context.regions}.csv" + PATH = package_data_path("water", "delineation", FILE2) + + df_node = pd.read_csv(PATH) + # Assigning proper nomenclature + df_node["node"] = "B" + df_node["BCU_name"].astype(str) + df_node["mode"] = "M" + df_node["BCU_name"].astype(str) + df_node["region"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_node["REGION"].astype(str) + ) + + # Reading water distribution mapping from csv + path = package_data_path("water", "infrastructure", "water_distribution.xlsx") + df = pd.read_excel(path) + + techs = [ + "urban_t_d", + "urban_unconnected", + "industry_unconnected", + "rural_t_d", + "rural_unconnected", + ] + + df_non_elec = df[df["incmd"] != "electr"].reset_index() + df_dist = df_non_elec[df_non_elec["tec"].isin(techs)] + df_non_elec = df_non_elec[~df_non_elec["tec"].isin(techs)] + df_elec = df[df["incmd"] == "electr"].reset_index() + + inp_df = start_creating_input_dataframe( + sdg=context.SDG, + df_node=df_node, + df_non_elec=df_non_elec, + df_dist=df_dist, + year_wat=year_wat, + first_year=first_year, + sub_time=sub_time, + ) + + result_dc = prepare_input_dataframe( + context=context, + sub_time=sub_time, + year_wat=year_wat, + first_year=first_year, + df_node=df_node, + techs=techs, + df_elec=df_elec, + ) + + results_new = {par_name: pd.concat(dfs) for par_name, dfs in result_dc.items()} + + inp_df = pd.concat([inp_df, results_new["input"]]) + # inp_df.dropna(inplace = True) + results["input"] = inp_df + + # add output dataframe + df_out = df[~df["outcmd"].isna()] + df_out_dist = df_out[df_out["tec"].isin(techs)] + df_out = df_out[~df_out["tec"].isin(techs)] + + out_df = pd.DataFrame([]) + for index, rows in df_out.iterrows(): + out_df = pd.concat( + [ + out_df, ( make_df( - "input", + "output", technology=rows["tec"], - value=rows["value_high"], + value=rows["out_value_mid"], unit="-", - level=rows["inlvl"], - commodity=rows["incmd"], - mode="Mf", + level=rows["outlvl"], + commodity=rows["outcmd"], + mode="M1", ) .pipe( broadcast, @@ -187,123 +259,67 @@ def add_infrastructure_techs(context: "Context"): ) .pipe(same_node) .pipe(same_time) - ) - ) - result_dc = defaultdict(list) + ), + ] + ) - for index, rows in df_elec.iterrows(): - if rows["tec"] in techs: - if context.SDG != "baseline": - inp = make_df( - "input", - technology=rows["tec"], - value=rows["value_high"], + if context.SDG != "baseline": + out_df = pd.concat( + [ + out_df, + make_df( + "output", + technology=df_out_dist["tec"], + value=df_out_dist["out_value_mid"], unit="-", - level="final", - commodity="electr", + level=df_out_dist["outlvl"], + commodity=df_out_dist["outcmd"], mode="Mf", - time_origin="year", - node_loc=df_node["node"], - node_origin=df_node["region"], - ).pipe( + ) + .pipe( broadcast, - map_yv_ya_lt( - year_wat, - # 1 because elec commodities don't have technical lifetime - 1, - first_year, - ), + map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), + node_loc=df_node["node"], time=sub_time, ) - - 
result_dc["input"].append(inp) - else: - inp = make_df( - "input", - technology=rows["tec"], - value=rows["value_high"], + .pipe(same_node) + .pipe(same_time), + ] + ) + else: + out_df = pd.concat( + [ + out_df, + make_df( + "output", + technology=df_out_dist["tec"], + value=df_out_dist["out_value_mid"], unit="-", - level="final", - commodity="electr", - mode="Mf", - time_origin="year", - node_loc=df_node["node"], - node_origin=df_node["region"], - ).pipe( + level=df_out_dist["outlvl"], + commodity=df_out_dist["outcmd"], + mode="M1", + ) + .pipe( broadcast, - map_yv_ya_lt( - year_wat, - # 1 because elec commodities don't have technical lifetime - 1, - first_year, - ), + map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), + node_loc=df_node["node"], time=sub_time, ) - - inp = inp.append( - make_df( - "input", - technology=rows["tec"], - value=rows["value_mid"], - unit="-", - level="final", - commodity="electr", - mode="M1", - time_origin="year", - node_loc=df_node["node"], - node_origin=df_node["region"], - ).pipe( - broadcast, - # 1 because elec commodities don't have technical lifetime - map_yv_ya_lt(year_wat, 1, first_year), - time=sub_time, - ) - ) - - result_dc["input"].append(inp) - else: - inp = make_df( - "input", - technology=rows["tec"], - value=rows["value_mid"], - unit="-", - level="final", - commodity="electr", - mode="M1", - time_origin="year", - node_loc=df_node["node"], - node_origin=df_node["region"], - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, 1, first_year), - time=sub_time, - ) - - result_dc["input"].append(inp) - - results_new = {par_name: pd.concat(dfs) for par_name, dfs in result_dc.items()} - - inp_df = inp_df.append(results_new["input"]) - # inp_df.dropna(inplace = True) - results["input"] = inp_df - - # add output dataframe - df_out = df[~df["outcmd"].isna()] - df_out_dist = df_out[df_out["tec"].isin(techs)] - df_out = df_out[~df_out["tec"].isin(techs)] - - out_df = pd.DataFrame([]) - for index, rows in df_out.iterrows(): - out_df = out_df.append( - ( + .pipe(same_node) + .pipe(same_time), + ] + ) + out_df = pd.concat( + [ + out_df, make_df( "output", - technology=rows["tec"], - value=rows["out_value_mid"], + technology=df_out_dist["tec"], + value=df_out_dist["out_value_mid"], unit="-", - level=rows["outlvl"], - commodity=rows["outcmd"], - mode="M1", + level=df_out_dist["outlvl"], + commodity=df_out_dist["outcmd"], + mode="Mf", ) .pipe( broadcast, @@ -312,68 +328,8 @@ def add_infrastructure_techs(context: "Context"): time=sub_time, ) .pipe(same_node) - .pipe(same_time) - ) - ) - - if context.SDG != "baseline": - out_df = out_df.append( - make_df( - "output", - technology=df_out_dist["tec"], - value=df_out_dist["out_value_mid"], - unit="-", - level=df_out_dist["outlvl"], - commodity=df_out_dist["outcmd"], - mode="Mf", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) - ) - else: - out_df = out_df.append( - make_df( - "output", - technology=df_out_dist["tec"], - value=df_out_dist["out_value_mid"], - unit="-", - level=df_out_dist["outlvl"], - commodity=df_out_dist["outcmd"], - mode="M1", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) - ) - out_df = out_df.append( - make_df( - "output", - technology=df_out_dist["tec"], - value=df_out_dist["out_value_mid"], - unit="-", - 
level=df_out_dist["outlvl"], - commodity=df_out_dist["outcmd"], - mode="Mf", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) + .pipe(same_time), + ] ) results["output"] = out_df @@ -383,20 +339,23 @@ def add_infrastructure_techs(context: "Context"): cap_df = pd.DataFrame([]) # Adding capacity factor dataframe for index, rows in df_cap.iterrows(): - cap_df = cap_df.append( - make_df( - "capacity_factor", - technology=rows["tec"], - value=rows["capacity_factor_mid"], - unit="%", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) + cap_df = pd.concat( + [ + cap_df, + make_df( + "capacity_factor", + technology=rows["tec"], + value=rows["capacity_factor_mid"], + unit="%", + ) + .pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), + node_loc=df_node["node"], + time=sub_time, + ) + .pipe(same_node), + ] ) results["capacity_factor"] = cap_df @@ -439,115 +398,249 @@ def add_infrastructure_techs(context: "Context"): var_cost = pd.DataFrame([]) for index, rows in df_inv.iterrows(): - fix_cost = fix_cost.append( - make_df( - "fix_cost", - technology=df_inv["tec"], - value=df_inv["fix_cost_mid"], - unit="USD/km3", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], + fix_cost = pd.concat( + [ + fix_cost, + make_df( + "fix_cost", + technology=df_inv["tec"], + value=df_inv["fix_cost_mid"], + unit="USD/km3", + ).pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), + node_loc=df_node["node"], + ), + ] + ) + + fix_cost = fix_cost[~fix_cost["technology"].isin(techs)] + + results["fix_cost"] = fix_cost + + df_var = df_inv[~df_inv["tec"].isin(techs)] + df_var_dist = df_inv[df_inv["tec"].isin(techs)] + + df_var = df_inv[~df_inv["tec"].isin(techs)] + df_var_dist = df_inv[df_inv["tec"].isin(techs)] + + if context.SDG != "baseline": + for index, rows in df_var.iterrows(): + # Variable cost + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=rows["var_cost_mid"], + unit="USD/km3", + mode="M1", + ).pipe( + broadcast, + map_yv_ya_lt( + year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ), + ] + ) + + # Variable cost for distribution technologies + for index, rows in df_var_dist.iterrows(): + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=rows["var_cost_high"], + unit="USD/km3", + mode="Mf", + ).pipe( + broadcast, + map_yv_ya_lt( + year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ), + ] + ) + results["var_cost"] = var_cost + else: + # Variable cost + for index, rows in df_var.iterrows(): + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=df_var["var_cost_mid"], + unit="USD/km3", + mode="M1", + ).pipe( + broadcast, + map_yv_ya_lt( + year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ), + ] + ) + + for index, rows in df_var_dist.iterrows(): + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=rows["var_cost_mid"], + unit="USD/km3", + mode="M1", + ).pipe( + broadcast, + map_yv_ya_lt( + 
year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ), + ] ) - ) - fix_cost = fix_cost[~fix_cost["technology"].isin(techs)] + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=rows["var_cost_high"], + unit="USD/km3", + mode="Mf", + ).pipe( + broadcast, + map_yv_ya_lt( + year_wat, rows["technical_lifetime_mid"], first_year + ), + node_loc=df_node["node"], + time=sub_time, + ), + ] + ) + results["var_cost"] = var_cost - results["fix_cost"] = fix_cost + return results - df_var = df_inv[~df_inv["tec"].isin(techs)] - df_var_dist = df_inv[df_inv["tec"].isin(techs)] - df_var = df_inv[~df_inv["tec"].isin(techs)] - df_var_dist = df_inv[df_inv["tec"].isin(techs)] +def prepare_input_dataframe( + context: "Context", + sub_time, + year_wat: tuple, + first_year: int, + df_node: pd.DataFrame, + techs: list[str], + df_elec: pd.DataFrame, +) -> defaultdict[Any, list]: + result_dc = defaultdict(list) - if context.SDG != "baseline": - for index, rows in df_var.iterrows(): - # Variable cost - var_cost = var_cost.append( - make_df( - "var_cost", + for _, rows in df_elec.iterrows(): + if rows["tec"] in techs: + if context.SDG != "baseline": + inp = make_df( + "input", technology=rows["tec"], - value=rows["var_cost_mid"], - unit="USD/km3", - mode="M1", + value=rows["value_high"], + unit="-", + level="final", + commodity="electr", + mode="Mf", + time_origin="year", + node_loc=df_node["node"], + node_origin=df_node["region"], ).pipe( broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], + map_yv_ya_lt( + year_wat, + # 1 because elec commodities don't have technical lifetime + 1, + first_year, + ), time=sub_time, ) - ) - # Variable cost for distribution technologies - for index, rows in df_var_dist.iterrows(): - var_cost = var_cost.append( - make_df( - "var_cost", + result_dc["input"].append(inp) + else: + inp = make_df( + "input", technology=rows["tec"], - value=rows["var_cost_high"], - unit="USD/km3", + value=rows["value_high"], + unit="-", + level="final", + commodity="electr", mode="Mf", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), + time_origin="year", node_loc=df_node["node"], - time=sub_time, - ) - ) - results["var_cost"] = var_cost - else: - # Variable cost - for index, rows in df_var.iterrows(): - var_cost = var_cost.append( - make_df( - "var_cost", - technology=rows["tec"], - value=df_var["var_cost_mid"], - unit="USD/km3", - mode="M1", + node_origin=df_node["region"], ).pipe( broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], + map_yv_ya_lt( + year_wat, + # 1 because elec commodities don't have technical lifetime + 1, + first_year, + ), time=sub_time, ) - ) - for index, rows in df_var_dist.iterrows(): - var_cost = var_cost.append( - make_df( - "var_cost", - technology=rows["tec"], - value=rows["var_cost_mid"], - unit="USD/km3", - mode="M1", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, + inp = pd.concat( + [ + inp, + make_df( + "input", + technology=rows["tec"], + value=rows["value_mid"], + unit="-", + level="final", + commodity="electr", + mode="M1", + time_origin="year", + node_loc=df_node["node"], + node_origin=df_node["region"], + ).pipe( + broadcast, + # 1 because elec commodities don't have technical lifetime + map_yv_ya_lt(year_wat, 1, first_year), + 
time=sub_time, + ), + ] ) - ) - var_cost = var_cost.append( - make_df( - "var_cost", - technology=rows["tec"], - value=rows["var_cost_high"], - unit="USD/km3", - mode="Mf", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["technical_lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) + result_dc["input"].append(inp) + else: + inp = make_df( + "input", + technology=rows["tec"], + value=rows["value_mid"], + unit="-", + level="final", + commodity="electr", + mode="M1", + time_origin="year", + node_loc=df_node["node"], + node_origin=df_node["region"], + ).pipe( + broadcast, + map_yv_ya_lt(year_wat, 1, first_year), + time=sub_time, ) - results["var_cost"] = var_cost - return results + result_dc["input"].append(inp) + return result_dc def add_desalination(context: "Context"): @@ -691,35 +784,41 @@ def add_desalination(context: "Context"): for index, rows in df_desal.iterrows(): # Fixed costs # Prepare dataframe for fix_cost - fix_cost = fix_cost.append( - make_df( - "fix_cost", - technology=rows["tec"], - value=rows["fix_cost_mid"], - unit="USD/km3", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), - node_loc=df_node["node"], - ) + fix_cost = pd.concat( + [ + fix_cost, + make_df( + "fix_cost", + technology=rows["tec"], + value=rows["fix_cost_mid"], + unit="USD/km3", + ).pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), + node_loc=df_node["node"], + ), + ] ) results["fix_cost"] = fix_cost # Variable cost - var_cost = var_cost.append( - make_df( - "var_cost", - technology=rows["tec"], - value=rows["var_cost_mid"], - unit="USD/km3", - mode="M1", - ).pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) + var_cost = pd.concat( + [ + var_cost, + make_df( + "var_cost", + technology=rows["tec"], + value=rows["var_cost_mid"], + unit="USD/km3", + mode="M1", + ).pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), + node_loc=df_node["node"], + time=sub_time, + ), + ] ) # Dummy Variable cost for salinewater extrqction @@ -736,17 +835,20 @@ def add_desalination(context: "Context"): results["var_cost"] = var_cost - tl = tl.append( - ( - make_df( - "technical_lifetime", - technology=df_desal["tec"], - value=df_desal["lifetime_mid"], - unit="y", - ) - .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) - .pipe(same_node) - ) + tl = pd.concat( + [ + tl, + ( + make_df( + "technical_lifetime", + technology=df_desal["tec"], + value=df_desal["lifetime_mid"], + unit="y", + ) + .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) + .pipe(same_node) + ), + ] ) results["technical_lifetime"] = tl @@ -809,56 +911,62 @@ def add_desalination(context: "Context"): results_new = {par_name: pd.concat(dfs) for par_name, dfs in result_dc.items()} - inp_df = inp_df.append(results_new["input"]) + inp_df = pd.concat([inp_df, results_new["input"]]) # Adding input dataframe for index, rows in df_desal.iterrows(): - inp_df = inp_df.append( - ( - make_df( - "input", - technology=rows["tec"], - value=1, - unit="-", - level=rows["inlvl"], - commodity=rows["incmd"], - mode="M1", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) - ) + inp_df = pd.concat( + [ + inp_df, + ( + make_df( + "input", + technology=rows["tec"], + value=1, + unit="-", + level=rows["inlvl"], + commodity=rows["incmd"], + mode="M1", + ) + 
.pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), + node_loc=df_node["node"], + time=sub_time, + ) + .pipe(same_node) + .pipe(same_time) + ), + ] ) inp_df.dropna(inplace=True) results["input"] = inp_df - out_df = out_df.append( - ( - make_df( - "output", - technology=rows["tec"], - value=1, - unit="-", - level=rows["outlvl"], - commodity=rows["outcmd"], - mode="M1", - ) - .pipe( - broadcast, - map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) - ) + out_df = pd.concat( + [ + out_df, + ( + make_df( + "output", + technology=rows["tec"], + value=1, + unit="-", + level=rows["outlvl"], + commodity=rows["outcmd"], + mode="M1", + ) + .pipe( + broadcast, + map_yv_ya_lt(year_wat, rows["lifetime_mid"], first_year), + node_loc=df_node["node"], + time=sub_time, + ) + .pipe(same_node) + .pipe(same_time) + ), + ] ) results["output"] = out_df diff --git a/message_ix_models/model/water/data/irrigation.py b/message_ix_models/model/water/data/irrigation.py index d8cf67259b..484277c347 100644 --- a/message_ix_models/model/water/data/irrigation.py +++ b/message_ix_models/model/water/data/irrigation.py @@ -1,6 +1,8 @@ """Prepare data for water use for cooling & energy technologies.""" from typing import TYPE_CHECKING +from typing import TYPE_CHECKING + import pandas as pd from message_ix import make_df @@ -36,10 +38,11 @@ def add_irr_structure(context: "Context"): # Assigning proper nomenclature df_node["node"] = "B" + df_node["BCU_name"].astype(str) df_node["mode"] = "M" + df_node["BCU_name"].astype(str) - if context.type_reg == "country": - df_node["region"] = context.map_ISO_c[context.regions] - else: - df_node["region"] = f"{context.regions}_" + df_node["REGION"].astype(str) + df_node["region"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_node["REGION"].astype(str) + ) # Reference to the water configuration info = context["water build info"] @@ -62,36 +65,42 @@ def add_irr_structure(context: "Context"): node_loc=df_node["region"], ).pipe(broadcast, year_vtg=info.Y) - inp = inp.append( - make_df( - "input", - technology="irrigation_oilcrops", - value=1, - unit="-", - level="water_supply", - commodity="freshwater", - mode="M1", - time="year", - time_origin="year", - node_origin=df_node["region"], - node_loc=df_node["region"], - ).pipe(broadcast, year_vtg=info.Y) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="irrigation_oilcrops", + value=1, + unit="-", + level="water_supply", + commodity="freshwater", + mode="M1", + time="year", + time_origin="year", + node_origin=df_node["region"], + node_loc=df_node["region"], + ).pipe(broadcast, year_vtg=info.Y), + ] ) - inp = inp.append( - make_df( - "input", - technology="irrigation_sugarcrops", - value=1, - unit="-", - level="water_supply", - commodity="freshwater", - mode="M1", - time="year", - time_origin="year", - node_origin=df_node["region"], - node_loc=df_node["region"], - ).pipe(broadcast, year_vtg=info.Y) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="irrigation_sugarcrops", + value=1, + unit="-", + level="water_supply", + commodity="freshwater", + mode="M1", + time="year", + time_origin="year", + node_origin=df_node["region"], + node_loc=df_node["region"], + ).pipe(broadcast, year_vtg=info.Y), + ] ) # year_act = year_vts for tecs with 1 time-step lifetime inp["year_act"] = inp["year_vtg"] @@ -105,7 +114,7 @@ def 
add_irr_structure(context: "Context"): # Average Value :0.101598174 # High Value : 0.017123288 - # inp = inp.append( + # inp = pd.concat([inp, # make_df( # "input", # technology="irrigation_sugarcrops", @@ -119,9 +128,9 @@ def add_irr_structure(context: "Context"): # node_origin=df_node["region"], # node_loc=df_node["region"], # ).pipe(broadcast, year_vtg=year_wat, year_act=year_wat) - # ) + # ]) # - # inp = inp.append( + # inp = pd.concat([inp, # make_df( # "input", # technology="irrigation_oilcrops", @@ -135,9 +144,9 @@ def add_irr_structure(context: "Context"): # node_origin=df_node["region"], # node_loc=df_node["region"], # ).pipe(broadcast, year_vtg=year_wat, year_act=year_wat) - # ) + # ]) # - # inp = inp.append( + # inp = pd.concat([inp, # make_df( # "input", # technology="irrigation_cereal", @@ -151,7 +160,7 @@ def add_irr_structure(context: "Context"): # node_origin=df_node["region"], # node_loc=df_node["region"], # ).pipe(broadcast, year_vtg=year_wat, year_act=year_wat) - # ) + # ]) # inp.loc[(inp['node_loc'] == 'R11_SAS') & # (inp['commodity'] == 'electr'), # "value", @@ -173,42 +182,48 @@ def add_irr_structure(context: "Context"): node_dest=df_node["region"], ).pipe(broadcast, year_vtg=info.Y) - irr_out = irr_out.append( - make_df( - "output", - technology="irrigation_sugarcrops", - value=1, - unit="km3/year", - level="irr_sugarcrops", - commodity="freshwater", - mode="M1", - time="year", - time_dest="year", - node_loc=df_node["region"], - node_dest=df_node["region"], - ).pipe( - broadcast, - year_vtg=info.Y, - ) + irr_out = pd.concat( + [ + irr_out, + make_df( + "output", + technology="irrigation_sugarcrops", + value=1, + unit="km3/year", + level="irr_sugarcrops", + commodity="freshwater", + mode="M1", + time="year", + time_dest="year", + node_loc=df_node["region"], + node_dest=df_node["region"], + ).pipe( + broadcast, + year_vtg=info.Y, + ), + ] ) - irr_out = irr_out.append( - make_df( - "output", - technology="irrigation_oilcrops", - value=1, - unit="km3/year", - level="irr_oilcrops", - commodity="freshwater", - mode="M1", - time="year", - time_dest="year", - node_loc=df_node["region"], - node_dest=df_node["region"], - ).pipe( - broadcast, - year_vtg=info.Y, - ) + irr_out = pd.concat( + [ + irr_out, + make_df( + "output", + technology="irrigation_oilcrops", + value=1, + unit="km3/year", + level="irr_oilcrops", + commodity="freshwater", + mode="M1", + time="year", + time_dest="year", + node_loc=df_node["region"], + node_dest=df_node["region"], + ).pipe( + broadcast, + year_vtg=info.Y, + ), + ] ) irr_out["year_act"] = irr_out["year_vtg"] diff --git a/message_ix_models/model/water/data/pre_processing/hydro_agg_basin.py b/message_ix_models/model/water/data/pre_processing/hydro_agg_basin.py index 6e19eb0044..5a3e245510 100644 --- a/message_ix_models/model/water/data/pre_processing/hydro_agg_basin.py +++ b/message_ix_models/model/water/data/pre_processing/hydro_agg_basin.py @@ -156,9 +156,9 @@ def bias_correction(df): final_temp = pd.concat((final_temp, temp), axis=1) df_monthly = final_temp - df[ - pd.date_range(str(year) + "-01-01", periods=12, freq="M") - ] = final_temp.groupby(final_temp.columns.month, axis=1).mean() + df[pd.date_range(str(year) + "-01-01", periods=12, freq="M")] = ( + final_temp.groupby(final_temp.columns.month, axis=1).mean() + ) # 5 year monthly data df_5y_m = df[df.columns[df.columns.year.isin(years)]] # 5 year annual @@ -319,9 +319,9 @@ def bias_correction(df): ] df_monthly = final_temp - eflow[ - pd.date_range(str(year) + "-01-01", periods=12, freq="M") 
- ] = final_temp.groupby(final_temp.columns.month, axis=1).mean() + eflow[pd.date_range(str(year) + "-01-01", periods=12, freq="M")] = ( + final_temp.groupby(final_temp.columns.month, axis=1).mean() + ) eflow_5y_m = eflow[eflow.columns[eflow.columns.year.isin(years)]] eflow_5y_m.to_csv(wd11 + f"e-flow_5y_m_7p0_{iso3}.csv") diff --git a/message_ix_models/model/water/data/pre_processing/hydro_agg_raster.py b/message_ix_models/model/water/data/pre_processing/hydro_agg_raster.py index b7c3dd8632..0c9d582cd9 100644 --- a/message_ix_models/model/water/data/pre_processing/hydro_agg_raster.py +++ b/message_ix_models/model/water/data/pre_processing/hydro_agg_raster.py @@ -5,6 +5,7 @@ data onto the basin mapping used in the nexus module. """ + import glob # Import packages diff --git a/message_ix_models/model/water/data/water_for_ppl.py b/message_ix_models/model/water/data/water_for_ppl.py index e1350f2f7b..564fcd44b8 100644 --- a/message_ix_models/model/water/data/water_for_ppl.py +++ b/message_ix_models/model/water/data/water_for_ppl.py @@ -18,6 +18,151 @@ from message_ix_models import Context +def missing_tech(x: pd.Series) -> pd.Series: + """Assign values to missing data. + It goes through the input data frame and extract the technologies which + don't have input values and then assign manual values to those technologies + along with assigning them an arbitrary level i.e dummy supply + """ + data_dic = { + "geo_hpl": 1 / 0.850, + "geo_ppl": 1 / 0.385, + "nuc_hc": 1 / 0.326, + "nuc_lc": 1 / 0.326, + "solar_th_ppl": 1 / 0.385, + } + + if data_dic.get(x["technology"]): + if x["level"] == "cooling": + return pd.Series((data_dic.get(x["technology"]), "dummy_supply")) + else: + return pd.Series((data_dic.get(x["technology"]), x["level"])) + else: + return pd.Series((x["value"], x["level"])) + + +def cooling_fr(x: pd.Series) -> float: + """Calculate cooling fraction + Returns the calculated cooling fraction after for two categories; + 1. Technologies that produce heat as an output + cooling_fraction(h_cool) = input value(hi) - 1 + Simply subtract 1 from the heating value since the rest of the part is already + accounted in the heating value + 2. Rest of technologies + h_cool = hi -Hi* h_fg - 1, + where: + h_fg (flue gasses losses) = 0.1 (10% assumed losses) + """ + if "hpl" in x["index"]: + return x["value"] - 1 + else: + return x["value"] - (x["value"] * 0.1) - 1 + + +def shares( + x: pd.Series, + context: "Context", + search_cols_cooling_fraction: list, + hold_df: pd.DataFrame, + search_cols: list, +) -> pd.Series: + """Process share and cooling fraction. + Returns + ------- + Product of value of shares of cooling technology types of regions with + corresponding cooling fraction + """ + for col in search_cols_cooling_fraction: + # MAPPING ISOCODE to region name, assume one country only + col2 = context.map_ISO_c[col] if context.type_reg == "country" else col + cooling_fraction = hold_df[ + (hold_df["node_loc"] == col2) + & (hold_df["technology_name"] == x["technology"]) + ]["cooling_fraction"] + x[col] = x[col] * cooling_fraction + + results: list[Any] = [] + for i in x: + if isinstance(i, str): + results.append(i) + else: + if not len(i): + return pd.Series( + [i for i in range(len(search_cols) - 1)] + ["delme"], + index=search_cols, + ) + else: + results.append(float(i)) + return pd.Series(results, index=search_cols) + + +def hist_act(x: pd.Series, context: "Context", hold_cost: pd.DataFrame) -> list: + """Calculate historical activity of cooling technology. 
+    The data for shares is read from ``cooltech_cost_and_shares_ssp_msg.csv``.
+
+    Returns
+    -------
+    hist_activity(cooling_tech) = hist_activity(parent_technology) * share
+    * cooling_fraction
+    """
+    tech_df = hold_cost[
+        hold_cost["technology"].str.startswith(x.technology)
+    ]  # [x.node_loc]
+
+    node_search = context.regions if context.type_reg == "country" else x["node_loc"]
+
+    node_loc = x["node_loc"]
+    technology = x["technology"]
+    cooling_technologies = list(tech_df["technology"])
+    new_values = tech_df[node_search] * x.value
+
+    return [
+        [
+            node_loc,
+            technology,
+            cooling_technology,
+            x.year_act,
+            x.value,
+            new_value,
+            x.unit,
+        ]
+        for new_value, cooling_technology in zip(new_values, cooling_technologies)
+    ]
+
+
+def hist_cap(x: pd.Series, context: "Context", hold_cost: pd.DataFrame) -> list:
+    """Calculate historical capacity of cooling technology.
+
+    The data for shares is read from ``cooltech_cost_and_shares_ssp_msg.csv``.
+
+    Returns
+    -------
+    hist_new_capacity(cooling_tech) = historical_new_capacity(parent_technology)
+    * share * cooling_fraction
+    """
+    tech_df = hold_cost[
+        hold_cost["technology"].str.startswith(x.technology)
+    ]  # [x.node_loc]
+    if context.type_reg == "country":
+        node_search = context.regions
+    else:
+        node_search = x["node_loc"]  # e.g. R11_EEU
+    node_loc = x["node_loc"]
+    technology = x["technology"]
+    cooling_technologies = list(tech_df["technology"])
+    new_values = tech_df[node_search] * x.value
+
+    return [
+        [
+            node_loc,
+            technology,
+            cooling_technology,
+            x.year_vtg,
+            x.value,
+            new_value,
+            x.unit,
+        ]
+        for new_value, cooling_technology in zip(new_values, cooling_technologies)
+    ]
+
+
 # water & electricity for cooling technologies
 def cool_tech(context: "Context"):
     """Process cooling technology data for a scenario instance.
@@ -27,9 +172,11 @@ def cool_tech(context: "Context"):
     It adds cooling technologies as addons to the parent technologies. The
     nomenclature for cooling technology is ``<parent technology>__<cooling type>``.
     E.g.: `coal_ppl__ot_fresh`
+
     Parameters
     ----------
     context : .Context
+
     Returns
     -------
     data : dict of (str -> pandas.DataFrame)
@@ -38,7 +185,6 @@ def cool_tech(context: "Context"):
         Years in the data include the model horizon indicated by
         ``context["water build info"]``, plus the additional year 2010.
     """
-    # TODO reduce complexity of this function from 18 to 15 or less

    #: Name of the input file.
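Note: `shares`, `hist_act` and `hist_cap` above (like `missing_tech` and `cooling_fr` before them) were previously closures inside `cool_tech`; hoisting them to module level works because `DataFrame.apply` forwards extra keyword arguments to the applied function, so `context` and `hold_cost` no longer have to be captured from the enclosing scope. A self-contained sketch of that mechanism:

```python
import pandas as pd

def scale(row: pd.Series, factor: float) -> float:
    # Extra keyword arguments given to DataFrame.apply arrive here.
    return row["value"] * factor

df = pd.DataFrame({"value": [1.0, 2.0]})
print(df.apply(scale, axis=1, factor=0.9).tolist())  # [0.9, 1.8]
```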
# The input file mentions water withdrawals and emission heating fractions for # cooling technologies alongwith parent technologies: @@ -66,10 +212,11 @@ def cool_tech(context: "Context"): # Assigning proper nomenclature df_node["node"] = "B" + df_node["BCU_name"].astype(str) df_node["mode"] = "M" + df_node["BCU_name"].astype(str) - if context.type_reg == "country": - df_node["region"] = context.map_ISO_c[context.regions] - else: - df_node["region"] = f"{context.regions}_" + df_node["REGION"].astype(str) + df_node["region"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_node["REGION"].astype(str) + ) node_region = df_node["region"].unique() # reading ppl cooling tech dataframe @@ -88,41 +235,20 @@ def cool_tech(context: "Context"): # Extracting input database from scenario for parent technologies # Extracting input values from scenario - ref_input = scen.par("input", {"technology": cooling_df["parent_tech"]}) + ref_input: pd.DataFrame = scen.par( + "input", {"technology": cooling_df["parent_tech"]} + ) # Extracting historical activity from scenario - ref_hist_act = scen.par( + ref_hist_act: pd.DataFrame = scen.par( "historical_activity", {"technology": cooling_df["parent_tech"]} ) # Extracting historical capacity from scenario - ref_hist_cap = scen.par( + ref_hist_cap: pd.DataFrame = scen.par( "historical_new_capacity", {"technology": cooling_df["parent_tech"]} ) # cooling fraction = H_cool = Hi - 1 - Hi*(h_fg) # where h_fg (flue gasses losses) = 0.1 ref_input["cooling_fraction"] = ref_input["value"] * 0.9 - 1 - - def missing_tech(x): - """Assign values to missing data. - It goes through the input data frame and extract the technologies which - don't have input values and then assign manual values to those technologies - along with assigning them an arbitrary level i.e dummy supply - """ - data_dic = { - "geo_hpl": 1 / 0.850, - "geo_ppl": 1 / 0.385, - "nuc_hc": 1 / 0.326, - "nuc_lc": 1 / 0.326, - "solar_th_ppl": 1 / 0.385, - } - - if data_dic.get(x["technology"]): - if x["level"] == "cooling": - return pd.Series((data_dic.get(x["technology"]), "dummy_supply")) - else: - return pd.Series((data_dic.get(x["technology"]), x["level"])) - else: - return pd.Series((x["value"], x["level"])) - ref_input[["value", "level"]] = ref_input[["technology", "value", "level"]].apply( missing_tech, axis=1 ) @@ -153,23 +279,6 @@ def missing_tech(x): & (input_cool["node_origin"] != f"{context.regions}_GLB") ] - def cooling_fr(x): - """Calculate cooling fraction - Returns the calculated cooling fraction after for two categories; - 1. Technologies that produce heat as an output - cooling_fraction(h_cool) = input value(hi) - 1 - Simply subtract 1 from the heating value since the rest of the part is already - accounted in the heating value - 2. 
Rest of technologies - h_cool = hi -Hi* h_fg - 1, - where: - h_fg (flue gasses losses) = 0.1 (10% assumed losses) - """ - if "hpl" in x["index"]: - return x["value"] - 1 - else: - return x["value"] - (x["value"] * 0.1) - 1 - input_cool["cooling_fraction"] = input_cool.apply(cooling_fr, axis=1) # Converting water withdrawal units to Km3/GWa @@ -245,40 +354,46 @@ def cooling_fr(x): unit="GWa", ) # once through and closed loop freshwater - inp = inp.append( - make_df( - "input", - node_loc=icmse_df["node_loc"], - technology=icmse_df["technology_name"], - year_vtg=icmse_df["year_vtg"], - year_act=icmse_df["year_act"], - mode=icmse_df["mode"], - node_origin=icmse_df["node_origin"], - commodity="freshwater", - level="water_supply", - time="year", - time_origin="year", - value=icmse_df["value_cool"], - unit="km3/GWa", - ) + inp = pd.concat( + [ + inp, + make_df( + "input", + node_loc=icmse_df["node_loc"], + technology=icmse_df["technology_name"], + year_vtg=icmse_df["year_vtg"], + year_act=icmse_df["year_act"], + mode=icmse_df["mode"], + node_origin=icmse_df["node_origin"], + commodity="freshwater", + level="water_supply", + time="year", + time_origin="year", + value=icmse_df["value_cool"], + unit="km3/GWa", + ), + ] ) # saline cooling technologies - inp = inp.append( - make_df( - "input", - node_loc=saline_df["node_loc"], - technology=saline_df["technology_name"], - year_vtg=saline_df["year_vtg"], - year_act=saline_df["year_act"], - mode=saline_df["mode"], - node_origin=saline_df["node_origin"], - commodity="saline_ppl", - level="saline_supply", - time="year", - time_origin="year", - value=saline_df["value_cool"], - unit="km3/GWa", - ) + inp = pd.concat( + [ + inp, + make_df( + "input", + node_loc=saline_df["node_loc"], + technology=saline_df["technology_name"], + year_vtg=saline_df["year_vtg"], + year_act=saline_df["year_act"], + mode=saline_df["mode"], + node_origin=saline_df["node_origin"], + commodity="saline_ppl", + level="saline_supply", + time="year", + time_origin="year", + value=saline_df["value_cool"], + unit="km3/GWa", + ), + ] ) # Drops NA values from the value column @@ -340,7 +455,7 @@ def cooling_fr(x): # multiply by basin water availability share out_t["value"] = out_t["value"] * out_t["share"] out_t.drop(columns={"share"}, inplace=True) - out = out.append(out_t) + out = pd.concat([out, out_t]) out = out.dropna(subset=["value"]) out.reset_index(drop=True, inplace=True) @@ -368,75 +483,20 @@ def cooling_fr(x): ].drop_duplicates() search_cols_cooling_fraction = [col for col in search_cols if col != "technology"] - def shares(x, context: "Context"): - """Process share and cooling fraction. 
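Note: to make the relocated `cooling_fr` arithmetic concrete, here is a worked check of the two branches described in its docstring, with the assumed flue-gas loss h_fg = 0.1 (standalone sketch, not the module's code):

```python
H_FG = 0.1  # assumed 10% flue-gas losses


def cooling_fraction(hi: float, heat_plant: bool) -> float:
    # Heat plants ("hpl") already account for the losses in the heating
    # value, so only the one unit of useful output is subtracted.
    return hi - 1 if heat_plant else hi - hi * H_FG - 1


print(cooling_fraction(2.0, heat_plant=True))   # 2.0 - 1 = 1.0
print(cooling_fraction(2.0, heat_plant=False))  # 2.0 - 0.2 - 1 = 0.8
```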
- Returns - ------- - Product of value of shares of cooling technology types of regions with - corresponding cooling fraction - """ - for col in search_cols_cooling_fraction: - # MAPPING ISOCODE to region name, assume one country only - col2 = context.map_ISO_c[col] if context.type_reg == "country" else col - cooling_fraction = hold_df[ - (hold_df["node_loc"] == col2) - & (hold_df["technology_name"] == x["technology"]) - ]["cooling_fraction"] - x[col] = x[col] * cooling_fraction - - results: list[Any] = [] - for i in x: - if isinstance(i, str): - results.append(i) - else: - if not len(i): - return pd.Series( - [i for i in range(len(search_cols) - 1)] + ["delme"], - index=search_cols, - ) - else: - results.append(float(i)) - return pd.Series(results, index=search_cols) - # Apply function to the - hold_cost = cost[search_cols].apply(shares, axis=1, context=context) + hold_cost = cost[search_cols].apply( + shares, + axis=1, + context=context, + search_cols_cooling_fraction=search_cols_cooling_fraction, + hold_df=hold_df, + search_cols=search_cols, + ) hold_cost = hold_cost[hold_cost["technology"] != "delme"] - def hist_act(x, context: "Context"): - """Calculate historical activity of cooling technology. - The data for shares is read from ``cooltech_cost_and_shares_ssp_msg.csv`` - Returns - ------- - hist_activity(cooling_tech) = hist_activitiy(parent_technology) * share - *cooling_fraction - """ - tech_df = hold_cost[ - hold_cost["technology"].str.startswith(x.technology) - ] # [x.node_loc] - - node_search = ( - context.regions if context.type_reg == "country" else x["node_loc"] - ) - - node_loc = x["node_loc"] - technology = x["technology"] - cooling_technologies = list(tech_df["technology"]) - new_values = tech_df[node_search] * x.value - - return [ - [ - node_loc, - technology, - cooling_technology, - x.year_act, - x.value, - new_value, - x.unit, - ] - for new_value, cooling_technology in zip(new_values, cooling_technologies) - ] - - changed_value_series = ref_hist_act.apply(hist_act, axis=1, context=context) + changed_value_series = ref_hist_act.apply( + hist_act, axis=1, context=context, hold_cost=hold_cost + ) changed_value_series_flat = [ row for series in changed_value_series for row in series ] @@ -452,40 +512,9 @@ def hist_act(x, context: "Context"): # dataframe for historical activities of cooling techs act_value_df = pd.DataFrame(changed_value_series_flat, columns=columns) - def hist_cap(x, context: "Context"): - """Calculate historical capacity of cooling technology. 
- The data for shares is read from ``cooltech_cost_and_shares_ssp_msg.csv`` - Returns - ------- - hist_new_capacity(cooling_tech) = historical_new_capacity(parent_technology)* - share * cooling_fraction - """ - tech_df = hold_cost[ - hold_cost["technology"].str.startswith(x.technology) - ] # [x.node_loc] - if context.type_reg == "country": - node_search = context.regions - else: - node_search = x["node_loc"] # R11_EEU - node_loc = x["node_loc"] - technology = x["technology"] - cooling_technologies = list(tech_df["technology"]) - new_values = tech_df[node_search] * x.value - - return [ - [ - node_loc, - technology, - cooling_technology, - x.year_vtg, - x.value, - new_value, - x.unit, - ] - for new_value, cooling_technology in zip(new_values, cooling_technologies) - ] - - changed_value_series = ref_hist_cap.apply(hist_cap, axis=1, context=context) + changed_value_series = ref_hist_cap.apply( + hist_cap, axis=1, context=context, hold_cost=hold_cost + ) changed_value_series_flat = [ row for series in changed_value_series for row in series ] @@ -613,13 +642,13 @@ def hist_cap(x, context: "Context"): adon_df = input_cool.copy() # Add 'cooling_' before name of parent technologies that are type_addon # nomenclature - adon_df["tech"] = "cooling__" + adon_df["index"].astype(str) + adon_df["tech"] = "cooling__" + adon_df["parent_tech"].astype(str) # technology : 'parent technology' and type_addon is type of addons such # as 'cooling__bio_hpl' addon_df = make_df( "addon_conversion", node=adon_df["node_loc"], - technology=adon_df["index"], + technology=adon_df["parent_tech"], year_vtg=adon_df["year_vtg"], year_act=adon_df["year_act"], mode=adon_df["mode"], @@ -766,7 +795,8 @@ def non_cooling_tec(context: "Context"): FILE = "tech_water_performance_ssp_msg.csv" path = package_data_path("water", "ppl_cooling_tech", FILE) df = pd.read_csv(path) - cooling_df = df.loc[df["technology_group"] == "cooling"] + cooling_df = df.copy() + cooling_df = cooling_df.loc[cooling_df["technology_group"] == "cooling"] # Separate a column for parent technologies of respective cooling # techs cooling_df["parent_tech"] = ( diff --git a/message_ix_models/model/water/data/water_supply.py b/message_ix_models/model/water/data/water_supply.py index d4a2a25e8b..c56c5c73ae 100644 --- a/message_ix_models/model/water/data/water_supply.py +++ b/message_ix_models/model/water/data/water_supply.py @@ -16,7 +16,7 @@ def map_basin_region_wat(context: "Context"): """ - Calculate share of water avaialbility of basins per each parent region. + Calculate share of water availability of basins per each parent region. 
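Note on the share computation in `map_basin_region_wat` below: on recent pandas, `groupby(...).apply(lambda x: x / x.sum())` returns the group key as an extra index level, which is why the patch adds `reset_index(level=0, drop=True)`. An equivalent spelling that avoids the index juggling entirely, sketched on toy data:

```python
import pandas as pd

df = pd.DataFrame({"MSGREG": ["R1", "R1", "R2"], "avail": [2.0, 6.0, 5.0]})

# transform("sum") preserves the original index, so each basin's share of
# its region's total needs no reset_index afterwards.
df["share"] = df["avail"] / df.groupby("MSGREG")["avail"].transform("sum")
print(df["share"].tolist())  # [0.25, 0.75, 1.0]
```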
The parent region could be global message regions or country @@ -49,16 +49,17 @@ def map_basin_region_wat(context: "Context"): # Reading data, the data is spatially and temporally aggregated from GHMs df_sw["BCU_name"] = df_x["BCU_name"] - - if context.type_reg == "country": - df_sw["MSGREG"] = context.map_ISO_c[context.regions] - else: - df_sw["MSGREG"] = f"{context.regions}_" + df_sw["BCU_name"].str[-3:] + df_sw["MSGREG"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_sw["BCU_name"].str[-3:] + ) df_sw = df_sw.set_index(["MSGREG", "BCU_name"]) # Calculating ratio of water availability in basin by region df_sw = df_sw.groupby(["MSGREG"]).apply(lambda x: x / x.sum()) + df_sw.reset_index(level=0, drop=True, inplace=True) df_sw.reset_index(inplace=True) df_sw["Region"] = "B" + df_sw["BCU_name"].astype(str) df_sw["Mode"] = df_sw["Region"].replace(regex=["^B"], value="M") @@ -101,6 +102,7 @@ def map_basin_region_wat(context: "Context"): # Calculating ratio of water availability in basin by region df_sw = df_sw.groupby(["MSGREG"]).apply(lambda x: x / x.sum()) + df_sw.reset_index(level=0, drop=True, inplace=True) df_sw.reset_index(inplace=True) df_sw["Region"] = "B" + df_sw["BCU_name"].astype(str) df_sw["Mode"] = df_sw["Region"].replace(regex=["^B"], value="M") @@ -139,6 +141,7 @@ def add_water_supply(context: "Context"): info = context["water build info"] # load the scenario from context scen = context.get_scenario() + # scen = Scenario(context.get_platform(), **context.core.scenario_info) # year_wat = (2010, 2015) fut_year = info.Y @@ -159,10 +162,11 @@ def add_water_supply(context: "Context"): # Assigning proper nomenclature df_node["node"] = "B" + df_node["BCU_name"].astype(str) df_node["mode"] = "M" + df_node["BCU_name"].astype(str) - if context.type_reg == "country": - df_node["region"] = context.map_ISO_c[context.regions] - else: - df_node["region"] = f"{context.regions}_" + df_node["REGION"].astype(str) + df_node["region"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_node["REGION"].astype(str) + ) # Storing the energy MESSAGE region names node_region = df_node["region"].unique() @@ -171,10 +175,11 @@ def add_water_supply(context: "Context"): FILE1 = f"gw_energy_intensity_depth_{context.regions}.csv" PATH1 = package_data_path("water", "availability", FILE1) df_gwt = pd.read_csv(PATH1) - if context.type_reg == "country": - df_gwt["region"] = context.map_ISO_c[context.regions] - else: - df_gwt["REGION"] = f"{context.regions}_" + df_gwt["REGION"].astype(str) + df_gwt["region"] = ( + context.map_ISO_c[context.regions] + if context.type_reg == "country" + else f"{context.regions}_" + df_gwt["REGION"].astype(str) + ) # reading groundwater energy intensity data FILE2 = f"historical_new_cap_gw_sw_km3_year_{context.regions}.csv" @@ -204,42 +209,48 @@ def add_water_supply(context: "Context"): ) # Add output df for groundwater supply for regions - output_df = output_df.append( - make_df( - "output", - technology="extract_groundwater", - value=1, - unit="km3", - year_vtg=year_wat, - year_act=year_wat, - level="water_supply", - commodity="freshwater", - mode="M1", - time="year", - time_dest="year", - time_origin="year", - ) - .pipe(broadcast, node_loc=node_region) - .pipe(same_node) + output_df = pd.concat( + [ + output_df, + make_df( + "output", + technology="extract_groundwater", + value=1, + unit="km3", + year_vtg=year_wat, + year_act=year_wat, + level="water_supply", + 
commodity="freshwater", + mode="M1", + time="year", + time_dest="year", + time_origin="year", + ) + .pipe(broadcast, node_loc=node_region) + .pipe(same_node), + ] ) # Add output of saline water supply for regions - output_df = output_df.append( - make_df( - "output", - technology="extract_salinewater", - value=1, - unit="km3", - year_vtg=year_wat, - year_act=year_wat, - level="water_supply", - commodity="saline_ppl", - mode="M1", - time="year", - time_dest="year", - time_origin="year", - ) - .pipe(broadcast, node_loc=node_region) - .pipe(same_node) + output_df = pd.concat( + [ + output_df, + make_df( + "output", + technology="extract_salinewater", + value=1, + unit="km3", + year_vtg=year_wat, + year_act=year_wat, + level="water_supply", + commodity="saline_ppl", + mode="M1", + time="year", + time_dest="year", + time_origin="year", + ) + .pipe(broadcast, node_loc=node_region) + .pipe(same_node), + ] ) results["output"] = output_df @@ -266,51 +277,56 @@ def add_water_supply(context: "Context"): .pipe(same_time) ) - # input data frame for slack technology balancing equality with demands - inp = inp.append( - make_df( - "input", - technology="gw_recharge", - value=1, - unit="-", - level="water_avail_basin", - commodity="groundwater_basin", - mode="M1", - year_vtg=year_wat, - year_act=year_wat, - ) - .pipe( - broadcast, - node_loc=df_node["node"], - time=sub_time, - ) - .pipe(same_node) - .pipe(same_time) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="gw_recharge", + value=1, + unit="-", + level="water_avail_basin", + commodity="groundwater_basin", + mode="M1", + year_vtg=year_wat, + year_act=year_wat, + ) + .pipe( + broadcast, + node_loc=df_node["node"], + time=sub_time, + ) + .pipe(same_node) + .pipe(same_time), + ] ) # input dataframe linking water supply to energy dummy technology - inp = inp.append( - make_df( - "input", - technology="basin_to_reg", - value=1, - unit="-", - level="water_supply_basin", - commodity="freshwater_basin", - mode=df_node["mode"], - node_origin=df_node["node"], - node_loc=df_node["region"], - ) - .pipe( - broadcast, - year_vtg=year_wat, - time=sub_time, - ) - .pipe(same_time) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="basin_to_reg", + value=1, + unit="-", + level="water_supply_basin", + commodity="freshwater_basin", + mode=df_node["mode"], + node_origin=df_node["node"], + node_loc=df_node["region"], + ) + .pipe( + broadcast, + year_vtg=year_wat, + time=sub_time, + ) + .pipe(same_time), + ] ) inp["year_act"] = inp["year_vtg"] # # input data frame for slack technology balancing equality with demands - # inp = inp.append( + # inp = pd.concat([inp, # make_df( # "input", # technology="salinewater_return", @@ -324,111 +340,126 @@ def add_water_supply(context: "Context"): # node_origin=df_node["node"], # node_loc=df_node["node"], # ).pipe(broadcast, year_vtg=year_wat, year_act=year_wat) - # ) + # ]) # input data frame for freshwater supply yv_ya_sw = map_yv_ya_lt(year_wat, 50, first_year) - inp = inp.append( - make_df( - "input", - technology="extract_surfacewater", - value=1, - unit="-", - level="water_avail_basin", - commodity="surfacewater_basin", - mode="M1", - node_origin=df_node["node"], - node_loc=df_node["node"], - ) - .pipe( - broadcast, - yv_ya_sw, - time=sub_time, - ) - .pipe(same_time) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="extract_surfacewater", + value=1, + unit="-", + level="water_avail_basin", + commodity="surfacewater_basin", + mode="M1", + node_origin=df_node["node"], + 
node_loc=df_node["node"], + ) + .pipe( + broadcast, + yv_ya_sw, + time=sub_time, + ) + .pipe(same_time), + ] ) # input dataframe for groundwater supply yv_ya_gw = map_yv_ya_lt(year_wat, 20, first_year) - inp = inp.append( - make_df( - "input", - technology="extract_groundwater", - value=1, - unit="-", - level="water_avail_basin", - commodity="groundwater_basin", - mode="M1", - node_origin=df_node["node"], - node_loc=df_node["node"], - ) - .pipe( - broadcast, - yv_ya_gw, - time=sub_time, - ) - .pipe(same_time) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="extract_groundwater", + value=1, + unit="-", + level="water_avail_basin", + commodity="groundwater_basin", + mode="M1", + node_origin=df_node["node"], + node_loc=df_node["node"], + ) + .pipe( + broadcast, + yv_ya_gw, + time=sub_time, + ) + .pipe(same_time), + ] ) # electricity input dataframe for extract freshwater supply # low: 0.001141553, mid: 0.018835616, high: 0.03652968 - inp = inp.append( - make_df( - "input", - technology="extract_surfacewater", - value=0.018835616, - unit="-", - level="final", - commodity="electr", - mode="M1", - time_origin="year", - node_origin=df_node["region"], - node_loc=df_node["node"], - ).pipe( - broadcast, - yv_ya_sw, - time=sub_time, - ) - ) - - inp = inp.append( - make_df( - "input", - technology="extract_groundwater", - value=df_gwt["GW_per_km3_per_year"] + 0.043464579, - unit="-", - level="final", - commodity="electr", - mode="M1", - time_origin="year", - node_origin=df_gwt["REGION"], - node_loc=df_node["node"], - ).pipe( - broadcast, - yv_ya_gw, - time=sub_time, - ) - ) - - inp = inp.append( - make_df( - "input", - technology="extract_gw_fossil", - value=(df_gwt["GW_per_km3_per_year"] + 0.043464579) - * 2, # twice as much normal gw - unit="-", - level="final", - commodity="electr", - mode="M1", - time_origin="year", - node_origin=df_gwt["REGION"], - node_loc=df_node["node"], - ).pipe( - broadcast, - yv_ya_gw, - time=sub_time, - ) + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="extract_surfacewater", + value=0.018835616, + unit="-", + level="final", + commodity="electr", + mode="M1", + time_origin="year", + node_origin=df_node["region"], + node_loc=df_node["node"], + ).pipe( + broadcast, + yv_ya_sw, + time=sub_time, + ), + ] + ) + + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="extract_groundwater", + value=df_gwt["GW_per_km3_per_year"] + 0.043464579, + unit="-", + level="final", + commodity="electr", + mode="M1", + time_origin="year", + node_origin=df_gwt["REGION"], + node_loc=df_node["node"], + ).pipe( + broadcast, + yv_ya_gw, + time=sub_time, + ), + ] + ) + + inp = pd.concat( + [ + inp, + make_df( + "input", + technology="extract_gw_fossil", + value=(df_gwt["GW_per_km3_per_year"] + 0.043464579) + * 2, # twice as much normal gw + unit="-", + level="final", + commodity="electr", + mode="M1", + time_origin="year", + node_origin=df_gwt["REGION"], + node_loc=df_node["node"], + ).pipe( + broadcast, + yv_ya_gw, + time=sub_time, + ), + ] ) if context.type_reg == "global": @@ -462,66 +493,75 @@ def add_water_supply(context: "Context"): .pipe(same_time) ) # Add output df for groundwater supply for basins - output_df = output_df.append( - make_df( - "output", - technology="extract_groundwater", - value=1, - unit="-", - level="water_supply_basin", - commodity="freshwater_basin", - mode="M1", - node_loc=df_node["node"], - node_dest=df_node["node"], - ) - .pipe( - broadcast, - yv_ya_gw, - time=sub_time, - ) - .pipe(same_time) + output_df = pd.concat( + [ + 
output_df, + make_df( + "output", + technology="extract_groundwater", + value=1, + unit="-", + level="water_supply_basin", + commodity="freshwater_basin", + mode="M1", + node_loc=df_node["node"], + node_dest=df_node["node"], + ) + .pipe( + broadcast, + yv_ya_gw, + time=sub_time, + ) + .pipe(same_time), + ] ) # Add output df for groundwater supply for basins - output_df = output_df.append( - make_df( - "output", - technology="extract_gw_fossil", - value=1, - unit="-", - level="water_supply_basin", - commodity="freshwater_basin", - mode="M1", - node_loc=df_node["node"], - node_dest=df_node["node"], - time_origin="year", - ) - .pipe( - broadcast, - yv_ya_gw, - time=sub_time, - ) - .pipe(same_time) + output_df = pd.concat( + [ + output_df, + make_df( + "output", + technology="extract_gw_fossil", + value=1, + unit="-", + level="water_supply_basin", + commodity="freshwater_basin", + mode="M1", + node_loc=df_node["node"], + node_dest=df_node["node"], + time_origin="year", + ) + .pipe( + broadcast, + yv_ya_gw, + time=sub_time, + ) + .pipe(same_time), + ] ) # Add output of saline water supply for regions - output_df = output_df.append( - make_df( - "output", - technology="extract_salinewater", - value=1, - unit="km3", - year_vtg=year_wat, - year_act=year_wat, - level="saline_supply", - commodity="saline_ppl", - mode="M1", - time="year", - time_dest="year", - time_origin="year", - ) - .pipe(broadcast, node_loc=node_region) - .pipe(same_node) + output_df = pd.concat( + [ + output_df, + make_df( + "output", + technology="extract_salinewater", + value=1, + unit="km3", + year_vtg=year_wat, + year_act=year_wat, + level="saline_supply", + commodity="saline_ppl", + mode="M1", + time="year", + time_dest="year", + time_origin="year", + ) + .pipe(broadcast, node_loc=node_region) + .pipe(same_node), + ] ) hist_new_cap = make_df( @@ -533,33 +573,39 @@ def add_water_supply(context: "Context"): year_vtg=2015, ) - hist_new_cap = hist_new_cap.append( - make_df( - "historical_new_capacity", - node_loc=df_hist["BCU_name"], - technology="extract_groundwater", - value=df_hist["hist_cap_gw_km3_year"] / 5, - unit="km3/year", - year_vtg=2015, - ) + hist_new_cap = pd.concat( + [ + hist_new_cap, + make_df( + "historical_new_capacity", + node_loc=df_hist["BCU_name"], + technology="extract_groundwater", + value=df_hist["hist_cap_gw_km3_year"] / 5, + unit="km3/year", + year_vtg=2015, + ), + ] ) results["historical_new_capacity"] = hist_new_cap # output data frame linking water supply to energy dummy technology - output_df = output_df.append( - make_df( - "output", - technology="basin_to_reg", - value=1, - unit="-", - level="water_supply", - commodity="freshwater", - time_dest="year", - node_loc=df_node["region"], - node_dest=df_node["region"], - mode=df_node["mode"], - ).pipe(broadcast, year_vtg=year_wat, time=sub_time) + output_df = pd.concat( + [ + output_df, + make_df( + "output", + technology="basin_to_reg", + value=1, + unit="-", + level="water_supply", + commodity="freshwater", + time_dest="year", + node_loc=df_node["region"], + node_dest=df_node["region"], + mode=df_node["mode"], + ).pipe(broadcast, year_vtg=year_wat, time=sub_time), + ] ) output_df["year_act"] = output_df["year_vtg"] @@ -577,7 +623,7 @@ def add_water_supply(context: "Context"): ).pipe(broadcast, year_vtg=year_wat, time=sub_time) var["year_act"] = var["year_vtg"] # # Dummy cost for extract surface ewater to prioritize water sources - # var = var.append(make_df( + # var = pd.concat([var, make_df( # "var_cost", # technology='extract_surfacewater', # 
value= 0.0001, @@ -587,9 +633,9 @@ def add_water_supply(context: "Context"): # ).pipe(broadcast, year_vtg=year_wat, # year_act=year_wat, node_loc=df_node["node"] # ) - # ) + # ]) # # Dummy cost for extract groundwater - # var = var.append(make_df( + # var = pd.concat([var, make_df( # "var_cost", # technology='extract_groundwater', # value= 0.001, @@ -597,7 +643,8 @@ def add_water_supply(context: "Context"): # mode="M1", # time="year", # ).pipe(broadcast, year_vtg=year_wat, - # year_act=year_wat, node_loc=df_node["node"]) + # year_act=year_wat, node_loc=df_node["node"] + # ]) # ) results["var_cost"] = var @@ -629,26 +676,32 @@ def add_water_supply(context: "Context"): .pipe(same_node) ) - tl = tl.append( - make_df( - "technical_lifetime", - technology="extract_groundwater", - value=20, - unit="y", - ) - .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) - .pipe(same_node) - ) - - tl = tl.append( - make_df( - "technical_lifetime", - technology="extract_gw_fossil", - value=20, - unit="y", - ) - .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) - .pipe(same_node) + tl = pd.concat( + [ + tl, + make_df( + "technical_lifetime", + technology="extract_groundwater", + value=20, + unit="y", + ) + .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) + .pipe(same_node), + ] + ) + + tl = pd.concat( + [ + tl, + make_df( + "technical_lifetime", + technology="extract_gw_fossil", + value=20, + unit="y", + ) + .pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) + .pipe(same_node), + ] ) results["technical_lifetime"] = tl @@ -661,22 +714,28 @@ def add_water_supply(context: "Context"): unit="USD/km3", ).pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) - inv_cost = inv_cost.append( - make_df( - "inv_cost", - technology="extract_groundwater", - value=54.52, - unit="USD/km3", - ).pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) - ) - - inv_cost = inv_cost.append( - make_df( - "inv_cost", - technology="extract_gw_fossil", - value=54.52 * 150, # assume higher as normal GW - unit="USD/km3", - ).pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]) + inv_cost = pd.concat( + [ + inv_cost, + make_df( + "inv_cost", + technology="extract_groundwater", + value=54.52, + unit="USD/km3", + ).pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]), + ] + ) + + inv_cost = pd.concat( + [ + inv_cost, + make_df( + "inv_cost", + technology="extract_gw_fossil", + value=54.52 * 150, # assume higher as normal GW + unit="USD/km3", + ).pipe(broadcast, year_vtg=year_wat, node_loc=df_node["node"]), + ] ) results["inv_cost"] = inv_cost @@ -771,8 +830,8 @@ def add_e_flow(context: "Context"): ) df_env = pd.read_csv(path1) df_env.drop(["Unnamed: 0"], axis=1, inplace=True) - new_cols = pd.to_datetime(df_env.columns, format="%Y/%m/%d") - df_env.columns = new_cols + # new_cols = pd.to_datetime(df_env.columns, format="%Y/%m/%d") + # df_env.columns = new_cols df_env.index = df_x["BCU_name"] df_env = df_env.stack().reset_index() df_env.columns = ["Region", "years", "value"] diff --git a/message_ix_models/model/water/reporting.py b/message_ix_models/model/water/reporting.py index ce67f35975..292dc2add5 100644 --- a/message_ix_models/model/water/reporting.py +++ b/message_ix_models/model/water/reporting.py @@ -3,16 +3,14 @@ import numpy as np import pandas as pd import pyam -from message_ix import Reporter +from message_ix import Reporter, Scenario -from message_ix_models.util import package_data_path +from message_ix_models.util import HAS_MESSAGE_DATA, package_data_path 
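Note on the import rewrite just above: the module used to tolerate a missing `message_data` with try/except ImportError and a `legacy_reporting = None` sentinel; it now branches on the `HAS_MESSAGE_DATA` flag. One consequence worth flagging: with the flag style the imported name is simply undefined when the package is absent, so every later use must also be guarded, as `report_full` now does before calling `run_old_reporting`. A generic sketch of both styles, using a hypothetical optional package `extras`:

```python
# Style removed by the patch: a sentinel is always defined.
try:
    from extras import report as legacy_reporting  # hypothetical optional dep
except ImportError:  # extras not installed
    legacy_reporting = None

# Style adopted by the patch: branch on a pre-computed flag
# (HAS_MESSAGE_DATA in message_ix_models.util plays this role).
HAS_EXTRAS = legacy_reporting is not None
if HAS_EXTRAS:
    from extras import report as legacy_reporting  # noqa: F811
```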
-try:
+if HAS_MESSAGE_DATA:
     from message_data.tools.post_processing.iamc_report_hackathon import (
         report as legacy_reporting,
     )
-except ImportError:  # message_data not installed
-    legacy_reporting = None

 log = logging.getLogger(__name__)

@@ -260,9 +258,9 @@ def multiply_electricity_output_of_hydro(elec_hydro_var, report_iam):


 # TODO
-def report(sc=False, reg="", sdgs=False):
+def report(sc: Scenario, reg: str, sdgs: bool = False):
     """Report nexus module results"""
-
+    log.info(f"Regions given as {reg}; behaviour is only guaranteed for ['R11', 'R12']")
     # Generating reporter
     rep = Reporter.from_scenario(sc)
     report = rep.get(
@@ -1084,15 +1082,6 @@ def report(sc=False, reg="", sdgs=False):

     # add population with sanitation or drinking water access
     mp2 = sc.platform
-    map_node = sc.set("map_node")
-    # this might not be the best way to get the region, better from context
-    if not reg:
-        if "R11" in map_node.node.to_list()[1]:
-            reg = "R11"
-        elif "R12" in map_node.node.to_list()[1]:
-            reg = "R12"
-        else:
-            print("Check the region of the model is consistent with R11,R12")

     # load data on water and sanitation access
     load_path = package_data_path("water", "demands", "harmonized", reg)
@@ -1100,65 +1089,62 @@ def report(sc=False, reg="", sdgs=False):
     pop_check = sc.timeseries(variable="Population")
     pop_check = pop_check[pop_check.year >= 2020]
-    if pop_check.empty:
-        print("The Population data does not exist or timeseries() has no future values")
-    else:
-        pop_drink_tot = pd.DataFrame()
-        pop_sani_tot = pd.DataFrame()
-        pop_sdg6 = pd.DataFrame()
-        for ur in ["urban", "rural"]:
-            # CHANGE TO URBAN AND RURAL POP
-            pop_tot = sc.timeseries(variable=("Population|" + ur.capitalize()))
-            pop_tot = pop_tot[-(pop_tot.region == "GLB region (R11)")]
-            pop_reg = np.unique(pop_tot["region"])
-            # need to change names
-            reg_map = mp2.regions()
-            reg_map = reg_map[reg_map.mapped_to.isin(pop_reg)].drop(
-                columns=["parent", "hierarchy"]
-            )
-            reg_map["region"] = [x.split("_")[1] for x in reg_map.region]
+    assert (
+        not pop_check.empty
+    ), "The Population data does not exist or timeseries() has no future values"
+
+    pop_drink_tot = pd.DataFrame()
+    pop_sani_tot = pd.DataFrame()
+    pop_sdg6 = pd.DataFrame()
+    for ur in ["urban", "rural"]:
+        # CHANGE TO URBAN AND RURAL POP
+        pop_tot = sc.timeseries(variable=("Population|" + ur.capitalize()))
+        pop_tot = pop_tot[-(pop_tot.region == "GLB region (R11)")]
+        pop_reg = np.unique(pop_tot["region"])
+        # need to change names
+        reg_map = mp2.regions()
+        reg_map = reg_map[reg_map.mapped_to.isin(pop_reg)].drop(
+            columns=["parent", "hierarchy"]
+        )
+        reg_map["region"] = [x.split("_")[1] for x in reg_map.region]

-            df_rate = all_rates[all_rates.variable.str.contains(ur)]
+        df_rate = all_rates[all_rates.variable.str.contains(ur)]

-            df_rate = df_rate[
-                df_rate.variable.str.contains("sdg" if sdgs else "baseline")
-            ]
+        df_rate = df_rate[df_rate.variable.str.contains("sdg" if sdgs else "baseline")]

-            df_rate["region"] = [x.split("|")[1] for x in df_rate.node]
-            df_rate = df_rate.drop(columns=["node"])
-            # make region mean (no
weighted average) + df_rate = ( + df_rate.groupby(["year", "variable", "region"])["value"] + .mean() + .reset_index() + ) + # convert region name + df_rate = df_rate.merge(reg_map, how="left") + df_rate = df_rate.drop(columns=["region"]) + df_rate = df_rate.rename( + columns={"mapped_to": "region", "variable": "new_var", "value": "rate"} + ) - # Population|Drinking Water Access - df_drink = df_rate[df_rate.new_var.str.contains("connection")] - pop_drink = pop_tot.merge(df_drink, how="left") - pop_drink["variable"] = ( - "Population|Drinking Water Access|" + ur.capitalize() - ) - pop_drink["value"] = pop_drink.value * pop_drink.rate - cols = pop_tot.columns - pop_drink = pop_drink[cols] - pop_drink_tot = pop_drink_tot.append(pop_drink) - pop_sdg6 = pop_sdg6.append(pop_drink) - - # Population|Sanitation Acces - df_sani = df_rate[df_rate.new_var.str.contains("treatment")] - pop_sani = pop_tot.merge(df_sani, how="left") - pop_sani["variable"] = "Population|Sanitation Access|" + ur.capitalize() - pop_sani["value"] = pop_sani.value * pop_sani.rate - pop_sani = pop_sani[cols] - pop_sani_tot = pop_sani_tot.append(pop_drink) - pop_sdg6 = pop_sdg6.append(pop_sani) + # Population|Drinking Water Access + df_drink = df_rate[df_rate.new_var.str.contains("connection")] + pop_drink = pop_tot.merge(df_drink, how="left") + pop_drink["variable"] = "Population|Drinking Water Access|" + ur.capitalize() + pop_drink["value"] = pop_drink.value * pop_drink.rate + cols = pop_tot.columns + pop_drink = pop_drink[cols] + pop_drink_tot = pop_drink_tot.append(pop_drink) + pop_sdg6 = pop_sdg6.append(pop_drink) + + # Population|Sanitation Acces + df_sani = df_rate[df_rate.new_var.str.contains("treatment")] + pop_sani = pop_tot.merge(df_sani, how="left") + pop_sani["variable"] = "Population|Sanitation Access|" + ur.capitalize() + pop_sani["value"] = pop_sani.value * pop_sani.rate + pop_sani = pop_sani[cols] + pop_sani_tot = pop_sani_tot.append(pop_drink) + pop_sdg6 = pop_sdg6.append(pop_sani) # total values pop_drink_tot = ( @@ -1190,7 +1176,7 @@ def report(sc=False, reg="", sdgs=False): pop_sdg6_glb = pop_sdg6_glb[cols] pop_sdg6 = pop_sdg6.append(pop_sdg6_glb) - print("Population|Drinking Water Access") + log.info("Population|Drinking Water Access") # Add water prices, ad-hoc procedure wp = sc.var( @@ -1200,32 +1186,7 @@ def report(sc=False, reg="", sdgs=False): wp["unit"] = "US$2010/m3" wp = wp.rename(columns={"node": "region"}) # get withdrawals for weighted mean - ww = report_iam.as_pandas() - ww = ww[ - ww.variable.isin( - ["out|final|rural_mw|rural_t_d|M1", "out|final|urban_mw|urban_t_d|M1"] - ) - ] - ww["commodity"] = np.where( - ww.variable.str.contains("urban_mw"), "urban_mw", "rural_mw" - ) - ww["wdr"] = ww["value"] - if not suban: - ww = ww[["region", "year", "commodity", "wdr"]] - else: - ww = ww[["region", "year", "subannual", "commodity", "wdr"]] - ww = pd.concat( - [ - ww, - ( - ww.groupby(["region", "year", "commodity"])["wdr"] - .sum() - .reset_index() - .assign(subannual="year") - .loc[:, ["region", "year", "subannual", "commodity", "wdr"]] - ), - ] - ).reset_index(drop=True) + ww = prepare_ww(ww_input=report_iam.as_pandas(), suban=suban) # irrigation water, at regional level # need to update for global model now we have 3 irrigation # probably will need to do a scaled agerave with the ww, no basin level @@ -1237,8 +1198,7 @@ def report(sc=False, reg="", sdgs=False): # driking water wr_dri = wp[wp.commodity.isin(["urban_mw", "rural_mw"])] wr_dri = wr_dri.drop(columns={"level", "lvl", "mrg"}) - if suban: - 
wr_dri = wr_dri.rename(columns={"time": "subannual"})
+    wr_dri = wr_dri.rename(columns={"time": "subannual"}) if suban else wr_dri
     wr_dri = wr_dri.merge(ww, how="left")
     wr_dri["variable"] = np.where(
         wr_dri.commodity == "urban_mw",
@@ -1282,7 +1242,7 @@ def report(sc=False, reg="", sdgs=False):
     map_node_dict = map_node.groupby("node_parent")["node"].apply(list).to_dict()

     for index, row in map_agg_pd.iterrows():
-        print(row["names"])
+        log.info(f"Processing {row['names']}")
         # Aggregates variables as per standard reporting
         report_iam.aggregate(row["names"], components=row["list_cat"], append=True)
@@ -1334,10 +1294,7 @@
     report_pd = report_pd[-report_pd.variable.isin(water_hydro_var)]

     # add water population
-    if pop_check.empty:
-        print("The Population data does not exist or timeseries() has no future values")
-    else:
-        report_pd = report_pd.append(pop_sdg6)
+    report_pd = report_pd.append(pop_sdg6)

     # add units
     for index, row in map_agg_pd.iterrows():
@@ -1384,39 +1341,70 @@
     if reg not in ["R11", "R12"]:
         # temp for leap-re
         out_path = package_data_path().parents[0] / "reporting_output/"
-
-        if not out_path.exists():
-            out_path.mkdir()
+        out_path.mkdir(exist_ok=True)

         out_file = out_path / f"{sc.model}_{sc.scenario}_nexus.csv"
         report_pd.to_csv(out_file, index=False)

     sc.check_out(timeseries_only=True)
-    print("Starting to upload timeseries")
-    print(report_pd.head())
+    log.info("Starting to upload timeseries")
+    log.info(report_pd.head())
     sc.add_timeseries(report_pd)
-    print("Finished uploading timeseries")
+    log.info("Finished uploading timeseries")
     sc.commit("Reporting uploaded as timeseries")


-def report_full(sc=False, reg="", sdgs=False):
+def prepare_ww(ww_input: pd.DataFrame, suban: bool) -> pd.DataFrame:
+    """Select municipal water withdrawals and append their annual totals."""
+    ww = ww_input[
+        ww_input.variable.isin(
+            ["out|final|rural_mw|rural_t_d|M1", "out|final|urban_mw|urban_t_d|M1"]
+        )
+    ]
+    ww["commodity"] = np.where(
+        ww.variable.str.contains("urban_mw"), "urban_mw", "rural_mw"
+    )
+    ww["wdr"] = ww["value"]
+    if not suban:
+        ww = ww[["region", "year", "commodity", "wdr"]]
+    else:
+        ww = ww[["region", "year", "subannual", "commodity", "wdr"]]
+    ww = pd.concat(
+        [
+            ww,
+            (
+                ww.groupby(["region", "year", "commodity"])["wdr"]
+                .sum()
+                .reset_index()
+                .assign(subannual="year")
+                .loc[:, ["region", "year", "subannual", "commodity", "wdr"]]
+            ),
+        ]
+    ).reset_index(drop=True)
+
+    return ww
+
+
+def report_full(sc: Scenario, reg: str, sdgs=False):
     """Combine old and new reporting workflows"""

     a = sc.timeseries()
     # keep historical part, if present
     a = a[a.year >= 2020]

     sc.check_out(timeseries_only=True)
-    print("Remove any previous timeseries")
+    log.info("Remove any previous timeseries")

     sc.remove_timeseries(a)
-    print("Finished removing timeseries, now commit..")
+    log.info("Finished removing timeseries, now commit..")
     sc.commit("Remove existing timeseries")

-    run_old_reporting(sc)
-    print("First part of reporting completed, now procede with the water variables")
+    if HAS_MESSAGE_DATA:
+        run_old_reporting(sc)
+    log.info(
+        "First part of reporting completed, now proceed with the water variables"
+    )

     report(sc, reg, sdgs)
-    print("overall NAVIGATE reporting completed")
+    log.info("overall NAVIGATE reporting completed")

     # add ad-hoc calculated variables with a function
     ts = sc.timeseries()
@@ -1432,4 +1420,4 @@ def report_full(sc=False, reg="", sdgs=False):
     ts_long = pyam.IamDataFrame(ts)
     ts_long.to_csv(out_file)

-    print(f"Saving csv to {out_file}")
+    log.info(f"Saving
csv to {out_file}") diff --git a/message_ix_models/model/water/utils.py b/message_ix_models/model/water/utils.py index 72043498ec..1a3378324d 100644 --- a/message_ix_models/model/water/utils.py +++ b/message_ix_models/model/water/utils.py @@ -27,7 +27,7 @@ ] -def read_config(context: Context | None = None): +def read_config(context: Optional[Context] = None): """Read the water model configuration / metadata from file. Numerical values are converted to computation-ready data structures. @@ -38,7 +38,7 @@ def read_config(context: Context | None = None): The current Context, with the loaded configuration. """ - context = context or Context.get_instance(0) + context = context or Context.get_instance(-1) # if context.nexus_set == 'nexus': if "water set" in context: @@ -74,8 +74,8 @@ def map_add_on(rtype=Code): # Create a new code by combining two result["code"].append( Code( - id="".join(c.id for c in indices), - name=", ".join(c.name for c in indices), + id="".join(str(c.id) for c in indices), + name=", ".join(str(c.name) for c in indices), ) ) @@ -100,27 +100,27 @@ def map_add_on(rtype=Code): raise ValueError(rtype) -def add_commodity_and_level(df, default_level=None): +def add_commodity_and_level(df: pd.DataFrame, default_level=None): # Add input commodity and level - t_info = Context.get_instance()["water set"]["technology"]["add"] - c_info = get_codes("commodity") + t_info: list = Context.get_instance()["water set"]["technology"]["add"] + c_info: list = get_codes("commodity") @lru_cache() def t_cl(t): - input = t_info[t_info.index(t)].anno["input"] + input = t_info[t_info.index(t)].annotations["input"] # Commodity must be specified commodity = input["commodity"] # Use the default level for the commodity in the RES (per # commodity.yaml) level = ( input.get("level", "water_supply") - or c_info[c_info.index(commodity)].anno.get("level", None) + or c_info[c_info.index(commodity)].annotations.get("level", None) or default_level ) return commodity, level - def func(row): + def func(row: pd.Series): row[["commodity", "level"]] = t_cl(row["technology"]) return row @@ -134,10 +134,15 @@ def map_yv_ya_lt( Parameters ---------- - labels : pandas.DataFrame - Each column (dimension) corresponds to one in `df`. Each row represents one - matched set of labels for those dimensions. + periods : Tuple[int, ...] + A sequence of years. lt : int, lifetime + ya : int, active year + The first active year. + Returns + ------- + pd.DataFrame + A DataFrame with columns 'year_vtg' and 'year_act'. 
""" if not ya: ya = periods[0] @@ -160,4 +165,6 @@ def map_yv_ya_lt( ) # Select values using the `ya` and `lt` parameters - return df[(ya <= df.year_act) & (df.year_act - df.year_vtg <= lt)] + return df.loc[(ya <= df.year_act) & (df.year_act - df.year_vtg <= lt)].reset_index( + drop=True + ) diff --git a/message_ix_models/project/ssp/__init__.py b/message_ix_models/project/ssp/__init__.py index 5aed2aee31..4c30d43a14 100644 --- a/message_ix_models/project/ssp/__init__.py +++ b/message_ix_models/project/ssp/__init__.py @@ -59,7 +59,7 @@ def __set__(self, obj, value): @click.group("ssp") def cli(): - pass + """Shared Socioeconomic Pathways (SSP) project.""" @cli.command("gen-structures") diff --git a/message_ix_models/project/ssp/data.py b/message_ix_models/project/ssp/data.py index 2a585eb53b..93aa0fd112 100644 --- a/message_ix_models/project/ssp/data.py +++ b/message_ix_models/project/ssp/data.py @@ -1,16 +1,11 @@ import logging -from copy import copy from message_ix_models.tools.exo_data import ( ExoDataSource, iamc_like_data_for_query, register_source, ) -from message_ix_models.util import ( - HAS_MESSAGE_DATA, - package_data_path, - private_data_path, -) +from message_ix_models.util import package_data_path, private_data_path __all__ = [ "SSPOriginal", @@ -56,6 +51,9 @@ class SSPOriginal(ExoDataSource): id = "SSP" + #: Name of file containing the data. + filename = "SspDb_country_data_2013-06-12.csv.zip" + #: One-to-one correspondence between "model" codes and date fragments in scenario #: codes. model_date = { @@ -74,44 +72,45 @@ def __init__(self, source, source_kw): if not source.startswith(s): raise ValueError(source) - *parts, self.ssp_number = source.partition(s) + *parts, ssp_id = source.partition(s) # Map the `measure` keyword to a string appearing in the data - _kw = copy(source_kw) - self.measure = { + measure = { "GDP": "GDP|PPP", "POP": "Population", - }[_kw.pop("measure")] + }[source_kw.pop("measure")] # Store the model ID, if any - self.model = _kw.pop("model", None) + model = source_kw.pop("model", None) # Determine the date based on the model ID. There is a 1:1 correspondence. 
- self.date = self.model_date[self.model] + date = self.model_date[model] - if len(_kw): - raise ValueError(_kw) + self.raise_on_extra_kw(source_kw) - def __call__(self): # Assemble a query string - extra = "d" if self.ssp_number == "4" and self.model == "IIASA-WiC POP" else "" - query = " and ".join( - [ - f"SCENARIO == 'SSP{self.ssp_number}{extra}_v9_{self.date}'", - f"VARIABLE == '{self.measure}'", - f"MODEL == '{self.model}'" if self.model else "True", - ] + extra = "d" if ssp_id == "4" and model == "IIASA-WiC POP" else "" + self.query = ( + f"SCENARIO == 'SSP{ssp_id}{extra}_v9_{date}' and VARIABLE == '{measure}'" + + (f" and MODEL == '{model}'" if model else "") ) - log.debug(query) + # log.debug(query) - parts = ("ssp", "SspDb_country_data_2013-06-12.csv.zip") - if HAS_MESSAGE_DATA: - path = private_data_path(*parts) - else: - path = package_data_path("test", *parts) - log.warning(f"Reading random data from {path}") + # Iterate over possible locations for the data file + dirs = [private_data_path("ssp"), package_data_path("test", "ssp")] + for path in [d.joinpath(self.filename) for d in dirs]: + if not path.exists(): + log.info(f"Not found: {path}") + continue + if "test" in path.parts: + log.warning(f"Reading random data from {path}") + break + + self.path = path - return iamc_like_data_for_query(path, query, replace=self.replace) + def __call__(self): + # Use prepared path, query, and replacements + return iamc_like_data_for_query(self.path, self.query, replace=self.replace) @register_source @@ -123,6 +122,7 @@ class SSPUpdate(ExoDataSource): - `source`: Any value from :data:`.SSP_2024` or equivalent string, for instance "ICONICS:SSP(2024).2". + - `release`: One of "3.0" or "preview". Example ------- @@ -137,41 +137,85 @@ class SSPUpdate(ExoDataSource): id = "SSP update" + #: File names containing the data, according to the release. + filename = { + "3.0": "1706548837040-ssp_basic_drivers_release_3.0_full.csv.gz", + "preview": "SSP-Review-Phase-1.csv.gz", + } + def __init__(self, source, source_kw): s = "ICONICS:SSP(2024)." 
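+        # Illustrative note: per the class docstring, a valid `source` is e.g.
+        # "ICONICS:SSP(2024).2"; str.partition() below then leaves ssp_id = "2"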
if not source.startswith(s): raise ValueError(source) - *parts, self.ssp_number = source.partition(s) + *parts, ssp_id = source.partition(s) - # Map the `measure` keyword to a string appearing in the data - _kw = copy(source_kw) - self.measure = { + # Map the `measure` keyword to a 'Variable' dimension code + measure = { "GDP": "GDP|PPP", "POP": "Population", - }[_kw.pop("measure")] + }[source_kw.pop("measure")] - # Store the model ID, if any - self.model = _kw.pop("model", None) + # Store the model code, if any + model = source_kw.pop("model", None) - if len(_kw): - raise ValueError(_kw) + # Identify the data release date/version/label + release = source_kw.pop("release", "3.0") - def __call__(self): - # Assemble a query string - query = " and ".join( - [ - f"Scenario == 'SSP{self.ssp_number} - Review Phase 1'", - f"Variable == '{self.measure}'", - f"Model == '{self.model}'" if self.model else "True", - ] - ) + self.raise_on_extra_kw(source_kw) + + # Replacements to apply, if any + self.replace = {} + + # Prepare query pieces + models = [] + scenarios = [] + + if release == "3.0": + # Directories in which to locate `self.filename`; stored directly within + # message_ix_models + dirs = [package_data_path("ssp")] + + scenarios.append(f"SSP{ssp_id}") - parts = ("ssp", "SSP-Review-Phase-1.csv.gz") - if HAS_MESSAGE_DATA: - path = private_data_path(*parts) + if measure == "GDP|PPP": + # Configure to prepend (m="OECD…", s="Historical Reference") + # observations to series + models.extend({model, "OECD ENV-Growth 2023"}) + scenarios.append("Historical Reference") + self.replace.update( + Model={"OECD ENV-Growth 2023": model}, + Scenario={"Historical Reference": scenarios[0]}, + ) + elif release == "preview": + # Look first in message_data, then in message_ix_models test data + dirs = [private_data_path("ssp"), package_data_path("test", "ssp")] + + scenarios.append(f"SSP{ssp_id} - Review Phase 1") else: - path = package_data_path("test", *parts) - log.warning(f"Reading random data from {path}") + log.error( + f"{release = } invalid for {type(self)}; expected one of: " + f"{set(self.filename)}" + ) + raise ValueError(release) + + # Assemble and store a query string + self.query = f"Scenario in {scenarios!r} and Variable == '{measure}'" + ( + f" and Model in {models!r}" if models else "" + ) + # log.info(f"{self.query = }") + + # Iterate over possible locations for the data file + for path in [d.joinpath(self.filename[release]) for d in dirs]: + if not path.exists(): + log.info(f"Not found: {path}") + continue + if "test" in path.parts: + log.warning(f"Reading random data from {path}") + break - return iamc_like_data_for_query(path, query) + self.path = path + + def __call__(self): + # Use prepared path, query, and replacements + return iamc_like_data_for_query(self.path, self.query, replace=self.replace) diff --git a/message_ix_models/project/ssp/structure.py b/message_ix_models/project/ssp/structure.py index ba8e9ab8aa..0119f3f410 100644 --- a/message_ix_models/project/ssp/structure.py +++ b/message_ix_models/project/ssp/structure.py @@ -1,4 +1,5 @@ """Manipulate data structures for working with the SSPs.""" + import logging from textwrap import wrap from typing import TYPE_CHECKING, Optional diff --git a/message_ix_models/report/__init__.py b/message_ix_models/report/__init__.py index edcab1df5f..f50d2cc46f 100644 --- a/message_ix_models/report/__init__.py +++ b/message_ix_models/report/__init__.py @@ -246,7 +246,7 @@ def report(context: Context, *args, **kwargs): with ( nullcontext() if 
context.core.verbose - else silence_log(["genno", "message_ix_models"]) + else silence_log("genno message_ix_models") ): rep, key = prepare_reporter(context) diff --git a/message_ix_models/report/compat.py b/message_ix_models/report/compat.py index c032a59338..84f63d48e8 100644 --- a/message_ix_models/report/compat.py +++ b/message_ix_models/report/compat.py @@ -1,4 +1,5 @@ """Compatibility code that emulates :mod:`.message_data` reporting.""" + import logging from functools import partial from itertools import chain, count diff --git a/message_ix_models/report/operator.py b/message_ix_models/report/operator.py index 52fd2af0a5..65f993bad1 100644 --- a/message_ix_models/report/operator.py +++ b/message_ix_models/report/operator.py @@ -1,4 +1,5 @@ """Atomic reporting operations for MESSAGEix-GLOBIOM.""" + import itertools import logging import re @@ -13,7 +14,7 @@ from iam_units.emissions import SPECIES from message_ix_models import Context -from message_ix_models.util import add_par_data +from message_ix_models.util import add_par_data, nodes_ex_world if TYPE_CHECKING: from pathlib import Path @@ -34,6 +35,7 @@ "gwp_factors", "make_output_path", "model_periods", + "nodes_ex_world", "remove_ts", "share_curtailment", ] diff --git a/message_ix_models/report/plot.py b/message_ix_models/report/plot.py index c282431862..0dabcff1c8 100644 --- a/message_ix_models/report/plot.py +++ b/message_ix_models/report/plot.py @@ -3,6 +3,7 @@ The current set functions on time series data stored on the scenario by :mod:`message_ix_models.report` or :mod:`message_data` legacy reporting. """ + import logging import re from datetime import datetime @@ -100,9 +101,7 @@ def add_tasks( c.add(k, "get_ts", scenario_key, dict(variable=k.name)) # Add the plot itself - # TODO once the genno class returns the added key, change to "return super().…" - super().add_tasks(c, key, *inputs[1:], strict=strict) - return key + return super().add_tasks(c, key, *inputs[1:], strict=strict) def ggtitle(self, value=None) -> p9.ggtitle: """Return :class:`plotnine.ggtitle` including the current date & time.""" diff --git a/message_ix_models/report/sim.py b/message_ix_models/report/sim.py index 9ad6a76a03..bf18c4b375 100644 --- a/message_ix_models/report/sim.py +++ b/message_ix_models/report/sim.py @@ -1,4 +1,5 @@ """Simulated solution data for testing :mod:`~message_ix_models.report`.""" + import logging from collections import ChainMap, defaultdict from collections.abc import Mapping diff --git a/message_ix_models/report/util.py b/message_ix_models/report/util.py index 62ba48a17f..39496a4d4c 100644 --- a/message_ix_models/report/util.py +++ b/message_ix_models/report/util.py @@ -10,8 +10,6 @@ from message_ix import Reporter from sdmx.model.v21 import Code -from message_ix_models.util import eval_anno - log = logging.getLogger(__name__) @@ -198,6 +196,9 @@ def copy_ts(rep: Reporter, other: str, filters: Optional[dict]) -> Key: def add_replacements(dim: str, codes: Iterable[Code]) -> None: """Update :data:`REPLACE_DIMS` for dimension `dim` with values from `codes`.""" for code in codes: - label = eval_anno(code, "report") - if label is not None: + try: + label = str(code.get_annotation(id="report").text) + except KeyError: + pass + else: REPLACE_DIMS[dim][f"{code.id.title()}$"] = label diff --git a/message_ix_models/testing/__init__.py b/message_ix_models/testing/__init__.py index 27ffba9b2b..86a9be2ff2 100644 --- a/message_ix_models/testing/__init__.py +++ b/message_ix_models/testing/__init__.py @@ -1,21 +1,18 @@ import logging import 
os from base64 import b32hexencode -from contextlib import contextmanager from copy import deepcopy from pathlib import Path from random import randbytes from tempfile import TemporaryDirectory -import click.testing import message_ix import pandas as pd import pytest -from ixmp import Platform from ixmp import config as ixmp_config -from message_ix_models import cli, util -from message_ix_models.util._logging import mark_time, preserve_log_level +from message_ix_models import util +from message_ix_models.util._logging import mark_time from message_ix_models.util.context import Context log = logging.getLogger(__name__) @@ -48,17 +45,6 @@ def pytest_addoption(parser): ) -def pytest_sessionstart(): - # Quiet logs for some upstream packages - for name in ( - "graphviz._tools", - "pycountry.db", - "matplotlib.backends", - "matplotlib.font_manager", - ): - logging.getLogger(name).setLevel(logging.DEBUG + 1) - - # Fixtures @@ -82,6 +68,8 @@ def session_context(pytestconfig, tmp_env): the :ref:`pytest tmp_path directory `. """ + from platformdirs import user_cache_path + ctx = Context.only() # Temporary, empty local directory for local data @@ -91,7 +79,7 @@ def session_context(pytestconfig, tmp_env): # pick up the existing setting from the user environment. If False, use a pytest- # managed cache directory that persists across test sessions. ctx.cache_path = ( - ctx.local_data.joinpath("cache") + user_cache_path("message-ix-models", ensure_exists=True) if pytestconfig.option.local_cache # TODO use pytestconfig.cache.mkdir() when pytest >= 6.3 is available else Path(pytestconfig.cache.makedir("cache")) @@ -120,10 +108,7 @@ def session_context(pytestconfig, tmp_env): url=f"jdbc:hsqldb:mem://{platform_name}", jvmargs=pytestconfig.option.jvmargs, ) - - # Launch Platform and connect to testdb (reconnect if closed) - mp = Platform(name=platform_name) - mp.open_db() + ixmp_config.save() ctx.platform_info["name"] = platform_name @@ -157,84 +142,22 @@ def user_context(request): # pragma: no cover raise NotImplementedError -class CliRunner(click.testing.CliRunner): - """Subclass of :class:`click.testing.CliRunner` with extra features.""" - - # NB decorator ensures any changes that the CLI makes to the logger level are - # restored - @preserve_log_level() - def invoke(self, *args, **kwargs): - """Invoke the :program:`mix-models` CLI.""" - result = super().invoke(cli.main, *args, **kwargs) - - # Store the result to be used by assert_exit_0() - self.last_result = result - - return result - - def assert_exit_0(self, *args, **kwargs): - """Assert a result has exit_code 0, or print its traceback. - - If any `args` or `kwargs` are given, :meth:`.invoke` is first called. Otherwise, - the result from the last call of :meth:`.invoke` is used. - - Raises - ------ - AssertionError - if the result exit code is not 0. The exception contains the traceback from - within the CLI. 
- - Returns - ------- - click.testing.Result - """ - __tracebackhide__ = True - - if len(args) + len(kwargs): - self.invoke(*args, **kwargs) - - # Retrieve the last result - result = self.last_result - - if result.exit_code != 0: - print(f"{result.exit_code = }\nresult.output =\n{result.output}") - # Re-raise the exception triggered within the CLI invocation - raise (result.exc_info[1].__context__ or result.exc_info[1]) from None - - return result - - @property - def add_command(self): - return cli_test_group.add_command - - @contextmanager - def temporary_command(self, func: "click.Command", set_target: bool = True): - """Temporarily attach command `func` to :func:`cli_test_group`.""" - assert func.name is not None - try: - cli_test_group.add_command(func) - yield - finally: - cli_test_group.commands.pop(func.name) - - @pytest.fixture -def mix_models_cli(request, test_context, tmp_env): - """A :class:`.CliRunner` object that invokes the :program:`mix-models` CLI.""" - # Require the `test_context` fixture in order to (a) set Context.local_data and (b) - # ensure changes to the Context from tested CLI commands are isolated from other - # tests - yield CliRunner(env=tmp_env) +def mix_models_cli(session_context, tmp_env): + """A :class:`.CliRunner` object that invokes the :program:`mix-models` CLI. + NB this requires: -@cli.main.group("_test", hidden=True) -def cli_test_group(): - """Hidden group of CLI commands. - - Other code which needs to test CLI behaviour **may** attach temporary/throw-away - commands to this group and then invoke them using :func:`mix_models_cli`. This - avoids the need to expose additional commands for testing purposes only. + - The :mod:`ixmp` :func:`.tmp_env` fixture. This sets ``IXMP_DATA`` to a temporary + directory managed by :mod:`pytest`. + - The :func:`session_context` fixture. This (a) sets :attr:`.Config.local_data` to + a temporary directory within ``IXMP_DATA`` and (b) ensures changes to + :class:`.Context` made by invoked commands do not reach other tests. 
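+
+    A hypothetical use in a test body (sketch only; ``test_cli_help`` is not a
+    real test in this package):
+
+    >>> def test_cli_help(mix_models_cli):
+    ...     mix_models_cli.assert_exit_0(["--help"])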
""" + from message_ix_models import cli + from message_ix_models.util.click import CliRunner + + yield CliRunner(cli.main, cli.__name__, env=tmp_env) # Testing utility functions diff --git a/message_ix_models/tests/model/test_disutility.py b/message_ix_models/tests/model/test_disutility.py index f78342f86b..83aaa41fe0 100644 --- a/message_ix_models/tests/model/test_disutility.py +++ b/message_ix_models/tests/model/test_disutility.py @@ -1,4 +1,5 @@ """Tests of :mod:`.model.disutility`.""" + from itertools import product import pandas as pd diff --git a/message_ix_models/tests/model/test_structure.py b/message_ix_models/tests/model/test_structure.py index 8c0fef4777..dcc0ce2f21 100644 --- a/message_ix_models/tests/model/test_structure.py +++ b/message_ix_models/tests/model/test_structure.py @@ -15,7 +15,7 @@ process_commodity_codes, process_units_anno, ) -from message_ix_models.util import as_codes, eval_anno +from message_ix_models.util import as_codes @pytest.mark.parametrize( @@ -97,8 +97,9 @@ def test_commodities(self): assert check in data # Units for one commodity can be retrieved and parsed + g = dict(registry=registry) coal = data[data.index("coal")] - assert isinstance(eval_anno(coal, "units"), registry.Unit) + assert isinstance(coal.eval_annotation("units", globals=g), registry.Unit) # Descriptions are parsed without new lines crudeoil = data[data.index("crudeoil")] @@ -107,7 +108,7 @@ def test_commodities(self): # Processing a second time does not double-wrap the unit expressions process_commodity_codes(data) coal = data[data.index("coal")] - assert isinstance(eval_anno(coal, "units"), registry.Unit) + assert isinstance(coal.eval_annotation("units", globals=g), registry.Unit) def test_levels(self): data = get_codes("level") @@ -277,4 +278,6 @@ def test_process_units_anno(): process_units_anno("", codes[0]) # Parents' units are propagated to the child - assert registry.Unit("kg") == eval_anno(codes[1], "units") + assert registry.Unit("kg") == codes[1].eval_annotation( + "units", dict(registry=registry) + ) diff --git a/message_ix_models/tests/model/water/test_irrigation.py b/message_ix_models/tests/model/water/test_irrigation.py index 43188cae29..c7b45befe4 100644 --- a/message_ix_models/tests/model/water/test_irrigation.py +++ b/message_ix_models/tests/model/water/test_irrigation.py @@ -1,13 +1,23 @@ -from message_ix import Scenario from unittest.mock import patch import pandas as pd +from message_ix import Scenario +from message_ix_models import ScenarioInfo +from message_ix_models.model.structure import get_codes from message_ix_models.model.water.data.irrigation import add_irr_structure def test_add_irr_structure(test_context): context = test_context + + # FIXME You probably want this to be part of a common setup rather than writing + # something like this for every test + context.type_reg = "country" + nodes = get_codes(f"node/{context.regions}") + nodes = list(map(str, nodes[nodes.index("World")].child)) + context.map_ISO_c = {context.regions: nodes[0]} + mp = context.get_platform() scenario_info = { "mp": mp, @@ -21,6 +31,9 @@ def test_add_irr_structure(test_context): s.add_set("node", ["loc1", "loc2"]) s.add_set("year", [2020, 2030, 2040]) + # FIXME same as above + context["water build info"] = ScenarioInfo(s) + # Mock the DataFrame read from CSV df_node = pd.DataFrame({"BCU_name": ["1", "2"], "REGION": ["region1", "region2"]}) diff --git a/message_ix_models/tests/model/water/test_utils.py b/message_ix_models/tests/model/water/test_utils.py index c479ad8adf..c754b1cdfb 
100644 --- a/message_ix_models/tests/model/water/test_utils.py +++ b/message_ix_models/tests/model/water/test_utils.py @@ -2,7 +2,7 @@ import pandas as pd import xarray as xr -from sdmx.model.common import Code +from sdmx.model.common import Annotation, Code from message_ix_models import Context from message_ix_models.model.water.utils import ( @@ -11,25 +11,25 @@ map_yv_ya_lt, read_config, ) +from message_ix_models.util import load_private_data def test_read_config(test_context): # Mock the context context = test_context - # Mock the data returned by load_private_data - mock_data = {"test_key": "test_value"} + # Call the function to be tested + result = read_config(context) - # Mock the load_private_data function to return mock_data - with patch("message_ix_models.util.load_private_data", return_value=mock_data): - # Call the function to be tested - result = read_config(context) + config_parts = ["water", "config.yaml"] + set_parts = ["water", "set.yaml"] + technology_parts = ["water", "technology.yaml"] # Assert the results assert isinstance(result, Context) - assert result["water config"] == mock_data - assert result["water set"] == mock_data - assert result["water technology"] == mock_data + assert result["water config"] == load_private_data(*config_parts) + assert result["water set"] == load_private_data(*set_parts) + assert result["water technology"] == load_private_data(*technology_parts) def test_map_add_on(): @@ -42,7 +42,9 @@ } # Mock the read_config function to return mock_data - with patch("your_module.read_config", return_value=mock_data): + with patch( + "message_ix_models.model.water.utils.read_config", return_value=mock_data + ): # Call the function to be tested result = map_add_on() @@ -51,7 +53,9 @@ assert result == expected # Testing with rtype = 'indexers' - with patch("your_module.read_config", return_value=mock_data): + with patch( + "message_ix_models.model.water.utils.read_config", return_value=mock_data + ): result = map_add_on(rtype="indexers") expected = { @@ -67,35 +71,40 @@ def test_add_commodity_and_level(): # Mock the dataframe df = pd.DataFrame({"technology": ["tech1", "tech2"]}) + # FIXME Something here is seriously broken. Annotations need rework; please + # clarify how the annotations will be accessed and how the resulting data + # will be used! 
# Mock the data returned by Context.get_instance and get_codes mock_context_data = { "water set": { "technology": { - "add": pd.Series( - data=[ - Code( - id="tech1", - annotations=["input", "commodity", "com1", "level", "lev1"], - ), - Code(id="tech2", annotations=["input", "commodity", "com2"]), - ], - name="tech", - ) + "add": [ + Code( + id="tech1", + annotations=[ + Annotation("input", "commodity", "com1", "level", "lev1") + ], + ), + Code( + id="tech2", + annotations=[Annotation("input", "commodity", "com2")], + ), + ], } } } - mock_codes_data = pd.Series( - data=[ - Code(id="com1", annotations=["level", "lev1"]), - Code(id="com2", annotations=["level", "lev2"]), - ], - name="com", - ) + mock_codes_data = [ + Code(id="com1", annotations=[Annotation("level", "lev1")]), + Code(id="com2", annotations=[Annotation("level", "lev2")]), + ] # Mock the Context.get_instance and get_codes functions to return mock_data with patch( - "your_module.Context.get_instance", return_value=mock_context_data - ), patch("your_module.get_codes", return_value=mock_codes_data): + "message_ix_models.util.context.Context.get_instance", + return_value=mock_context_data, + ), patch( + "message_ix_models.model.structure.get_codes", return_value=mock_codes_data + ): # Call the function to be tested result = add_commodity_and_level(df) @@ -116,21 +125,24 @@ def test_map_yv_ya_lt(): ya = 2020 expected = pd.DataFrame( - {"year_vtg": [2010, 2020, 2020, 2030], "year_act": [2020, 2020, 2030, 2040]} + { + "year_vtg": [2010, 2010, 2020, 2020, 2020, 2030, 2030, 2040], + "year_act": [2020, 2030, 2020, 2030, 2040, 2030, 2040, 2040], + } ) - result = map_yv_ya_lt(periods, lt, ya) + result = map_yv_ya_lt(periods, lt, ya).reset_index(drop=True) + # print(result) pd.testing.assert_frame_equal(result, expected) - # test with no active year specified expected_no_ya = pd.DataFrame( { - "year_vtg": [2020, 2020, 2020, 2020], - "year_act": [2020, 2030, 2040, 2050], + "year_vtg": [2010, 2010, 2010, 2020, 2020, 2020, 2030, 2030, 2040], + "year_act": [2010, 2020, 2030, 2020, 2030, 2040, 2030, 2040, 2040], } ) - result_no_ya = map_yv_ya_lt(periods, lt) + result_no_ya = map_yv_ya_lt(periods, lt).reset_index(drop=True) pd.testing.assert_frame_equal(result_no_ya, expected_no_ya) diff --git a/message_ix_models/tests/model/water/test_water_supply.py b/message_ix_models/tests/model/water/test_water_supply.py index 50f2defd8f..1daa1808a7 100644 --- a/message_ix_models/tests/model/water/test_water_supply.py +++ b/message_ix_models/tests/model/water/test_water_supply.py @@ -1,8 +1,9 @@ -from message_ix import Scenario from unittest.mock import patch import pandas as pd +from message_ix import Scenario +from message_ix_models import ScenarioInfo from message_ix_models.model.water.data.water_supply import ( add_e_flow, add_water_supply, @@ -10,25 +11,20 @@ ) -def test_map_basin_region_wat(): - # Mock the context - context = { - "water build info": {"Y": [2020, 2030, 2040]}, - "type_reg": "country", - "regions": "test_region", - "map_ISO_c": {"test_region": "test_ISO"}, - "RCP": "test_RCP", - "REL": "test_REL", - "time": "year", - } +def test_map_basin_region_wat(test_context): + # FIXME You probably want this to be part of a common setup rather than writing + # something like this for every test + # Personalize the context + context = test_context + context["water build info"] = {"Y": [2020, 2030, 2040]} + context.type_reg = "country" + context.regions = "test_region" + context.map_ISO_c = {"test_region": "test_ISO"} + context.RCP = "test_RCP" + 
context.REL = "test_REL" + context.time = "year" # Mock the DataFrames read from CSV - pd.DataFrame( - { - "BCU_name": ["test_BCU"], - } - ) - df_sw = pd.DataFrame( { "Unnamed: 0": [0], @@ -42,6 +38,9 @@ def test_map_basin_region_wat(): "message_ix_models.util.private_data_path", return_value="path/to/file" ), patch("pandas.read_csv", return_value=df_sw): context["time"] = "year" + # FIXME This is not working with context.type_reg == "country". Have you ever + # confirmed that the code works in this case? If not, maybe this test is not + # needed. result = map_basin_region_wat(context) # Assert the results @@ -53,19 +52,19 @@ def test_map_basin_region_wat(): def test_add_water_supply(test_context): - # Mock the context - context = { - "water build info": {"Y": [2020, 2030, 2040]}, - "type_reg": "country", - "regions": "test_region", - "map_ISO_c": {"test_region": "test_ISO"}, - "RCP": "test_RCP", - "REL": "test_REL", - "time": "year", - "nexus_set": "nexus", - "get_scenario": lambda: {"firstmodelyear": 2020}, - } + # FIXME You probably want this to be part of a common setup rather than writing + # something like this for every test + # Personalize the context context = test_context + context["water build info"] = {"Y": [2020, 2030, 2040]} + context.type_reg = "country" + context.regions = "test_region" + context.map_ISO_c = {"test_region": "test_ISO"} + context.RCP = "test_RCP" + context.REL = "test_REL" + context.time = "year" + context.nexus_set = "nexus" + mp = context.get_platform() scenario_info = { "mp": mp, @@ -78,6 +77,12 @@ def test_add_water_supply(test_context): s.add_set("technology", ["tech1", "tech2"]) s.add_set("node", ["loc1", "loc2"]) s.add_set("year", [2020, 2030, 2040]) + + # FIXME You probably want this to be part of a common setup rather than writing + # something like this for every test + context.set_scenario(s) + context["water build info"] = ScenarioInfo(s) + # Mock the DataFrames read from CSV df_node = pd.DataFrame({"BCU_name": ["test_BCU"], "REGION": ["test_REGION"]}) @@ -110,9 +115,9 @@ def test_add_water_supply(test_context): with patch( "message_ix_models.util.private_data_path", return_value="path/to/file" ), patch("pandas.read_csv", return_value=df_node), patch( - "message_ix_models.model.water.water_supply.map_basin_region_wat", + "message_ix_models.model.water.data.water_supply.map_basin_region_wat", return_value=df_sw, # Adjust this import - ): + ), patch("message_ix_models.util.context.Context.get_scenario", return_value=s): # Call the function to be tested result = add_water_supply(context) @@ -128,32 +133,41 @@ def test_add_water_supply(test_context): assert isinstance(df, pd.DataFrame) -def test_add_e_flow(): - # Mock the context - context = { - "water build info": {"Y": [2020, 2030, 2040]}, - "regions": "test_region", - "RCP": "test_RCP", - "time": "year", - "SDG": True, - } +def test_add_e_flow(test_context): + # FIXME You probably want this to be part of a common setup rather than writing + # something like this for every test + # Personalize the context + context = test_context + context["water build info"] = {"Y": [2020, 2030, 2040]} + context.regions = "test_region" + context.RCP = "test_RCP" + context.REL = "test_REL" + context.time = "year" + context.SDG = True # Mock the DataFrames read from CSV df_sw = pd.DataFrame( - {"Region": ["test_Region"], "value": [1], "year": [2020], "time": ["year"]} - ) - - pd.DataFrame( - {"Region": ["test_Region"], "value": [1], "year": [2020], "time": ["year"]} + { + "Region": ["test_Region"], + "value": [1], + 
"year": [2020], + "time": ["year"], + "Unnamed: 0": [0], + "BCU_name": ["test_BCU"], + } ) # Mock the function 'read_water_availability' to return the mocked DataFrame with patch( - "message_ix_models.model.water.demands.read_water_availability", + "message_ix_models.model.water.data.demands.read_water_availability", return_value=(df_sw, df_sw), ), patch( "message_ix_models.util.private_data_path", return_value="path/to/file" ), patch("pandas.read_csv", return_value=df_sw): + # FIXME This doesn't work because read_water_availability() in line 749 of + # water/data/demands expects the second column of df_sw to be "years", but it + # contains the names of the columns at that point starting with df_sw here, not + # something that pandas can convert to DateTimes! # Call the function to be tested result = add_e_flow(context) diff --git a/message_ix_models/tests/test_cli.py b/message_ix_models/tests/test_cli.py index a1b7118a30..5b4f6b2313 100644 --- a/message_ix_models/tests/test_cli.py +++ b/message_ix_models/tests/test_cli.py @@ -1,4 +1,5 @@ """Basic tests of the command line.""" + import ixmp import pytest from message_ix.testing import make_dantzig diff --git a/message_ix_models/tests/test_report.py b/message_ix_models/tests/test_report.py index 1fdf351f1d..4f8e507ece 100644 --- a/message_ix_models/tests/test_report.py +++ b/message_ix_models/tests/test_report.py @@ -1,4 +1,5 @@ """Tests for :mod:`message_ix_models.report`.""" + from importlib.metadata import version import numpy as np diff --git a/message_ix_models/tests/test_testing.py b/message_ix_models/tests/test_testing.py index eb1436128d..b7e5b6e5d5 100644 --- a/message_ix_models/tests/test_testing.py +++ b/message_ix_models/tests/test_testing.py @@ -1,8 +1,5 @@ import os -import click -import pytest - from message_ix_models.testing import bare_res, not_ci @@ -21,8 +18,8 @@ def test_bare_res_solved(request, test_context): def test_cli_runner(mix_models_cli): - with pytest.raises(click.exceptions.UsageError, match="No such command 'foo'"): - mix_models_cli.assert_exit_0(["foo", "bar"]) + result = mix_models_cli.invoke(["foo", "bar"]) + assert "No such command 'foo'" in result.output @not_ci(reason="foo", action="skip") diff --git a/message_ix_models/tests/test_util.py b/message_ix_models/tests/test_util.py index d76b85ad7a..cf0d43bd59 100644 --- a/message_ix_models/tests/test_util.py +++ b/message_ix_models/tests/test_util.py @@ -1,4 +1,5 @@ """Tests of :mod:`message_ix_models.util`.""" + import logging import re from importlib.metadata import version @@ -156,10 +157,11 @@ def test_convert_units(recwarn): # With store="quantity", a series of pint.Quantity is returned result = convert_units(*args, store="quantity") - assert all( - np.isclose(a, b, atol=1e-4 * registry.kg) - for a, b in zip(exp.values, result.values) - ) + # Will raise a DimensionalityError if units are not equal + ratios = [(a / b) for a, b in zip(exp.values, result.values)] + # Assert equal units and sufficiently close values + for ratio in ratios: + assert ratio.dimensionless and np.isclose(ratio, 1, atol=1e-4) # With store="magnitude", a series of floats exp = pd.Series([q.magnitude for q in exp.values], name="bar") diff --git a/message_ix_models/tests/test_workflow.py b/message_ix_models/tests/test_workflow.py index 1394fab8f6..9874374638 100644 --- a/message_ix_models/tests/test_workflow.py +++ b/message_ix_models/tests/test_workflow.py @@ -91,34 +91,36 @@ def _wf( def test_make_click_command(mix_models_cli) -> None: import click + from message_ix_models.cli 
import cli_test_group + from message_ix_models.util.click import temporary_command + # make_click_command() runs and generates a command name = "make-click-command" cmd = make_click_command(f"{__name__}._wf", name=name, slug="test") assert isinstance(cmd, click.Command) # Add this into the hidden CLI test group - mix_models_cli.add_command(cmd) - - # Invoke the command with various parameters - for params, output in ( - (["--go", "B"], "nothing returned, workflow will continue with"), - (["B"], "Workflow diagram written to"), - ): - # Command runs and exits with 0 - result = mix_models_cli.assert_exit_0(["_test", "run"] + params) - # Expected log messages or output were printed - assert output in result.output - - # Invalid usage - for params, output in ( - (["--go", "C"], "Error: No step(s) matched"), - (["--go"], "Error: No target step provided and no default for"), - # Step changes_b() fails if changes_a() is not first run - (["--go", "--from=[AX]", "B"], "Execute None: # Log messages reflect workflow steps executed start_index = 1 if caplog.messages[0].startswith("Cull") else 0 - m = "MESSAGEix-GLOBIOM R14 YB" + # This setting obtains the value R11 on some Windows GHA jobs, but is otherwise R14. + # TODO Debug and fix. + m = f"MESSAGEix-GLOBIOM {test_context.model.regions} YB" messages = [ f"Loaded ixmp://{mp}/{m}/test_workflow#1", f"Step runs on ixmp://{mp}/{m}/test_workflow#1", @@ -164,7 +168,7 @@ def test_workflow(caplog, request, test_context, wf) -> None: assert re.match(expr, message) assert re.match( - r"""'B': + rf"""'B': - - 'context': - @@ -172,7 +176,7 @@ def test_workflow(caplog, request, test_context, wf) -> None: - - 'context' \(above\) - 'base': - - MESSAGEix-GLOBIOM R14 YB/test_workflow> + - {m}/test_workflow> - 'context' \(above\) - None""", wf.describe("B"), @@ -183,12 +187,12 @@ def test_workflow(caplog, request, test_context, wf) -> None: # Description reflects that changes_a() will no longer be called assert re.match( - r"""'B': + rf"""'B': - - 'context': - - 'A': - - MESSAGEix-GLOBIOM R14 YB/test_workflow> + - {m}/test_workflow> - 'context' \(above\) - None""", wf.describe("B"), diff --git a/message_ix_models/tests/tools/iea/test_web.py b/message_ix_models/tests/tools/iea/test_web.py index 645306d0b1..0da05571be 100644 --- a/message_ix_models/tests/tools/iea/test_web.py +++ b/message_ix_models/tests/tools/iea/test_web.py @@ -7,6 +7,7 @@ from message_ix_models.testing import GHA from message_ix_models.tools.exo_data import prepare_computer from message_ix_models.tools.iea.web import DIMS, generate_code_lists, load_data +from message_ix_models.util import HAS_MESSAGE_DATA class TestIEA_EWEB: @@ -68,7 +69,9 @@ def test_prepare_computer(self, test_context, source, source_kw): pytest.param( "IEA", "2023", - marks=pytest.mark.xfail(GHA, reason="No fuzzed version of this data"), + marks=pytest.mark.xfail( + GHA or not HAS_MESSAGE_DATA, reason="No fuzzed version of this data" + ), ), ("OECD", "2023"), ("OECD", "2022"), diff --git a/message_ix_models/tests/util/test_click.py b/message_ix_models/tests/util/test_click.py index fe0f26ac71..593dd3d3a3 100644 --- a/message_ix_models/tests/util/test_click.py +++ b/message_ix_models/tests/util/test_click.py @@ -1,7 +1,9 @@ """Basic tests of the command line.""" + import click -from message_ix_models.util.click import common_params +from message_ix_models.cli import cli_test_group +from message_ix_models.util.click import common_params, temporary_command def test_default_path_cb(session_context, mix_models_cli): @@ -22,7 +24,7 @@ 
def func(ctx, rep_out_path): expected = session_context.local_data / "reporting_output" # Run the command - with mix_models_cli.temporary_command(func): + with temporary_command(cli_test_group, func): result = mix_models_cli.assert_exit_0(cmd) # The value was stored on, and retrieved from, `ctx` @@ -48,7 +50,7 @@ def inner(context, regions): print(context.model.regions) # Give the option for the outer group, but not for the inner command - with mix_models_cli.temporary_command(outer): + with temporary_command(cli_test_group, outer): result = mix_models_cli.assert_exit_0( ["_test", "outer", "--regions=ZMB", "inner"] ) @@ -68,7 +70,7 @@ def func(ctx, ssp): print(ctx["ssp"]) # Print the value stored on the Context object # Run the command with a valid value - with mix_models_cli.temporary_command(func): + with temporary_command(cli_test_group, func): result = mix_models_cli.assert_exit_0(["_test", func.name, "SSP2"]) # The value was stored on, and retrieved from, `ctx` @@ -95,7 +97,7 @@ def func(ctx, **kwargs): p.write_text(text) # Run the command, referring to the temporary file - with mix_models_cli.temporary_command(func): + with temporary_command(cli_test_group, func): result = mix_models_cli.assert_exit_0( ["_test", func.name, f"--urls-from-file={p}"] ) diff --git a/message_ix_models/tests/util/test_context.py b/message_ix_models/tests/util/test_context.py index 2da18bb4ee..25c7ff1e87 100644 --- a/message_ix_models/tests/util/test_context.py +++ b/message_ix_models/tests/util/test_context.py @@ -157,11 +157,12 @@ def test_write_debug_archive(self, mix_models_cli): """:meth:`.write_debug_archive` works.""" # Create a CLI command attached to the hidden "_test" group - from message_ix_models.testing import cli_test_group + from message_ix_models.cli import cli_test_group + from message_ix_models.util.click import temporary_command - @cli_test_group.command("write-debug-archive") + @click.command("write-debug-archive") @click.pass_obj - def _(context): + def command(context): # Register one file to be archived p = context.core.local_data.joinpath("foo.txt") context.core.debug_paths.append(p) @@ -176,7 +177,8 @@ def _(context): context.write_debug_archive() # Invoke the command; I/O occurs in a temporary directory - result = mix_models_cli.invoke(["_test", "write-debug-archive"]) + with temporary_command(cli_test_group, command): + result = mix_models_cli.invoke(["_test", "write-debug-archive"]) # Output path is constructed as expected; file exists match = re.search( diff --git a/message_ix_models/tests/util/test_logging.py b/message_ix_models/tests/util/test_logging.py index f3be6907de..7563c26a57 100644 --- a/message_ix_models/tests/util/test_logging.py +++ b/message_ix_models/tests/util/test_logging.py @@ -1,6 +1,8 @@ import logging import re +import pytest + from message_ix_models.util._logging import mark_time, silence_log @@ -17,6 +19,40 @@ def test_mark_time(caplog): assert all(re.match(r" \+\d+\.\d = \d+\.\d seconds", m) for m in caplog.messages) +class TestQueueListener: + #: Number of log messages to emit. + N = 1_000 + + #: Number of times to run the test. + k = 4 + + @pytest.mark.parametrize( + "method", + ("click", pytest.param("subprocess", marks=pytest.mark.skip(reason="Slow."))), + ) + @pytest.mark.parametrize("k", range(k)) + def test_flush(self, caplog, mix_models_cli, method, k): + """Test logging in multiple processes, multiple threads, and with :mod:`click`. + + With pytest-xdist, these :attr:`k` test cases will run in multiple processes. 
+ Each process will have its main thread, and the thread of the QueueListener. + The test ensures that all :attr:`N` log records emitted by the :py:`func()` are + "flushed" from the queue, transferred to stdout by the :class:`.StreamHandler` + and captured by the :class:`.CliRunner`. + """ + + # Run the command, capture output + # See message_ix_models.cli._log_threads + result = mix_models_cli.assert_exit_0( + ["_test", "log-threads", str(k), str(self.N)], method=method + ) + + # All records are emitted; the last record ends with N - 1 + assert result.output.rstrip().endswith(f"{self.N - 1}"), result.output.split( + "\n" + )[-2:] + + def test_silence_log(caplog): # An example logger log = logging.getLogger("message_ix_models.model") @@ -34,7 +70,7 @@ def test_silence_log(caplog): log.warning(msg) assert [ - "Set level=40 for logger(s): message_ix_models message_data", + "Set level=40 for logger(s): message_data message_ix_models", "…restored.", ] == caplog.messages caplog.clear() diff --git a/message_ix_models/tests/util/test_node.py b/message_ix_models/tests/util/test_node.py index 8cfaafef4b..3365708fe4 100644 --- a/message_ix_models/tests/util/test_node.py +++ b/message_ix_models/tests/util/test_node.py @@ -1,4 +1,5 @@ """Tests of :mod:`message_ix_models.util.node`.""" + import re import pandas as pd diff --git a/message_ix_models/tests/util/test_sdmx.py b/message_ix_models/tests/util/test_sdmx.py index 16fb946be4..febea39234 100644 --- a/message_ix_models/tests/util/test_sdmx.py +++ b/message_ix_models/tests/util/test_sdmx.py @@ -7,14 +7,17 @@ from message_ix_models.util.sdmx import eval_anno, make_enum, read -def test_eval_anno(caplog): +def test_eval_anno(caplog, recwarn): c = Code() - assert None is eval_anno(c, "foo") + with pytest.warns(DeprecationWarning): + assert None is eval_anno(c, "foo") c.annotations.append(Annotation(id="foo", text="bar baz")) - with caplog.at_level(logging.DEBUG, logger="message_ix_models"): + with caplog.at_level(logging.DEBUG, logger="message_ix_models"), pytest.warns( + DeprecationWarning + ): assert "bar baz" == eval_anno(c, "foo") assert re.fullmatch( @@ -23,7 +26,8 @@ def test_eval_anno(caplog): c.annotations.append(Annotation(id="qux", text="3 + 4")) - assert 7 == eval_anno(c, id="qux") + with pytest.warns(DeprecationWarning): + assert 7 == eval_anno(c, id="qux") def test_make_enum(): diff --git a/message_ix_models/tools/advance.py b/message_ix_models/tools/advance.py index d8558d5905..fff6a5e511 100644 --- a/message_ix_models/tools/advance.py +++ b/message_ix_models/tools/advance.py @@ -1,4 +1,5 @@ """Handle data from the ADVANCE project.""" + import logging from pathlib import Path from typing import Optional diff --git a/message_ix_models/tools/exo_data.py b/message_ix_models/tools/exo_data.py index 4c8ef0dfce..78ff487abf 100644 --- a/message_ix_models/tools/exo_data.py +++ b/message_ix_models/tools/exo_data.py @@ -1,6 +1,8 @@ """Generic tools for working with exogenous data sources.""" + import logging from abc import ABC, abstractmethod +from copy import deepcopy from operator import itemgetter from pathlib import Path from typing import Any, Dict, Literal, Mapping, Optional, Tuple, Type @@ -111,6 +113,15 @@ def transform(self, c: "Computer", base_key: Key) -> Key: return k2 + def raise_on_extra_kw(self, kwargs) -> None: + """Helper for subclasses.""" + if len(kwargs): + log.error( + f"Unhandled extra keyword arguments for {type(self).__name__}: " + + repr(kwargs) + ) + raise ValueError(kwargs) + def prepare_computer( context, @@ -163,7 
+174,7 @@ def prepare_computer( for cls in SOURCES.values(): try: # Instantiate a Source object to provide this data - source_obj = cls(source, source_kw or dict()) + source_obj = cls(source, deepcopy(source_kw or dict())) except Exception: pass # Class does not recognize the arguments diff --git a/message_ix_models/tools/iamc.py b/message_ix_models/tools/iamc.py index 66d3c3e0fd..91f5e9fbcd 100644 --- a/message_ix_models/tools/iamc.py +++ b/message_ix_models/tools/iamc.py @@ -1,4 +1,5 @@ """Tools for working with IAMC-structured data.""" + from typing import Optional import pandas as pd diff --git a/message_ix_models/tools/iea/web.py b/message_ix_models/tools/iea/web.py index 659824db81..e78a288ac8 100644 --- a/message_ix_models/tools/iea/web.py +++ b/message_ix_models/tools/iea/web.py @@ -1,4 +1,5 @@ """Tools for IEA (Extended) World Energy Balance (WEB) data.""" + import logging import zipfile from copy import copy diff --git a/message_ix_models/tools/wb.py b/message_ix_models/tools/wb.py index a217d88153..08525c15d5 100644 --- a/message_ix_models/tools/wb.py +++ b/message_ix_models/tools/wb.py @@ -1,4 +1,5 @@ """Tools for World Bank data.""" + import logging from collections import defaultdict from functools import lru_cache @@ -12,7 +13,8 @@ log = logging.getLogger(__name__) -def assign_income_groups( +# FIXME Reduce complexity from 12 → ≤11 +def assign_income_groups( # noqa: C901 cl_node: "sdmx.model.common.Codelist", cl_income_group: "sdmx.model.common.Codelist", method: str = "population", @@ -67,7 +69,7 @@ def get_weight(code: "sdmx.model.common.Code") -> float: elif method == "population": # Retrieve WB_WDI data for SERIES=SP_POP_TOTAL (Population, total) dm = sdmx.Client("WB_WDI").data( - "WDI", key="A.SP_POP_TOTL.", params=dict(startperiod=2020, endperiod=2020) + "WDI", key="A.SP_POP_TOTL.", params=dict(start_period=2020, end_period=2020) ) # Convert to pd.Series with multi-index with levels: REF_AREA, SERIES, FREQ, diff --git a/message_ix_models/util/__init__.py b/message_ix_models/util/__init__.py index a0209b311c..2b6fcc3941 100644 --- a/message_ix_models/util/__init__.py +++ b/message_ix_models/util/__init__.py @@ -1,5 +1,6 @@ import logging from collections import ChainMap, defaultdict +from datetime import datetime from functools import partial, update_wrapper from importlib.metadata import version from itertools import count @@ -21,6 +22,7 @@ import pint from ._convert_units import convert_units, series_of_pint_quantity +from ._logging import mark_time, preserve_log_level, silence_log from .cache import cached from .common import ( HAS_MESSAGE_DATA, @@ -57,6 +59,7 @@ "check_support", "convert_units", "copy_column", + "datetime_now_with_tz", "eval_anno", "ffill", "identify_nodes", @@ -67,15 +70,19 @@ "make_io", "make_matched_dfs", "make_source_tech", + "mark_time", "maybe_query", "merge_data", "minimum_version", "package_data_path", + "preserve_log_level", "private_data_path", "replace_par_data", "same_node", "same_time", "series_of_pint_quantity", + "show_versions", + "silence_log", "strip_par_data", ] @@ -209,7 +216,9 @@ def _check_dim(d): _check_dim(dim) # Concatenate 1 copy of `df` for each row in `labels` df = pd.concat( - [df.assign(**row) for _, row in labels.iterrows()], ignore_index=True + [df.assign(**row) for _, row in labels.iterrows()], + ignore_index=True, + sort=False, ) # Next, broadcast other dimensions given as keyword arguments @@ -226,7 +235,7 @@ def _check_dim(d): # - Re-add the column from the constructed MultiIndex # - Reindex for sequential row 
numbers df = ( - pd.concat([df] * len(levels), keys=levels, names=[dim]) + pd.concat([df] * len(levels), keys=levels, names=[dim], sort=False) .drop(dim, axis=1) .reset_index(dim) .reset_index(drop=True) @@ -278,6 +287,12 @@ def copy_column(column_name): return lambda df: df[column_name] +def datetime_now_with_tz() -> datetime: + """Current date and time with time zone information.""" + tz = datetime.now().astimezone().tzinfo + return datetime.now(tz) + + def ffill( df: pd.DataFrame, dim: str, values: Sequence[CodeLike], expr: Optional[str] = None ) -> pd.DataFrame: @@ -321,8 +336,7 @@ def _maybe_eval(df): class KeyIterator(Protocol): - def __call__(self) -> "genno.Key": - ... + def __call__(self) -> "genno.Key": ... def iter_keys(base: "genno.Key") -> KeyIterator: @@ -664,6 +678,24 @@ def same_time(df: pd.DataFrame) -> pd.DataFrame: return df.assign(**{c: copy_column("time") for c in cols}) +def show_versions() -> str: + """Output of :func:`ixmp.show_versions`, as a :class:`str`.""" + from io import StringIO + + from . import ixmp + from ._logging import preserve_log_handlers + + # Retrieve package versions + buf = StringIO() + + # show_versions() imports pyam-iamc, which in turn imports ixmp4, which removes all + # handlers from the root logger (?!). Preserve the message-ix-models logging config. + with preserve_log_handlers(): + ixmp.show_versions(buf) + + return buf.getvalue() + + # FIXME Reduce complexity from 14 to ≤13 def strip_par_data( # noqa: C901 scenario: message_ix.Scenario, diff --git a/message_ix_models/util/_logging.py b/message_ix_models/util/_logging.py index 63039b45a3..7dc575d128 100644 --- a/message_ix_models/util/_logging.py +++ b/message_ix_models/util/_logging.py @@ -1,75 +1,35 @@ """Logging utilities.""" +import atexit import logging import logging.config +import logging.handlers +import re +import sys +import time from contextlib import contextmanager -from copy import deepcopy +from datetime import datetime, timedelta, timezone +from queue import SimpleQueue from time import process_time +from typing import Dict, Optional, Union, cast +from warnings import warn +# NB mark_time, preserve_log_level, and silence_log are exposed by util/__init__.py __all__ = [ "Formatter", - "make_formatter", + "QueueListener", + "SilenceFilter", + "StreamHandler", "setup", - "silence_log", ] log = logging.getLogger(__name__) +# References to handlers +_HANDLER: Dict[str, logging.Handler] = dict() -@contextmanager -def silence_log(names=None, level=logging.ERROR): - """Context manager to temporarily quiet 1 or more loggers. - - Parameters - ---------- - names : str, *optional* - Space-separated names of loggers to quiet. - level : int, *optional* - Minimum level of log messages to allow. 
- - Examples - -------- - >>> with silence_log(): - >>> log.warning("This message is not recorded.") - """ - # Default: the top-level logger for the package containing this file - if names is None: - names = [__name__.split(".")[0], "message_data"] - elif isinstance(names, str): - names = [names] - - log.info(f"Set level={level} for logger(s): {' '.join(names)}") - - # Retrieve the logger objects - loggers = list(map(logging.getLogger, names)) - # Store their current levels - levels = [] - - try: - for logger in loggers: - levels.append(logger.getEffectiveLevel()) # Store the current levels - logger.setLevel(level) # Set the level - yield - finally: - # Restore the levels - for logger, original_level in zip(loggers, levels): - logger.setLevel(original_level) - log.info("…restored.") - - -@contextmanager -def preserve_log_level(): - """Context manager to preserve the level of the ``message_ix_models`` logger.""" - # Get the top-level logger for the package containing this file - main_log = logging.getLogger(__name__.split(".")[0]) - - try: - # Store the current level - level = main_log.getEffectiveLevel() - yield - finally: - # Restore the level - main_log.setLevel(level) +# For mark_time() +_TIMES = [] class Formatter(logging.Formatter): @@ -77,8 +37,8 @@ class Formatter(logging.Formatter): Parameters ---------- - colorama : module - If provided, :mod:`colorama` is used to colour log messages printed to stdout. + use_colour : bool, *optional* + If :any:`True`, :mod:`colorama` is used to colour log messages. """ CYAN = "" @@ -87,12 +47,20 @@ class Formatter(logging.Formatter): _short_name = None - def __init__(self, colorama): + def __init__(self, use_colour: bool = True): super().__init__() - if colorama: - self.CYAN = colorama.Fore.CYAN - self.DIM = colorama.Style.DIM - self.RESET_ALL = colorama.Style.RESET_ALL + + try: + if use_colour: + # Import and initialize colorama + import colorama + + colorama.init() + self.CYAN = colorama.Fore.CYAN + self.DIM = colorama.Style.DIM + self.RESET_ALL = colorama.Style.RESET_ALL + except ImportError: # pragma: no cover + pass # Not installed def format(self, record): """Format `record`. @@ -100,7 +68,7 @@ def format(self, record): Records are formatted like:: model.transport.data.add_par_data 220 rows in 'input' - ...add_par_data: further messages + ...add_par_data further messages …with the calling function name (e.g. 'add_par_data') coloured for legibility on first occurrence, then dimmed when repeated. @@ -121,29 +89,57 @@ def format(self, record): return f"{prefix}{record.funcName}{self.RESET_ALL} {record.getMessage()}" -def make_formatter(): - """Return a :class:`Formatter` instance for the ``message_ix_models`` logger. 
+class QueueHandler(logging.handlers.QueueHandler): + # For typing with Python ≤ 3.11 only; from 3.12 this attribute is described in the stdlib. + listener: "QueueListener" - See also - -------- - setup - """ - try: - # Initialize colorama - import colorama - colorama.init() - except ImportError: # pragma: no cover - # Colorama not installed - colorama = None +class QueueListener(logging.handlers.QueueListener): + """:class:`.logging.QueueListener` with a :meth:`.flush` method.""" - return Formatter(colorama) + def flush(self): + """Flush the queue: join the listener/monitor thread and then restart.""" + if self._thread is not None: + super().stop() + self.start() -_TIMES = [] +class SilenceFilter(logging.Filter): + """Log filter that only allows records from `names` that are at or above `level`.""" + + __slots__ = ("level", "name_re") + + def __init__(self, names: str, level: int): + self.level = level + # Compile a regular expression for the name + self.name_re = re.compile("|".join(map(re.escape, sorted(names.split())))) + + def filter(self, record) -> bool: + return not (record.levelno < self.level and self.name_re.match(record.name)) + + +class StreamHandler(logging.StreamHandler): + """Like :class:`.logging.StreamHandler`, but retrieve the stream on each access. + + This avoids the case that :mod:`click`, :mod:`pytest`, or something else adjusts + :py:`sys.stdout` temporarily, but the handler's stored reference to the original is + not updated. + """ + + #: Name of the :mod:`sys` stream to use, as :class:`str` rather than a direct + #: reference. + stream_name: str + def __init__(self, stream_name: str): + self.stream_name = stream_name + logging.Handler.__init__(self) -def mark_time(quiet=False):
logger for the package containing this file + main_log = logging.getLogger(__name__.split(".")[0]) + + try: + # Store the current level + level = main_log.getEffectiveLevel() + yield + finally: + # Restore the level + main_log.setLevel(level) + + +def configure(): + """Apply logging configuration.""" + # NB We do this programmatically as logging.config.dictConfig()'s automatic steps + # require adjustments that end up being more verbose and less clear. + from platformdirs import user_log_path + + # Stream handler + _HANDLER["console"] = h_console = StreamHandler(stream_name="stdout") + h_console.setLevel(logging.CRITICAL) + h_console.setFormatter(Formatter()) + + # Construct the file name for the log file + log_file_path = user_log_path("message-ix-models", ensure_exists=True).joinpath( + datetime.now(timezone(timedelta(seconds=time.timezone))) + .isoformat(timespec="seconds") + .replace(":", "") + ) + + # File handler + _HANDLER["file"] = h_file = logging.FileHandler( + filename=str(log_file_path), delay=True + ) + h_file.setLevel(logging.CRITICAL) + h_file.setFormatter(Formatter(use_colour=False)) + + # Queue handler + queue = SimpleQueue() + _HANDLER["queue"] = h_queue = QueueHandler(queue) + logging.root.addHandler(h_queue) + + # Queue listener + h_queue.listener = listener = QueueListener( + queue, h_console, h_file, respect_handler_level=True + ) + listener.start() + atexit.register(listener.stop) + + for name, level in ( + (None, logging.DEBUG), + # Ensure no level set for these packages; the level of the "console"/"file" + # handlers determines outputs + ("message_ix_models", logging.NOTSET), + ("message_data", logging.NOTSET), + # Hide lower-level messages for some upstream packages from the file log + ("graphviz._tools", logging.WARNING), + ("matplotlib", logging.WARNING), + ("pycountry.db", logging.WARNING), + ): + logging.getLogger(name).setLevel(level) def setup( - level="NOTSET", - console=True, - # file=False, -): + level: Union[str, int] = 99, + console: bool = True, + *, + file: bool = False, +) -> None: """Initialize logging. Parameters ---------- level : str, *optional* - Log level for :mod:`message_ix_models` and :mod:`message_data`. + Log level for the console log handler. console : bool, *optional* - If :obj:`True`, print all messages to console using a :class:`Formatter`. + If :obj:`False`, do not print any messages to console. + file : bool, *optional* + If :obj:`False`, do not print any messages to file. + """ + + root = logging.getLogger() + if not any(isinstance(h, logging.handlers.QueueHandler) for h in root.handlers): + # Not yet configured + configure() + + # Apply settings to loggers and handlers: either just-created, or pre-existing + + # Set the level of the console handler + _HANDLER["console"].setLevel(99 if console is False else level) + + if file is False: + _HANDLER["file"].setLevel(99) + else: + _HANDLER["file"].setLevel("DEBUG") + log.info(f"Log to {cast(logging.FileHandler, _HANDLER['file']).baseFilename}") + + +def flush() -> None: + """Flush the queue.""" + cast(QueueHandler, _HANDLER["queue"]).listener.flush() + + +@contextmanager +def silence_log(names: Optional[str] = None, level: int = logging.ERROR): + """Context manager to temporarily quiet 1 or more loggers. + + Parameters + ---------- + names : str, *optional* + Space-separated names of loggers to quiet. + level : int, *optional* + Minimum level of log messages to allow. 
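+
+
+# Example (hypothetical usage, for illustration only): configure logging once, then
+# force queued records through to the console/file handlers before reading output:
+#
+#     setup(level="DEBUG", console=True, file=True)
+#     logging.getLogger(__name__).debug("Recorded in both console and file")
+#     flush()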
+
+
+@contextmanager
+def silence_log(names: Optional[str] = None, level: int = logging.ERROR):
+    """Context manager to temporarily quiet 1 or more loggers.
+
+    Parameters
+    ----------
+    names : str, *optional*
+        Space-separated names of loggers to quiet.
+    level : int, *optional*
+        Minimum level of log messages to allow.
+
+    Examples
+    --------
+    >>> with silence_log():
+    ...     log.warning("This message is not recorded.")
     """
-    # Copy to avoid modifying with the operations below
-    config = deepcopy(CONFIG)
+    if isinstance(names, list):
+        warn(
+            "silence_log(names=…) with a list of str; use a single space-separated str",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        names = " ".join(names)
 
-    config["root"].setdefault("level", level)
+    # Create a filter; by default, for "message_data" and the current package
+    f = SilenceFilter(names or f"message_data {__name__.split('.')[0]}", level)
+    log.info(f"Set level={level} for logger(s): {f.name_re.pattern.replace('|', ' ')}")
 
-    if console:
-        config["root"]["handlers"].append("console")
-    # if file:
-    #     config["loggers"]["message_data"]["handlers"].append("file")
+    try:
+        # Add the same filter to every handler of the root logger
+        for handler in logging.root.handlers:
+            handler.addFilter(f)
 
-    # Apply the configuration
-    logging.config.dictConfig(config)
+        yield
+    finally:
+        # Remove the filter
+        for handler in logging.root.handlers:
+            handler.removeFilter(f)
+        log.info("…restored.")
diff --git a/message_ix_models/util/cache.py b/message_ix_models/util/cache.py
index e10ee7d9a5..389d2dec5d 100644
--- a/message_ix_models/util/cache.py
+++ b/message_ix_models/util/cache.py
@@ -4,16 +4,16 @@
 :mod:`message_ix_models`, so these can be used as arguments to cached functions and
 included in the computed cache key:
 
-- :class:`sdmx.model.IdentifiableArtefact`, including :class:`.Code`: hashed as
-  their string representation / ID.
+- :class:`sdmx.model.IdentifiableArtefact`, including :class:`.Code`: hashed as their
+  string representation / ID.
 - :class:`ixmp.Platform`, :class:`xarray.Dataset`: ignored, with a warning logged.
 - :class:`ScenarioInfo`: only the :attr:`~ScenarioInfo.set` entries are hashed.
-
 """
+
 import json
 import logging
 from dataclasses import asdict, is_dataclass
-from typing import Callable
+from typing import TYPE_CHECKING, Callable, Set
 
 import genno.caching
 import ixmp
@@ -23,6 +23,9 @@
 from .context import Context
 from .scenarioinfo import ScenarioInfo
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
 log = logging.getLogger(__name__)
 
@@ -31,7 +34,7 @@
 SKIP_CACHE = False
 
 # Paths already logged, to decrease verbosity
-PATHS_SEEN = set()
+PATHS_SEEN: Set["Path"] = set()
 
 # Show genno how to hash function arguments seen in message_ix_models
@@ -73,12 +76,12 @@ def cached(func: Callable) -> Callable:
         :doc:`genno:cache` in the :mod:`genno` documentation
     """
     # Determine and create the cache path
-    cache_path = Context.get_instance(-1).get_cache_path()
-    cache_path.mkdir(exist_ok=True, parents=True)
+    cache_path = Context.get_instance(-1).core.cache_path
 
     if cache_path not in PATHS_SEEN:
         log.debug(f"{func.__name__}() will cache in {cache_path}")
         PATHS_SEEN.add(cache_path)
+        cache_path.mkdir(parents=True, exist_ok=True)
 
     # Use the genno internals to wrap the function.
     cached_load = genno.caching.decorate(
diff --git a/message_ix_models/util/click.py b/message_ix_models/util/click.py
index fae8891093..830c3247cc 100644
--- a/message_ix_models/util/click.py
+++ b/message_ix_models/util/click.py
@@ -2,17 +2,23 @@
 
 These are used for building CLIs using :mod:`click`.
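+
+Example
+-------
+An illustrative sketch (the command name ``cmd`` is hypothetical)::
+
+    import click
+
+    from message_ix_models.util.click import common_params
+
+    @click.command()
+    @common_params("dry_run")
+    def cmd(dry_run):
+        ...  # Command body; receives the common --dry-run option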
""" + import logging +import sys +from contextlib import contextmanager +from dataclasses import dataclass, field from datetime import datetime from pathlib import Path -from typing import Callable, List, Optional, Union +from typing import Callable, List, Literal, Mapping, Optional, Union, cast import click +import click.testing from click import Argument, Choice, Option from message_ix_models import Context, model from message_ix_models.model.structure import codelists +from ._logging import preserve_log_level from .scenarioinfo import ScenarioInfo log = logging.getLogger(__name__) @@ -87,8 +93,6 @@ def _cb(context: Union[click.Context, Context], param, value): def format_sys_argv() -> str: """Format :data:`sys.argv` in a readable manner.""" - import sys - lines = ["Invoked:"] indent = "" for item in sys.argv: @@ -112,6 +116,17 @@ def store_context(context: Union[click.Context, Context], param, value): return value +@contextmanager +def temporary_command(group: "click.Group", command: "click.Command"): + """Temporarily attach command `command` to `group`.""" + assert command.name is not None + try: + group.add_command(command) + yield + finally: + group.commands.pop(command.name) + + def urls_from_file( context: Union[click.Context, Context], param, value ) -> List[ScenarioInfo]: @@ -261,3 +276,80 @@ def unique_id() -> str: # expose_value=False, ), } + + +@dataclass +class CliRunner: + """Similar to :class:`click.testing.CliRunner`, with extra features.""" + + #: CLI entry point + cli_cmd: click.Command + #: CLI module + cli_module: str + + env: Mapping[str, str] = field(default_factory=dict) + charset: str = "utf-8" + + #: Method for invoking the command + method: Literal["click", "subprocess"] = "click" + + def invoke(self, *args, **kwargs) -> click.testing.Result: + method = kwargs.pop("method", self.method) + + if method == "click": + runner = click.testing.CliRunner(env=self.env) + with preserve_log_level(): + result = runner.invoke(self.cli_cmd, *args, **kwargs) + elif method == "subprocess": + result = self.invoke_subprocess(*args, **kwargs) + + # Store the result to be used by assert_exit_0() + self.last_result = result + + return result + + def invoke_subprocess(self, *args, **kwargs) -> click.testing.Result: + """Invoke the CLI in a subprocess.""" + import subprocess + + assert 1 == len(args) + all_args: List[str] = [sys.executable, "-m", self.cli_module, *args[0]] + + # Run; capture in a subprocess.CompletedProcess + cp = subprocess.run(all_args, capture_output=True, env=self.env, **kwargs) + + # Convert to a click.testing.Result + return click.testing.Result( + runner=cast(click.testing.CliRunner, self), + stdout_bytes=cp.stdout or bytes(), + stderr_bytes=cp.stderr or bytes(), + return_value=None, + exit_code=cp.returncode, + exception=None, + exc_info=None, + ) + + def assert_exit_0(self, *args, **kwargs) -> click.testing.Result: + """Assert a result has exit_code 0, or print its traceback. + + If any `args` or `kwargs` are given, :meth:`.invoke` is first called. Otherwise, + the result from the last call of :meth:`.invoke` is used. + + Raises + ------ + AssertionError + if the result exit code is not 0. 
+        """
+        __tracebackhide__ = True
+
+        if len(args) + len(kwargs):
+            self.invoke(*args, **kwargs)
+
+        # Retrieve the last result
+        result = self.last_result
+
+        if result.exit_code != 0:
+            print(f"{result.exit_code = }", f"{result.output = }", sep="\n")
+            raise AssertionError(result.exit_code)
+
+        return result
diff --git a/message_ix_models/util/common.py b/message_ix_models/util/common.py
index 5d75c7760d..3c2a12bf31 100644
--- a/message_ix_models/util/common.py
+++ b/message_ix_models/util/common.py
@@ -12,7 +12,6 @@
 try:
     import message_data
 except ImportError:
-    log.warning("message_data is not installed or cannot be imported")
     MESSAGE_DATA_PATH: Optional[Path] = None
     HAS_MESSAGE_DATA = False
 else:  # pragma: no cover (needs message_data)
@@ -258,11 +257,15 @@ def package_data_path(*parts) -> Path:
     return _make_path(MESSAGE_MODELS_PATH / "data", *parts)
 
 
-def private_data_path(*parts) -> Path:  # pragma: no cover (needs message_data)
+def private_data_path(*parts) -> Path:
     """Construct a path to a file under :file:`data/` in :mod:`message_data`.
 
-    Use this function to access non-public (e.g. embargoed or proprietary) data stored
-    in the :mod:`message_data` repository.
+    Use this function to access non-public (for instance, embargoed or proprietary) data
+    stored in the :mod:`message_data` repository.
+
+    If the repository is not available, the function falls back to
+    :meth:`.Context.get_local_path`, where users may put files obtained through other
+    means.
 
     Parameters
     ----------
@@ -273,4 +276,11 @@ def private_data_path(*parts) -> Path:  # pragma: no cover (needs message_data)
     --------
     :ref:`Choose locations for data `
     """
-    return _make_path(cast(Path, MESSAGE_DATA_PATH) / "data", *parts)
+    if HAS_MESSAGE_DATA:
+        return _make_path(cast(Path, MESSAGE_DATA_PATH) / "data", *parts)
+    else:
+        from .context import Context
+
+        base = Context.get_instance(-1).get_local_path()
+        log.warning(f"message_data not installed; fall back to {base}")
+        return base.joinpath(*parts)
diff --git a/message_ix_models/util/config.py b/message_ix_models/util/config.py
index a467736e93..d6bf3299df 100644
--- a/message_ix_models/util/config.py
+++ b/message_ix_models/util/config.py
@@ -10,6 +10,7 @@
 
 log = logging.getLogger(__name__)
 
+ixmp.config.register("no message_data", bool, False)
 ixmp.config.register("message local data", Path, Path.cwd())
 
@@ -165,7 +166,8 @@ class Config:
     dest: Optional[str] = None
 
     #: Base path for cached data, e.g. as given by the :program:`--cache-path` CLI
-    #: option. Default: :file:`{local_data}/cache/`.
+    #: option. Default: the directory :file:`message-ix-models` within the directory
+    #: given by :func:`.platformdirs.user_cache_path`.
     cache_path: Optional[str] = None
 
     #: Paths of files containing debug outputs. See :meth:`Context.write_debug_archive`.
@@ -179,3 +181,9 @@ class Config:
     #: Flag for causing verbose output to logs or stdout. Different modules will respect
     #: :attr:`verbose` in distinct ways.
verbose: bool = False + + def __post_init__(self): + if self.cache_path is None: + from platformdirs import user_cache_path + + self.cache_path = user_cache_path("message-ix-models", ensure_exists=True) diff --git a/message_ix_models/util/context.py b/message_ix_models/util/context.py index 60c915902e..b5c2b3e84d 100644 --- a/message_ix_models/util/context.py +++ b/message_ix_models/util/context.py @@ -1,4 +1,5 @@ """Context and settings for :mod:`message_ix_models` code.""" + import logging from copy import deepcopy from dataclasses import fields @@ -10,6 +11,7 @@ from click import BadOptionUsage from .config import Config +from .ixmp import parse_url log = logging.getLogger(__name__) @@ -137,6 +139,12 @@ def __deepcopy__(self, memo): return result + def __eq__(self, other) -> bool: + # Don't compare contents, only identity, for _CONTEXTS.index() + if not isinstance(other, Context): + return NotImplemented + return id(self) == id(other) + def __repr__(self): return f"<{self.__class__.__name__} object at {id(self)} with {len(self)} keys>" @@ -269,14 +277,8 @@ def get_cache_path(self, *parts) -> Path: The directory containing the resulting path is created if it does not already exist. """ - # Construct relative to local_data if cache_path is not defined - base = self.core.cache_path or self.core.local_data.joinpath("cache") - - result = base.joinpath(*parts) - - # Ensure the directory exists - result.parent.mkdir(parents=True, exist_ok=True) - + result = self.core.cache_path.joinpath(*parts) + result.parent.mkdir(parents=True, exist_ok=True) # Ensure the directory exists return result def get_local_path(self, *parts: str, suffix=None) -> Path: @@ -371,7 +373,7 @@ def handle_cli_args( ) self.core.url = url - urlinfo = ixmp.utils.parse_url(url) + urlinfo = parse_url(url) platform_info.update(urlinfo[0]) scenario_info.update(urlinfo[1]) elif platform: diff --git a/message_ix_models/util/ixmp.py b/message_ix_models/util/ixmp.py index f19f24afee..fe154eba1d 100644 --- a/message_ix_models/util/ixmp.py +++ b/message_ix_models/util/ixmp.py @@ -2,7 +2,13 @@ try: # ixmp 3.8.0 and later - from ixmp.util import discard_on_error, maybe_check_out, maybe_commit, parse_url + from ixmp.util import ( + discard_on_error, + maybe_check_out, + maybe_commit, + parse_url, + show_versions, + ) except ImportError: # ixmp <= 3.7.0 from contextlib import nullcontext @@ -11,6 +17,7 @@ maybe_check_out, maybe_commit, parse_url, + show_versions, ) def discard_on_error(*args): diff --git a/message_ix_models/util/node.py b/message_ix_models/util/node.py index 6d7206e6b8..4845a32905 100644 --- a/message_ix_models/util/node.py +++ b/message_ix_models/util/node.py @@ -1,4 +1,5 @@ """Utilities for nodes.""" + import logging from typing import List, Sequence, Union @@ -126,8 +127,6 @@ def identify_nodes(scenario: Scenario) -> str: def nodes_ex_world(nodes: Sequence[Union[str, Code]]) -> List[Union[str, Code]]: """Exclude "World" and anything containing "GLB" from `nodes`. - May also be used as a reporting computation. - - .. todo:: Make available from :mod:`message_ix_models.report`. + May also be used as a genno (reporting) operator. 
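+
+    Examples
+    --------
+    With illustrative R12 node IDs:
+
+    >>> nodes_ex_world(["World", "R12_AFR", "R12_GLB"])
+    ['R12_AFR']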
""" return list(filter(lambda n_: "GLB" not in n_ and n_ != "World", nodes)) diff --git a/message_ix_models/util/pooch.py b/message_ix_models/util/pooch.py index 1540ecdf99..4f12205c66 100644 --- a/message_ix_models/util/pooch.py +++ b/message_ix_models/util/pooch.py @@ -1,4 +1,5 @@ """Utilities for using :doc:`Pooch `.""" + import logging from pathlib import Path from typing import Tuple diff --git a/message_ix_models/util/scenarioinfo.py b/message_ix_models/util/scenarioinfo.py index dba9926cbd..8a51f676c3 100644 --- a/message_ix_models/util/scenarioinfo.py +++ b/message_ix_models/util/scenarioinfo.py @@ -1,4 +1,5 @@ """:class:`ScenarioInfo` class.""" + import logging import re from collections import defaultdict @@ -11,7 +12,6 @@ import sdmx.model.v21 as sdmx_model from .ixmp import parse_url -from .sdmx import eval_anno if TYPE_CHECKING: from message_ix import Scenario @@ -235,13 +235,16 @@ def units_for(self, set_name: str, id: str) -> pint.Unit: -------- io_units """ + try: idx = self.set[set_name].index(id) except ValueError: print(self.set[set_name]) raise - return eval_anno(self.set[set_name][idx], "units") + return self.set[set_name][idx].eval_annotation( + id="units", globals=dict(registry=pint.get_application_registry()) + ) def io_units( self, technology: str, commodity: str, level: Optional[str] = None @@ -309,8 +312,6 @@ def year_from_codes(self, codes: List[sdmx_model.Code]): [2090, 2100, 2110] """ - from message_ix_models.util import eval_anno - # Clear existing values if len(self.set["year"]): log.debug(f"Discard existing 'year' elements: {repr(self.set['year'])}") @@ -333,7 +334,7 @@ def year_from_codes(self, codes: List[sdmx_model.Code]): self.set["year"].append(year) # Check for an annotation 'firstmodelyear: true' - if eval_anno(code, "firstmodelyear"): + if code.eval_annotation(id="firstmodelyear"): if fmy_set: # No coverage: data that triggers this should not be committed raise ValueError( # pragma: no cover @@ -349,7 +350,7 @@ def year_from_codes(self, codes: List[sdmx_model.Code]): duration_period.append( dict( year=year, - value=eval_anno(code, "duration_period") + value=code.eval_annotation(id="duration_period") or (year - duration_period[-1]["year"]), unit="y", ) diff --git a/message_ix_models/util/sdmx.py b/message_ix_models/util/sdmx.py index 75e04c8cdf..1e7176826f 100644 --- a/message_ix_models/util/sdmx.py +++ b/message_ix_models/util/sdmx.py @@ -1,10 +1,12 @@ """Utilities for handling objects from :mod:`sdmx`.""" + import logging from datetime import datetime from enum import Enum, Flag from importlib.metadata import version from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Union +from warnings import warn import sdmx import sdmx.message @@ -23,7 +25,10 @@ CodeLike = Union[str, Code] -def as_codes(data: Union[List[str], Dict[str, CodeLike]]) -> List[Code]: +# FIXME Reduce complexity from 13 → ≤11 +def as_codes( # noqa: C901 + data: Union[List[str], Dict[str, CodeLike]], +) -> List[Code]: """Convert `data` to a :class:`list` of |Code| objects. Various inputs are accepted: @@ -99,6 +104,13 @@ def eval_anno(obj: AnnotableArtefact, id: str): Use :meth:`sdmx.model.common.AnnotableArtefact.eval_annotation`, which provides the same functionality. 
""" + warn( + "message_ix_models.util.eval_anno; use sdmx.model.common.AnnotableArtefact" + ".eval_annotation() instead.", + DeprecationWarning, + stacklevel=2, + ) + try: value = str(obj.get_annotation(id=id).text) except KeyError: # No such attribute diff --git a/message_ix_models/workflow.py b/message_ix_models/workflow.py index a627201294..0443529a47 100644 --- a/message_ix_models/workflow.py +++ b/message_ix_models/workflow.py @@ -1,4 +1,5 @@ """Tools for modeling workflows.""" + import logging import re from typing import ( @@ -320,6 +321,8 @@ def make_click_command(wf_callback: str, name: str, slug: str, **kwargs) -> "Com def _func(context, go, truncate_step, target_step, **kwargs): from importlib import import_module + from message_ix_models.util import show_versions + # Import the module and retrieve the callback function module_name, callback_name = wf_callback.rsplit(".", maxsplit=1) module = import_module(module_name) @@ -361,7 +364,8 @@ def _func(context, go, truncate_step, target_step, **kwargs): ) target_step = wf.default_key - log.info(f"Execute workflow:\n{wf.describe(target_step)}") + log.info(f"Execute workflow\n{wf.describe(target_step)}") + log.debug(f"…with package versions:\n{show_versions()}") if not go: path = context.get_local_path(f"{slug}-workflow.svg") diff --git a/pyproject.toml b/pyproject.toml index c840e53626..ec8f9a2ca8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: R", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Information Analysis", @@ -116,11 +117,12 @@ no_implicit_optional = false addopts = "-p no:faulthandler --cov=message_ix_models --cov-report=" filterwarnings = "ignore:distutils Version classes.*:DeprecationWarning" -[tool.ruff] +[tool.ruff.lint] select = ["C9", "E", "F", "I", "W"] - -[tool.ruff.mccabe] -max-complexity = 13 +# Exceptions: +# - .tools.wb.assign_income_groups(): 12 > 11 +# - .util.sdmx.as_codes(): 13 > 11 +mccabe.max-complexity = 11 [tool.setuptools.packages] find = {}