iiasa · khaeru · May 25, 2022 · Mar 24, 2022 · Mar 31, 2022 · Mar 31, 2022
diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
@@ -4,6 +4,8 @@
 .. All changes
 .. -----------
 
+- Extend functionality of :meth:`.vintage_and_active_years` (:pull:`572`)
+
 .. _v3.5.0:
 
 v3.5.0 (2022-05-06)

diff --git a/message_ix/core.py b/message_ix/core.py
@@ -1,7 +1,8 @@
 import logging
 from collections.abc import Mapping
 from functools import lru_cache
-from itertools import product
+from itertools import chain, product
+from typing import Iterable, List, Optional, Tuple, Union
 from warnings import warn
 
 import ixmp
@@ -20,9 +21,9 @@
 class Scenario(ixmp.Scenario):
     """|MESSAGEix| Scenario.
 
-    See :class:`ixmp.TimeSeries` for the meaning of arguments `mp`, `model`,
-    `scenario`, `version`, and `annotation`. The `scheme` of a newly-created
-    Scenario is always 'MESSAGE'.
+    See :class:`ixmp.TimeSeries` for the meaning of arguments `mp`, `model`, `scenario`,
+    `version`, and `annotation`. The `scheme` of a newly-created Scenario is always
+    "MESSAGE".
     """
 
     def __init__(
@@ -289,21 +290,26 @@ def recurse(k, v, parent="World"):
         self.add_set("lvl_spatial", levels)
         self.add_set("map_spatial_hierarchy", hierarchy)
 
-    def add_horizon(self, year=[], firstmodelyear=None, data=None):
+    def add_horizon(
+        self,
+        year: Iterable[int] = [],
+        firstmodelyear: Optional[int] = None,
+        data: Optional[dict] = None,
+    ) -> None:
         """Set the scenario time horizon via ``year`` and related categories.
 
-        :meth:`add_horizon` acts like ``add_set("year", ...)``, except with
-        additional conveniences:
+        :meth:`add_horizon` acts like ``add_set("year", ...)``, except with additional
+        conveniences:
 
-        - The `firstmodelyear` argument can be used to set the first period
-          handled by the MESSAGE optimization. This is equivalent to::
+        - The `firstmodelyear` argument can be used to set the first period handled by
+          the MESSAGE optimization. This is equivalent to::
 
             scenario.add_cat("year", "firstmodelyear", ..., is_unique=True)
 
-        - Parameter ``duration_period`` is assigned values based on `year`:
-          The duration of periods is calculated as the interval between
-          successive `year` elements, and the duration of the first period is
-          set to value that appears most frequently.
+        - Parameter ``duration_period`` is assigned values based on `year`: The duration
+          of periods is calculated as the interval between successive `year` elements,
+          and the duration of the first period is set to the value that appears most
+          frequently.
 
         See :doc:`time` for a detailed terminology of years and periods in
         :mod:`message_ix`.
@@ -314,25 +320,24 @@ def add_horizon(self, year=[], firstmodelyear=None, data=None):
             The set of periods.
 
         firstmodelyear : int, optional
-            First period for the model solution. If not given, the first entry
-            of `year` is used.
+            First period for the model solution. If not given, the first entry of `year`
+            is used.
 
         Other parameters
         ----------------
         data : dict
             .. deprecated:: 3.1
 
-               The "year" key corresponds to `year` and is required.
-               A "firstmodelyear" key corresponds to `firstmodelyear` and is
-               optional.
+               The "year" key corresponds to `year` and is required. A "firstmodelyear"
+               key corresponds to `firstmodelyear` and is optional.
 
         Raises
         ------
         ValueError
-            If the ``year`` set of the Scenario is already populated. Changing
-            the time periods of an existing Scenario can entail complex
-            adjustments to data. For this purpose, adjust each set and
-            parameter individually, or see :mod:`.tools.add_year`.
+            If the ``year`` set of the Scenario is already populated. Changing the time
+            periods of an existing Scenario can entail complex adjustments to data. For
+            this purpose, adjust each set and parameter individually, or see
+            :mod:`.tools.add_year`.
 
         Examples
         --------
@@ -343,8 +348,8 @@ def add_horizon(self, year=[], firstmodelyear=None, data=None):
         >>> s.add_horizon([2020, 2030, 2040])
         """
         # Check arguments
-        # NB once the deprecated signature is removed, these two 'if' blocks
-        #    and the data= argument can be deleted.
+        # NB once the deprecated signature is removed, these two 'if' blocks and the
+        #    data= argument can be deleted.
         if isinstance(year, dict):
             # Move a dict argument to `data` to trigger the next block
             if data:
@@ -353,7 +358,7 @@ def add_horizon(self, year=[], firstmodelyear=None, data=None):
 
         if data:
             warn(
-                "dict() argument to add_horizon(); use year= and " "firstmodelyear=",
+                "dict() argument to add_horizon(); use year= and firstmodelyear=",
                 DeprecationWarning,
             )
 
@@ -391,8 +396,8 @@ def add_horizon(self, year=[], firstmodelyear=None, data=None):
             # Cannot infer any durations with only 1 period
             return
         elif len(set(duration)) == 1:
-            # All periods have the same duration; use this for the duration of
-            # the first period
+            # All periods have the same duration; use this for the duration of the first
+            # period
             duration_first = duration[0]
         else:
             # More than one period duration. Use the mode, i.e. the most common
@@ -408,71 +413,126 @@ def add_horizon(self, year=[], firstmodelyear=None, data=None):
         self.add_par(
             "duration_period",
             pd.DataFrame(
-                {
-                    "year": year,
-                    "value": [duration_first] + duration,
-                    "unit": "y",
-                }
+                {"year": year, "value": [duration_first] + duration, "unit": "y"}
             ),
         )
 
-    def vintage_and_active_years(self, ya_args=None, in_horizon=True):
-        """Return sets of vintage and active years for use in data input.
-
-        For a valid pair `(year_vtg, year_act)`, the following conditions are
-        satisfied:
-
-        1. Both the vintage year (`year_vtg`) and active year (`year_act`) are
-           in the model's ``year`` set.
-        2. `year_vtg` <= `year_act`.
-        3. `year_act` <= the model's first year **or** `year_act` is in the
-           smaller subset :meth:`ixmp.Scenario.years_active` for the given
-           `ya_args`.
+    def vintage_and_active_years(
+        self,
+        ya_args: Union[Tuple[str, str], Tuple[str, str, Union[int, str]]] = None,
+        in_horizon: bool = True,
+        vtg_lower: int = 0,
+        act_lower: int = 0,
+    ) -> pd.DataFrame:
+        r"""Return matched pairs of vintage and active years for use in data input.
+
+        Each returned pair of (vintage year :math:`y^V`, active year :math:`y^A`)
+        satisfies all of the following conditions:
+
+        1. :math:`y^V, y^A \in Y`: both vintage and active year are in the ``year`` set
+           of the Scenario.
+        2. :math:`y^V \leq y^A`: a technology cannot be active before it is constructed.
+        3. (If `in_horizon` is :obj:`True`) :math:`y^A \geq y_0`, the
+           :attr:`.firstmodelyear`.
+        4. (If `ya_args` are given) :math:`y^A - y^V + \text{duration_period}_{y^V} <
+           \text{technical_lifetime}_{y^V}`; that is, at least part of the active period
+           is within the technical lifetime defined for technology of the corresponding
+           vintage. This is the same condition satisfied by :meth:`years_active`.
 
         Parameters
         ----------
-        ya_args : tuple of (node, tec, yr_vtg), optional
-            Arguments to :meth:`years_active`.
+        ya_args : tuple of (node, tec) or (node, tec, yr_vtg), optional
+            If all three are provided, they are supplied directly to
+            :meth:`.years_active`, and only the `yr_vtg` will appear in the results. If
+            only (node, tec) are provided, then :meth:`.years_active` is called for
+            every vintage where the (node, tec) has a defined technical lifetime.
         in_horizon : bool, optional
-            Only return years within the model horizon
-            (:obj:`firstmodelyear` or later).
+            Only return year_act within the model horizon (:attr:`.firstmodelyear` or
+            later).
+        vtg_lower : int, optional
+            Only return year_vtg from the specified value onwards.
+        act_lower : int, optional
+            Only return year_act from the specified value onwards.
 
         Returns
         -------
         pandas.DataFrame
-            with columns 'year_vtg' and 'year_act', in which each row is a
-            valid pair.
-        """
-        first = self.firstmodelyear
+            with columns "year_vtg" and "year_act", in which each row is a valid pair.
+
+        Examples
+        --------
+        :meth:`pandas.DataFrame.query` can be used to further manipulate the data in the
+        returned data frame. To limit the vintage years included:
+
+        >>> base = s.vintage_and_active_years(("node", "tech"))
+        >>> df = base.query("year_vtg >= 2022")
+
+        Limit the active years included:
+
+        >>> df = base.query("year_act >= 2040")
 
+        More complex expressions as a chained call:
+
+        >>> df = s.vintage_and_active_years(
+        ...     ("node", "tech"), in_horizon=True
+        ... ).query("year_act >= 2025 or year_vtg < 2010")
+
+        """
         # Prepare lists of vintage (yv) and active (ya) years
-        if ya_args:
-            if len(ya_args) != 3:
-                raise ValueError("3 arguments are required if using `ya_args`")
-            ya = self.years_active(*ya_args)
-            yv = ya[0:1]  # Just the first element, as a list
-        else:
+        if ya_args is None:
             # Product of all years
-            yv = ya = self.set("year")
+            years = self.set("year")
+            values: Iterable = product(years, years)
+        elif len(ya_args) == 3:
+            # Specific vintage for `years_active()`
+            values = map(
+                lambda y: (int(ya_args[-1]), y),  # type: ignore
+                self.years_active(*ya_args),
+            )
+        elif len(ya_args) == 2:
+            # All possible vintages for the given (node, technology)
+            vintages = sorted(
+                self.par(
+                    "technical_lifetime",
+                    filters={"node_loc": ya_args[0], "technology": ya_args[1]},
+                )["year_vtg"].unique()
+            )
+
+            # One list of (yv, ya) values for each vintage
+            # NB this could be made more efficient using a modified version of the
+            #    code in years_active(); however any performance penalty from repeated
+            #    calls is probably mitigated by caching.
+            iters = []
+            for yv in vintages:
+                iters.append(
+                    [(yv, y) for y in self.years_active(ya_args[0], ya_args[1], yv)]
+                )
+            values = chain(*iters)
+        else:
+            raise ValueError(
+                f"ya_args must be a 2- or 3-tuple; got {ya_args} of length "
+                f"{len(ya_args)}"
+            )
+
+        # Minimum value for year_act
+        ya_lower = max(self.firstmodelyear if in_horizon else -np.inf, act_lower)
+        # ya_lower = self.firstmodelyear if in_horizon else -np.inf  # Without act_lower
 
         # Predicate for filtering years
         def _valid(elem):
             yv, ya = elem
-            return (yv <= ya) and (not in_horizon or (first <= ya))
+            return vtg_lower <= yv <= ya and ya_lower <= ya
+            # return yv <= ya and ya_lower <= ya  # Without vtg_lower
 
-        # - Cartesian product of all yv and ya.
-        # - Filter only valid years.
-        # - Convert to data frame.
-        return pd.DataFrame(
-            filter(_valid, product(yv, ya)), columns=["year_vtg", "year_act"]
-        )
+        # Filter values and convert to data frame
+        return pd.DataFrame(filter(_valid, values), columns=["year_vtg", "year_act"])
 
-    def years_active(self, node, tec, yr_vtg):
-        """Return years in which *tec* of *yr_vtg* can be active in *node*.
+    def years_active(self, node: str, tec: str, yr_vtg: Union[int, str]) -> List[int]:
+        """Return years in which `tec` of `yr_vtg` can be active in `node`.
 
         The :ref:`parameters <params-tech>` ``duration_period`` and
-        ``technical_lifetime`` are used to determine which periods are partly
-        or fully within the lifetime of the technology.
+        ``technical_lifetime`` are used to determine which periods are partly or fully
+        within the lifetime of the technology.
 
         Parameters
         ----------
@@ -491,7 +551,7 @@ def years_active(self, node, tec, yr_vtg):
         yv = int(yr_vtg)
         filters = dict(node_loc=[node], technology=[tec], year_vtg=[yv])
 
-        # Lifetime of the technology at the node
+        # Lifetime of the technology at the node and year_vtg
         lt = self.par("technical_lifetime", filters=filters).at[0, "value"]
 
         # Duration of periods
@@ -500,8 +560,8 @@ def years_active(self, node, tec, yr_vtg):
         data["age"] = data.where(data.year >= yv, 0)["value"].cumsum()
 
         # Return periods:
-        # - the tec's age at the end of the *prior* period is less than or
-        #   equal to its lifetime, and
+        # - the tec's age at the end of the *prior* period is less than its lifetime,
+        #   and
         # - at or later than the vintage year.
         return (
             data.where(data.age.shift(1, fill_value=0) < lt)

diff --git a/message_ix/tests/test_core.py b/message_ix/tests/test_core.py
@@ -194,75 +194,6 @@ def test_add_horizon_repeat(test_mp, caplog):
         scen.add_horizon([2015, 2020, 2025], firstmodelyear=2010)
 
 
-def test_vintage_and_active_years(test_mp):
-    scen = Scenario(test_mp, **SCENARIO["dantzig"], version="new")
-
-    years = [2000, 2010, 2020]
-    scen.add_horizon(year=years, firstmodelyear=2010)
-    obs = scen.vintage_and_active_years()
-    exp = pd.DataFrame(
-        {
-            "year_vtg": (2000, 2000, 2010, 2010, 2020),
-            "year_act": (2010, 2020, 2010, 2020, 2020),
-        }
-    )
-    pdt.assert_frame_equal(exp, obs, check_like=True)  # ignore col order
-
-    # Add a technology, its lifetime, and period durations
-    scen.add_set("node", "foo")
-    scen.add_set("technology", "bar")
-    scen.add_par(
-        "duration_period", pd.DataFrame({"unit": "???", "value": 10, "year": years})
-    )
-    scen.add_par(
-        "technical_lifetime",
-        pd.DataFrame(
-            {
-                "node_loc": "foo",
-                "technology": "bar",
-                "unit": "???",
-                "value": 20,
-                "year_vtg": years,
-            }
-        ),
-    )
-
-    # part is before horizon
-    obs = scen.vintage_and_active_years(ya_args=("foo", "bar", "2000"))
-    exp = pd.DataFrame({"year_vtg": (2000,), "year_act": (2010,)})
-    pdt.assert_frame_equal(exp, obs, check_like=True)  # ignore col order
-
-    obs = scen.vintage_and_active_years(
-        ya_args=("foo", "bar", "2000"), in_horizon=False
-    )
-    exp = pd.DataFrame({"year_vtg": (2000, 2000), "year_act": (2000, 2010)})
-    pdt.assert_frame_equal(exp, obs, check_like=True)  # ignore col order
-
-    # fully in horizon
-    obs = scen.vintage_and_active_years(ya_args=("foo", "bar", "2010"))
-    exp = pd.DataFrame({"year_vtg": (2010, 2010), "year_act": (2010, 2020)})
-    pdt.assert_frame_equal(exp, obs, check_like=True)  # ignore col order
-
-    # part after horizon
-    obs = scen.vintage_and_active_years(ya_args=("foo", "bar", "2020"))
-    exp = pd.DataFrame({"year_vtg": (2020,), "year_act": (2020,)})
-    pdt.assert_frame_equal(exp, obs, check_like=True)  # ignore col order
-
-    # Advance the first model year
-    scen.add_cat("year", "firstmodelyear", years[-1], is_unique=True)
-
-    # Empty data frame: only 2000 and 2010 valid year_act for this node/tec;
-    # but both are before the first model year
-    obs = scen.vintage_and_active_years(
-        ya_args=("foo", "bar", years[0]), in_horizon=True
-    )
-    pdt.assert_frame_equal(pd.DataFrame(columns=["year_vtg", "year_act"]), obs)
-
-    # Exception is raised for incorrect arguments
-    with pytest.raises(ValueError, match="3 arguments are required if using `ya_args`"):
-        scen.vintage_and_active_years(ya_args=("foo", "bar"))
-
-
 def test_cat_all(dantzig_message_scenario):
     scen = dantzig_message_scenario
     df = scen.cat("technology", "all")