run_gsa() as a standalone function

polca · Jul 31, 2024 · 2f78b8a · 2f78b8a
1 parent 081e742
commit 2f78b8a
Show file tree

Hide file tree

Showing 4 changed files with 166 additions and 158 deletions.
diff --git a/dev/timing.py b/dev/timing.py
@@ -1,9 +1,9 @@
-from pathways import Pathways
+from pathways import Pathways, run_gsa
 
 p = Pathways(
     datapackage="remind-SSP2-PkBudg1150-stem-SPS1.zip",
     geography_mapping="geo_mapping_remind.yaml",
-    activities_mapping="act_categories_agg.yaml",
+    #activities_mapping="act_categories_agg.yaml",
 )
 
 vars = [v for v in p.scenarios.coords["variables"].values if v.startswith("FE")]
@@ -17,11 +17,14 @@
     scenarios=p.scenarios.pathway.values.tolist(),
     years=[2020, 2030, 2040, 2050],
     variables=vars,
-    use_distributions=100,
+    use_distributions=10,
     subshares=True,
-    multiprocessing=True,
 )
 
 p.export_results()
 
-p.run_gsa()
+print(p.lca_results.coords)
+print(p.lca_results.shape)
+print(p.lca_results.sum())
+
+run_gsa()
diff --git a/pathways/__init__.py b/pathways/__init__.py
@@ -1,5 +1,6 @@
 __version__ = (0, 0, 1)
-__all__ = ("__version__", "Pathways")
+__all__ = ("__version__", "Pathways", "run_gsa")
 
 
 from .pathways import Pathways
+from .stats import run_gsa
diff --git a/pathways/pathways.py b/pathways/pathways.py
@@ -18,15 +18,11 @@
 import yaml
 
 from .data_validation import validate_datapackage
-from .filesystem_constants import DATA_DIR, DIR_CACHED_DB, STATS_DIR, USER_LOGS_DIR
+from .filesystem_constants import DATA_DIR, USER_LOGS_DIR
 from .lca import _calculate_year, get_lca_matrices
 from .lcia import get_lcia_method_names
 from .stats import (
-    create_mapping_sheet,
-    log_results,
-    log_subshares,
-    log_uncertainty_values,
-    run_GSA_delta,
+    log_mc_parameters_to_excel,
 )
 from .subshares import generate_samples
 from .utils import (
@@ -122,96 +118,6 @@ def _load_array(filepath):
     return np.stack(results, axis=2)
 
 
-def log_mc_parameters_to_excel(
-    model: str,
-    scenario: str,
-    year: int,
-    methods: list,
-    result: dict,
-    uncertainty_parameters: dict,
-    uncertainty_values: dict,
-    tehnosphere_indices: dict,
-    iteration_results: dict,
-    shares: dict = None,
-):
-    export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"
-
-    # create Excel workbook using openpyxl
-    with pd.ExcelWriter(export_path, engine="openpyxl") as writer:
-
-        df_sum_impacts = pd.DataFrame()
-        df_uncertainty_values = pd.DataFrame()
-        df_technology_shares = pd.DataFrame()
-        writer.book.create_sheet("Indices mapping")
-        writer.book.create_sheet("Monte Carlo values")
-        writer.book.create_sheet("Technology shares")
-        writer.book.create_sheet("Total impacts")
-
-        for region, data in result.items():
-
-            total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3))
-
-            df_sum_impacts = pd.concat(
-                [
-                    df_sum_impacts,
-                    log_results(
-                        total_impacts=total_impacts,
-                        methods=methods,
-                        region=region,
-                    ),
-                ]
-            )
-
-            uncertainty_indices = uncertainty_parameters[region]
-            uncertainty_vals = uncertainty_values[region]
-
-            df_uncertainty_values = pd.concat(
-                [
-                    df_uncertainty_values,
-                    log_uncertainty_values(
-                        region=region,
-                        uncertainty_indices=uncertainty_indices,
-                        uncertainty_values=uncertainty_vals,
-                    ),
-                ],
-            )
-
-            if shares:
-                sub_shares = {}
-                for k, v in shares.items():
-                    for x, y in v.items():
-                        if x == year:
-                            for z, w in y.items():
-                                sub_shares[f"{k} - {z}"] = w
-
-                df_technology_shares = pd.concat(
-                    [
-                        df_technology_shares,
-                        log_subshares(
-                            shares=sub_shares,
-                            region=region,
-                        ),
-                    ],
-                )
-
-        indices = tehnosphere_indices[region]
-
-        df_technosphere_indices = create_mapping_sheet(indices=indices)
-
-        df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False)
-        df_uncertainty_values.to_excel(
-            writer, sheet_name="Monte Carlo values", index=False
-        )
-        df_technology_shares.to_excel(
-            writer, sheet_name="Technology shares", index=False
-        )
-        df_technosphere_indices.to_excel(
-            writer, sheet_name="Indices mapping", index=False
-        )
-
-        print(f"Monte Carlo parameters added to: {export_path.resolve()}")
-
-
 class Pathways:
     """The Pathways class reads in a datapackage that contains scenario data,
     mapping between scenario variables and LCA datasets, and LCA matrices.
@@ -259,8 +165,15 @@ def __init__(
         # a mapping of geographies can be added
         # to aggregate locations to a higher level
         # e.g. from countries to regions
-        self.geography_mapping = load_mapping(geography_mapping) or None
-        self.activities_mapping = load_mapping(activities_mapping) or None
+        if geography_mapping:
+            self.geography_mapping = load_mapping(geography_mapping)
+        else:
+            self.geography_mapping = None
+
+        if activities_mapping:
+            self.activities_mapping = load_mapping(activities_mapping)
+        else:
+            self.activities_mapping = None
 
         clean_cache_directory()
 
@@ -631,57 +544,3 @@ def export_results(self, filename: str = None) -> str:
         :return: None
         """
         return export_results_to_parquet(self.lca_results, filename)
-
-    def run_gsa(self, method: str = "delta") -> None:
-        """
-        Run a global sensitivity analysis (GSA) on the LCA results.
-        Updates Excel files with the GSA results.
-        :param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment.
-        :return: None.
-        """
-        if method != "delta":
-            raise ValueError(f"Method {method} is not supported.")
-
-        for model in self.lca_results.coords["model"].values:
-            for scenario in self.lca_results.coords["scenario"].values:
-                for year in self.lca_results.coords["year"].values:
-                    export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"
-
-                    # load content of "Monte Carlo values" sheet into a pandas DataFrame
-                    df_mc_vals = pd.read_excel(
-                        export_path, sheet_name="Monte Carlo values"
-                    )
-
-                    # load content of "Technology shares" sheet into a pandas DataFrame
-                    # if it exists
-
-                    try:
-                        df_technology_shares = pd.read_excel(
-                            export_path,
-                            sheet_name="Technology shares",
-                        )
-                    except:
-                        df_technology_shares = None
-
-                    # load content of "Total impacts" sheet into a pandas DataFrame
-
-                    df_sum_impacts = pd.read_excel(
-                        export_path, sheet_name="Total impacts"
-                    )
-
-                    # open Excel workbook
-                    with pd.ExcelWriter(
-                        export_path, engine="openpyxl", mode="a"
-                    ) as writer:
-
-                        df_GSA_results = run_GSA_delta(
-                            total_impacts=df_sum_impacts,
-                            uncertainty_values=df_mc_vals,
-                            technology_shares=df_technology_shares,
-                        )
-
-                        df_GSA_results.to_excel(
-                            writer, sheet_name=f"GSA {method.capitalize()}", index=False
-                        )
-
-                    print(f"GSA results added to: {export_path.resolve()}")
diff --git a/pathways/stats.py b/pathways/stats.py
@@ -10,6 +10,8 @@
 from openpyxl import Workbook, load_workbook
 from SALib.analyze import delta
 
+from pathways.filesystem_constants import STATS_DIR
+
 
 def log_double_accounting(
     filtered_names: Dict[Tuple[str, ...], Set[str]],
@@ -323,3 +325,146 @@ def run_GSA_delta(
         results,
         columns=["LCIA method", "Parameter", "Delta", "Delta Conf", "S1", "S1 Conf"],
     )
+
+
+def log_mc_parameters_to_excel(
+    model: str,
+    scenario: str,
+    year: int,
+    methods: list,
+    result: dict,
+    uncertainty_parameters: dict,
+    uncertainty_values: dict,
+    tehnosphere_indices: dict,
+    iteration_results: dict,
+    shares: dict = None,
+):
+    """
+    Write the Monte Carlo parameters and results of one model/scenario/year
+    to the Excel workbook ``STATS_DIR / "{model}_{scenario}_{year}.xlsx"``.
+
+    The workbook gets four sheets: "Indices mapping", "Monte Carlo values",
+    "Technology shares" and "Total impacts".
+
+    :param model: str. Model name, used in the workbook file name.
+    :param scenario: str. Scenario name, used in the workbook file name.
+    :param year: int. Year, used in the workbook file name and to select the entries of `shares`.
+    :param methods: list. Forwarded to `log_results` together with the total impacts.
+    :param result: dict. Keyed by region; only its keys are used here.
+    :param uncertainty_parameters: dict. Keyed by region; forwarded to `log_uncertainty_values` as `uncertainty_indices`.
+    :param uncertainty_values: dict. Keyed by region; forwarded to `log_uncertainty_values`.
+    :param tehnosphere_indices: dict. Keyed by region; the entry of the last region of `result` feeds the "Indices mapping" sheet.
+    :param iteration_results: dict. Keyed by region; each entry is summed over axes 0, 2 and 3 to get the total impacts.
+    :param shares: dict, optional. Nested as {name: {year: {sub_name: value}}}; only the entries matching `year` are logged.
+    :raises ValueError: if `result` is empty.
+    :return: None.
+    """
+    # only the region keys of `result` are needed
+    regions = list(result)
+    if not regions:
+        # fail before creating the workbook: the "Indices mapping" sheet is
+        # built from the indices of a region, so at least one is required
+        raise ValueError("Cannot log Monte Carlo parameters: `result` is empty.")
+
+    export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"
+
+    # create Excel workbook using openpyxl
+    with pd.ExcelWriter(export_path, engine="openpyxl") as writer:
+
+        df_sum_impacts = pd.DataFrame()
+        df_uncertainty_values = pd.DataFrame()
+        df_technology_shares = pd.DataFrame()
+        # sheets are created up front, presumably to fix their order in the
+        # workbook; the `to_excel` calls below target the same sheet names
+        writer.book.create_sheet("Indices mapping")
+        writer.book.create_sheet("Monte Carlo values")
+        writer.book.create_sheet("Technology shares")
+        writer.book.create_sheet("Total impacts")
+
+        for region in regions:
+
+            total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3))
+
+            df_sum_impacts = pd.concat(
+                [
+                    df_sum_impacts,
+                    log_results(
+                        total_impacts=total_impacts,
+                        methods=methods,
+                        region=region,
+                    ),
+                ]
+            )
+
+            uncertainty_indices = uncertainty_parameters[region]
+            uncertainty_vals = uncertainty_values[region]
+
+            df_uncertainty_values = pd.concat(
+                [
+                    df_uncertainty_values,
+                    log_uncertainty_values(
+                        region=region,
+                        uncertainty_indices=uncertainty_indices,
+                        uncertainty_values=uncertainty_vals,
+                    ),
+                ],
+            )
+
+            if shares:
+                # flatten the shares of the requested year into
+                # {"<name> - <sub_name>": value}
+                sub_shares = {}
+                for k, v in shares.items():
+                    for x, y in v.items():
+                        if x == year:
+                            for z, w in y.items():
+                                sub_shares[f"{k} - {z}"] = w
+
+                df_technology_shares = pd.concat(
+                    [
+                        df_technology_shares,
+                        log_subshares(
+                            shares=sub_shares,
+                            region=region,
+                        ),
+                    ],
+                )
+
+        # the mapping sheet uses the indices of the last region processed
+        indices = tehnosphere_indices[regions[-1]]
+
+        df_technosphere_indices = create_mapping_sheet(indices=indices)
+
+        df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False)
+        df_uncertainty_values.to_excel(
+            writer, sheet_name="Monte Carlo values", index=False
+        )
+        df_technology_shares.to_excel(
+            writer, sheet_name="Technology shares", index=False
+        )
+        df_technosphere_indices.to_excel(
+            writer, sheet_name="Indices mapping", index=False
+        )
+
+        print(f"Monte Carlo parameters added to: {export_path.resolve()}")
+
+
+def run_gsa(directory: "str | Path | None" = STATS_DIR, method: str = "delta") -> None:
+    """
+    Run a global sensitivity analysis (GSA) on every Excel file found (recursively) in `directory`.
+    Updates the Excel files with the GSA results (an existing GSA sheet is replaced).
+    :param directory: str or Path. The directory where the Excel files are stored. Default is STATS_DIR (also used if None).
+    :param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment.
+    :return: None.
+    """
+    if method != "delta":
+        raise ValueError(f"Method {method} is not supported.")
+
+    # iterate through the Excel files in the directory
+
+    for file in Path(STATS_DIR if directory is None else directory).rglob("*.xlsx"):
+        # load content of "Monte Carlo values" sheet into a pandas DataFrame
+        df_mc_vals = pd.read_excel(
+            file, sheet_name="Monte Carlo values"
+        )
+
+        # load content of "Technology shares" sheet into a pandas DataFrame
+        # if it exists (a missing sheet raises ValueError in pandas; KeyError with older openpyxl readers)
+
+        try:
+            df_technology_shares = pd.read_excel(
+                file,
+                sheet_name="Technology shares",
+            )
+        except (KeyError, ValueError):
+            df_technology_shares = None
+
+        # load content of "Total impacts" sheet into a pandas DataFrame
+        df_sum_impacts = pd.read_excel(
+            file, sheet_name="Total impacts"
+        )
+
+        # open Excel workbook in append mode; replace the GSA sheet of a previous run
+        with pd.ExcelWriter(
+            file, engine="openpyxl", mode="a", if_sheet_exists="replace"
+        ) as writer:
+
+            df_GSA_results = run_GSA_delta(
+                total_impacts=df_sum_impacts,
+                uncertainty_values=df_mc_vals,
+                technology_shares=df_technology_shares,
+            )
+
+            df_GSA_results.to_excel(
+                writer, sheet_name=f"GSA {method.capitalize()}", index=False
+            )
+
+        print(f"GSA results added to: {file.resolve()}")