From 2f78b8a207973c8f14f10c46022cb3dd9ae87d99 Mon Sep 17 00:00:00 2001 From: romainsacchi Date: Wed, 31 Jul 2024 16:37:09 +0200 Subject: [PATCH] `run_gsa()` as a standalone function --- dev/timing.py | 13 ++-- pathways/__init__.py | 3 +- pathways/pathways.py | 163 +++---------------------------------------- pathways/stats.py | 145 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 158 deletions(-) diff --git a/dev/timing.py b/dev/timing.py index ecf4f08..ca56d2f 100644 --- a/dev/timing.py +++ b/dev/timing.py @@ -1,9 +1,9 @@ -from pathways import Pathways +from pathways import Pathways, run_gsa p = Pathways( datapackage="remind-SSP2-PkBudg1150-stem-SPS1.zip", geography_mapping="geo_mapping_remind.yaml", - activities_mapping="act_categories_agg.yaml", + #activities_mapping="act_categories_agg.yaml", ) vars = [v for v in p.scenarios.coords["variables"].values if v.startswith("FE")] @@ -17,11 +17,14 @@ scenarios=p.scenarios.pathway.values.tolist(), years=[2020, 2030, 2040, 2050], variables=vars, - use_distributions=100, + use_distributions=10, subshares=True, - multiprocessing=True, ) p.export_results() -p.run_gsa() +print(p.lca_results.coords) +print(p.lca_results.shape) +print(p.lca_results.sum()) + +run_gsa() diff --git a/pathways/__init__.py b/pathways/__init__.py index 6d5860f..bf252f4 100644 --- a/pathways/__init__.py +++ b/pathways/__init__.py @@ -1,5 +1,6 @@ __version__ = (0, 0, 1) -__all__ = ("__version__", "Pathways") +__all__ = ("__version__", "Pathways", "run_gsa") from .pathways import Pathways +from .stats import run_gsa diff --git a/pathways/pathways.py b/pathways/pathways.py index 7e4e7d4..50564fc 100644 --- a/pathways/pathways.py +++ b/pathways/pathways.py @@ -18,15 +18,11 @@ import yaml from .data_validation import validate_datapackage -from .filesystem_constants import DATA_DIR, DIR_CACHED_DB, STATS_DIR, USER_LOGS_DIR +from .filesystem_constants import DATA_DIR, USER_LOGS_DIR from .lca import _calculate_year, get_lca_matrices from .lcia import get_lcia_method_names from .stats import ( - create_mapping_sheet, - log_results, - log_subshares, - log_uncertainty_values, - run_GSA_delta, + log_mc_parameters_to_excel, ) from .subshares import generate_samples from .utils import ( @@ -122,96 +118,6 @@ def _load_array(filepath): return np.stack(results, axis=2) -def log_mc_parameters_to_excel( - model: str, - scenario: str, - year: int, - methods: list, - result: dict, - uncertainty_parameters: dict, - uncertainty_values: dict, - tehnosphere_indices: dict, - iteration_results: dict, - shares: dict = None, -): - export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx" - - # create Excel workbook using openpyxl - with pd.ExcelWriter(export_path, engine="openpyxl") as writer: - - df_sum_impacts = pd.DataFrame() - df_uncertainty_values = pd.DataFrame() - df_technology_shares = pd.DataFrame() - writer.book.create_sheet("Indices mapping") - writer.book.create_sheet("Monte Carlo values") - writer.book.create_sheet("Technology shares") - writer.book.create_sheet("Total impacts") - - for region, data in result.items(): - - total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3)) - - df_sum_impacts = pd.concat( - [ - df_sum_impacts, - log_results( - total_impacts=total_impacts, - methods=methods, - region=region, - ), - ] - ) - - uncertainty_indices = uncertainty_parameters[region] - uncertainty_vals = uncertainty_values[region] - - df_uncertainty_values = pd.concat( - [ - df_uncertainty_values, - log_uncertainty_values( - region=region, - 
uncertainty_indices=uncertainty_indices, - uncertainty_values=uncertainty_vals, - ), - ], - ) - - if shares: - sub_shares = {} - for k, v in shares.items(): - for x, y in v.items(): - if x == year: - for z, w in y.items(): - sub_shares[f"{k} - {z}"] = w - - df_technology_shares = pd.concat( - [ - df_technology_shares, - log_subshares( - shares=sub_shares, - region=region, - ), - ], - ) - - indices = tehnosphere_indices[region] - - df_technosphere_indices = create_mapping_sheet(indices=indices) - - df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False) - df_uncertainty_values.to_excel( - writer, sheet_name="Monte Carlo values", index=False - ) - df_technology_shares.to_excel( - writer, sheet_name="Technology shares", index=False - ) - df_technosphere_indices.to_excel( - writer, sheet_name="Indices mapping", index=False - ) - - print(f"Monte Carlo parameters added to: {export_path.resolve()}") - - class Pathways: """The Pathways class reads in a datapackage that contains scenario data, mapping between scenario variables and LCA datasets, and LCA matrices. @@ -259,8 +165,15 @@ def __init__( # a mapping of geographies can be added # to aggregate locations to a higher level # e.g. from countries to regions - self.geography_mapping = load_mapping(geography_mapping) or None - self.activities_mapping = load_mapping(activities_mapping) or None + if geography_mapping: + self.geography_mapping = load_mapping(geography_mapping) + else: + self.geography_mapping = None + + if activities_mapping: + self.activities_mapping = load_mapping(activities_mapping) + else: + self.activities_mapping = None clean_cache_directory() @@ -631,57 +544,3 @@ def export_results(self, filename: str = None) -> str: :return: None """ return export_results_to_parquet(self.lca_results, filename) - - def run_gsa(self, method: str = "delta") -> None: - """ - Run a global sensitivity analysis (GSA) on the LCA results. - Updates Excel files with the GSA results. - :param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment. - :return: None. 
- """ - if method != "delta": - raise ValueError(f"Method {method} is not supported.") - - for model in self.lca_results.coords["model"].values: - for scenario in self.lca_results.coords["scenario"].values: - for year in self.lca_results.coords["year"].values: - export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx" - - # load content of "Monte Carlo values" sheet into a pandas DataFrame - df_mc_vals = pd.read_excel( - export_path, sheet_name="Monte Carlo values" - ) - - # load content of "Technology shares" sheet into a pandas DataFrame - # if it exists - - try: - df_technology_shares = pd.read_excel( - export_path, - sheet_name="Technology shares", - ) - except: - df_technology_shares = None - - # load content of "Total impacts" sheet into a pandas DataFrame - - df_sum_impacts = pd.read_excel( - export_path, sheet_name="Total impacts" - ) - - # open Excel workbook - with pd.ExcelWriter( - export_path, engine="openpyxl", mode="a" - ) as writer: - - df_GSA_results = run_GSA_delta( - total_impacts=df_sum_impacts, - uncertainty_values=df_mc_vals, - technology_shares=df_technology_shares, - ) - - df_GSA_results.to_excel( - writer, sheet_name=f"GSA {method.capitalize()}", index=False - ) - - print(f"GSA results added to: {export_path.resolve()}") diff --git a/pathways/stats.py b/pathways/stats.py index 6eaa405..c2d78ff 100644 --- a/pathways/stats.py +++ b/pathways/stats.py @@ -10,6 +10,8 @@ from openpyxl import Workbook, load_workbook from SALib.analyze import delta +from pathways.filesystem_constants import STATS_DIR + def log_double_accounting( filtered_names: Dict[Tuple[str, ...], Set[str]], @@ -323,3 +325,146 @@ def run_GSA_delta( results, columns=["LCIA method", "Parameter", "Delta", "Delta Conf", "S1", "S1 Conf"], ) + + +def log_mc_parameters_to_excel( + model: str, + scenario: str, + year: int, + methods: list, + result: dict, + uncertainty_parameters: dict, + uncertainty_values: dict, + tehnosphere_indices: dict, + iteration_results: dict, + shares: dict = None, +): + export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx" + + # create Excel workbook using openpyxl + with pd.ExcelWriter(export_path, engine="openpyxl") as writer: + + df_sum_impacts = pd.DataFrame() + df_uncertainty_values = pd.DataFrame() + df_technology_shares = pd.DataFrame() + writer.book.create_sheet("Indices mapping") + writer.book.create_sheet("Monte Carlo values") + writer.book.create_sheet("Technology shares") + writer.book.create_sheet("Total impacts") + + for region, data in result.items(): + + total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3)) + + df_sum_impacts = pd.concat( + [ + df_sum_impacts, + log_results( + total_impacts=total_impacts, + methods=methods, + region=region, + ), + ] + ) + + uncertainty_indices = uncertainty_parameters[region] + uncertainty_vals = uncertainty_values[region] + + df_uncertainty_values = pd.concat( + [ + df_uncertainty_values, + log_uncertainty_values( + region=region, + uncertainty_indices=uncertainty_indices, + uncertainty_values=uncertainty_vals, + ), + ], + ) + + if shares: + sub_shares = {} + for k, v in shares.items(): + for x, y in v.items(): + if x == year: + for z, w in y.items(): + sub_shares[f"{k} - {z}"] = w + + df_technology_shares = pd.concat( + [ + df_technology_shares, + log_subshares( + shares=sub_shares, + region=region, + ), + ], + ) + + indices = tehnosphere_indices[region] + + df_technosphere_indices = create_mapping_sheet(indices=indices) + + df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False) + 
df_uncertainty_values.to_excel(
+            writer, sheet_name="Monte Carlo values", index=False
+        )
+        df_technology_shares.to_excel(
+            writer, sheet_name="Technology shares", index=False
+        )
+        df_technosphere_indices.to_excel(
+            writer, sheet_name="Indices mapping", index=False
+        )
+
+    print(f"Monte Carlo parameters added to: {export_path.resolve()}")
+
+
+def run_gsa(directory=STATS_DIR, method: str = "delta") -> None:
+    """
+    Run a global sensitivity analysis (GSA) on previously exported Monte Carlo results.
+    Appends the GSA results to each Excel workbook found in the directory.
+    :param directory: str or Path. Directory containing the Excel files. Defaults to the pathways statistics directory (STATS_DIR).
+    :param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment.
+    :return: None.
+    """
+    if method != "delta":
+        raise ValueError(f"Method {method} is not supported.")
+
+    # iterate through the Excel files in the directory
+
+    for file in Path(directory).rglob("*.xlsx"):
+        # load content of "Monte Carlo values" sheet into a pandas DataFrame
+        df_mc_vals = pd.read_excel(
+            file, sheet_name="Monte Carlo values"
+        )
+
+        # load content of "Technology shares" sheet into a pandas DataFrame,
+        # if it exists (only written when technology sub-shares were sampled)
+        try:
+            df_technology_shares = pd.read_excel(
+                file,
+                sheet_name="Technology shares",
+            )
+        except Exception:  # the sheet is absent when no technology shares were logged
+            df_technology_shares = None
+
+        # load content of "Total impacts" sheet into a pandas DataFrame
+        df_sum_impacts = pd.read_excel(
+            file, sheet_name="Total impacts"
+        )
+
+        # append the GSA results to the existing workbook
+        with pd.ExcelWriter(
+            file, engine="openpyxl", mode="a"
+        ) as writer:
+
+            df_GSA_results = run_GSA_delta(
+                total_impacts=df_sum_impacts,
+                uncertainty_values=df_mc_vals,
+                technology_shares=df_technology_shares,
+            )
+
+            df_GSA_results.to_excel(
+                writer, sheet_name=f"GSA {method.capitalize()}", index=False
+            )
+
+        print(f"GSA results added to: {file.resolve()}")
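
Note on usage: with this change the GSA step no longer needs a live `Pathways`
instance; `run_gsa()` re-reads the Excel workbooks that the Monte Carlo run wrote
to the statistics directory. A minimal sketch of the intended call sequence (the
custom directory path below is a placeholder for illustration, not part of this
patch):

    from pathways import run_gsa

    # 1) run Pathways.calculate(..., use_distributions=N) and export_results(),
    #    as in dev/timing.py, so the per-model/scenario/year workbooks exist
    # 2) then analyse them in a single call:
    run_gsa()                              # scans the default STATS_DIR for *.xlsx files
    run_gsa(directory="some/stats/dir")    # or point it at another folder (placeholder path)
    # any method other than "delta" raises ValueError

Each workbook gains a "GSA Delta" sheet alongside the existing "Monte Carlo values",
"Technology shares" and "Total impacts" sheets.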