diff --git a/pathways/lca.py b/pathways/lca.py index a886e1e..d4acc9b 100644 --- a/pathways/lca.py +++ b/pathways/lca.py @@ -4,11 +4,11 @@ """ import logging +import pickle import uuid from collections import defaultdict from pathlib import Path from typing import Any, Dict, List, Tuple -import pickle import bw2calc as bc import bw_processing as bwp @@ -25,9 +25,9 @@ from .lcia import fill_characterization_factors_matrices from .stats import ( create_mapping_sheet, - log_uncertainty_values, log_results, log_subshares, + log_uncertainty_values, run_GSA_delta, run_GSA_OLS, ) @@ -410,7 +410,10 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int # Save the technosphere indices to disk id_technosphere_indices = uuid.uuid4() - pickle.dump(lca.technosphere_indices, open(DIR_CACHED_DB / f"{id_technosphere_indices}.pkl", "wb")) + pickle.dump( + lca.technosphere_indices, + open(DIR_CACHED_DB / f"{id_technosphere_indices}.pkl", "wb"), + ) # Save the characterization vectors to disk id_results_array = uuid.uuid4() diff --git a/pathways/pathways.py b/pathways/pathways.py index a4ce35d..fa12610 100644 --- a/pathways/pathways.py +++ b/pathways/pathways.py @@ -5,6 +5,7 @@ """ import logging +import pickle from collections import defaultdict from multiprocessing import Pool, cpu_count from typing import Any, List, Optional @@ -14,7 +15,6 @@ import pyprind import xarray as xr import yaml -import pickle from .data_validation import validate_datapackage from .filesystem_constants import DATA_DIR, DIR_CACHED_DB, STATS_DIR, USER_LOGS_DIR @@ -22,9 +22,9 @@ from .lcia import get_lcia_method_names from .stats import ( create_mapping_sheet, - log_uncertainty_values, log_results, log_subshares, + log_uncertainty_values, run_GSA_delta, ) from .subshares import generate_samples @@ -519,7 +519,9 @@ def _fill_in_result_array( ] ) - uncertainty_indices = uncertainty_parameters[data["uncertainty_params"]] + uncertainty_indices = uncertainty_parameters[ + data["uncertainty_params"] + ] uncertainty_vals = uncertainty_values[data["uncertainty_vals"]] df_uncertainty_values = pd.concat( @@ -553,20 +555,34 @@ def _fill_in_result_array( indices = tehnosphere_indices[data["technosphere_indices"]] # only keep indices which are also present in uncertainty_indices - indices = {k: v for k, v in indices.items() if v in set(uncertainty_indices.flatten().tolist())} + indices = { + k: v + for k, v in indices.items() + if v in set(uncertainty_indices.flatten().tolist()) + } df_technosphere_indices = create_mapping_sheet(indices=indices) - df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False) - df_uncertainty_values.to_excel(writer, sheet_name="Monte Carlo values", index=False) - df_technology_shares.to_excel(writer, sheet_name="Technology shares", index=False) - df_technosphere_indices.to_excel(writer, sheet_name="Indices mapping", index=False) + df_sum_impacts.to_excel( + writer, sheet_name="Total impacts", index=False + ) + df_uncertainty_values.to_excel( + writer, sheet_name="Monte Carlo values", index=False + ) + df_technology_shares.to_excel( + writer, sheet_name="Technology shares", index=False + ) + df_technosphere_indices.to_excel( + writer, sheet_name="Indices mapping", index=False + ) df_GSA = run_GSA_delta( total_impacts=df_sum_impacts, uncertainty_values=df_uncertainty_values, - technology_shares=df_technology_shares + technology_shares=df_technology_shares, + ) + df_GSA.to_excel( + writer, sheet_name="Global Sensitivity Analysis", index=False ) - df_GSA.to_excel(writer, sheet_name="Global Sensitivity Analysis", index=False) print(f"Statistical analysis: {export_path.resolve()}") diff --git a/pathways/stats.py b/pathways/stats.py index 704026e..076dd46 100644 --- a/pathways/stats.py +++ b/pathways/stats.py @@ -98,7 +98,8 @@ def log_double_accounting( def log_subshares( - shares: dict, region: str, + shares: dict, + region: str, ) -> pd.DataFrame: """ Create a pandas DataFrame where the keys of shares are the columns @@ -113,9 +114,9 @@ def log_subshares( def log_uncertainty_values( - region: str, - uncertainty_indices: np.array, - uncertainty_values: np.array, + region: str, + uncertainty_indices: np.array, + uncertainty_values: np.array, ) -> pd.DataFrame: """ Create a pandas DataFrame with the region and uncertainty indices as columns, @@ -160,21 +161,21 @@ def log_results( return df[["iteration", "region"] + methods] -def create_mapping_sheet( - indices: dict -) -> pd.DataFrame: +def create_mapping_sheet(indices: dict) -> pd.DataFrame: """ Create a mapping sheet for the activities with uncertainties. """ # Converting the dictionary into a pandas DataFrame - df = pd.DataFrame(indices.items(), columns=['Index', 'Value']) + df = pd.DataFrame(indices.items(), columns=["Index", "Value"]) # Split the 'Index' column into four separate columns - df[['Name', 'Product', 'Unit', 'Region']] = pd.DataFrame(df['Index'].tolist(), index=df.index) + df[["Name", "Product", "Unit", "Region"]] = pd.DataFrame( + df["Index"].tolist(), index=df.index + ) # Drop the now unnecessary 'Index' column - df.drop(columns=['Index'], inplace=True) + df.drop(columns=["Index"], inplace=True) return df @@ -274,8 +275,12 @@ def run_GSA_delta( # merge uncertainty_values and technology_shares # based on "iteration" and "region" columns - df_parameters = uncertainty_values.merge(technology_shares, on=["iteration", "region"]) - parameters = [param for param in df_parameters.columns if param not in ["iteration", "region"]] + df_parameters = uncertainty_values.merge( + technology_shares, on=["iteration", "region"] + ) + parameters = [ + param for param in df_parameters.columns if param not in ["iteration", "region"] + ] problem = { "num_vars": len(parameters),