Skip to content

Commit

Permalink
run_gsa() as a standalone function
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Jul 31, 2024
1 parent 081e742 commit 2f78b8a
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 158 deletions.
13 changes: 8 additions & 5 deletions dev/timing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathways import Pathways
from pathways import Pathways, run_gsa

p = Pathways(
datapackage="remind-SSP2-PkBudg1150-stem-SPS1.zip",
geography_mapping="geo_mapping_remind.yaml",
activities_mapping="act_categories_agg.yaml",
#activities_mapping="act_categories_agg.yaml",
)

vars = [v for v in p.scenarios.coords["variables"].values if v.startswith("FE")]
Expand All @@ -17,11 +17,14 @@
scenarios=p.scenarios.pathway.values.tolist(),
years=[2020, 2030, 2040, 2050],
variables=vars,
use_distributions=100,
use_distributions=10,
subshares=True,
multiprocessing=True,
)

p.export_results()

p.run_gsa()
print(p.lca_results.coords)
print(p.lca_results.shape)
print(p.lca_results.sum())

run_gsa()
3 changes: 2 additions & 1 deletion pathways/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__version__ = (0, 0, 1)
__all__ = ("__version__", "Pathways")
__all__ = ("__version__", "Pathways", "run_gsa")


from .pathways import Pathways
from .stats import run_gsa
163 changes: 11 additions & 152 deletions pathways/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,11 @@
import yaml

from .data_validation import validate_datapackage
from .filesystem_constants import DATA_DIR, DIR_CACHED_DB, STATS_DIR, USER_LOGS_DIR
from .filesystem_constants import DATA_DIR, USER_LOGS_DIR
from .lca import _calculate_year, get_lca_matrices
from .lcia import get_lcia_method_names
from .stats import (
create_mapping_sheet,
log_results,
log_subshares,
log_uncertainty_values,
run_GSA_delta,
log_mc_parameters_to_excel,
)
from .subshares import generate_samples
from .utils import (
Expand Down Expand Up @@ -122,96 +118,6 @@ def _load_array(filepath):
return np.stack(results, axis=2)


def log_mc_parameters_to_excel(
model: str,
scenario: str,
year: int,
methods: list,
result: dict,
uncertainty_parameters: dict,
uncertainty_values: dict,
tehnosphere_indices: dict,
iteration_results: dict,
shares: dict = None,
):
export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"

# create Excel workbook using openpyxl
with pd.ExcelWriter(export_path, engine="openpyxl") as writer:

df_sum_impacts = pd.DataFrame()
df_uncertainty_values = pd.DataFrame()
df_technology_shares = pd.DataFrame()
writer.book.create_sheet("Indices mapping")
writer.book.create_sheet("Monte Carlo values")
writer.book.create_sheet("Technology shares")
writer.book.create_sheet("Total impacts")

for region, data in result.items():

total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3))

df_sum_impacts = pd.concat(
[
df_sum_impacts,
log_results(
total_impacts=total_impacts,
methods=methods,
region=region,
),
]
)

uncertainty_indices = uncertainty_parameters[region]
uncertainty_vals = uncertainty_values[region]

df_uncertainty_values = pd.concat(
[
df_uncertainty_values,
log_uncertainty_values(
region=region,
uncertainty_indices=uncertainty_indices,
uncertainty_values=uncertainty_vals,
),
],
)

if shares:
sub_shares = {}
for k, v in shares.items():
for x, y in v.items():
if x == year:
for z, w in y.items():
sub_shares[f"{k} - {z}"] = w

df_technology_shares = pd.concat(
[
df_technology_shares,
log_subshares(
shares=sub_shares,
region=region,
),
],
)

indices = tehnosphere_indices[region]

df_technosphere_indices = create_mapping_sheet(indices=indices)

df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False)
df_uncertainty_values.to_excel(
writer, sheet_name="Monte Carlo values", index=False
)
df_technology_shares.to_excel(
writer, sheet_name="Technology shares", index=False
)
df_technosphere_indices.to_excel(
writer, sheet_name="Indices mapping", index=False
)

print(f"Monte Carlo parameters added to: {export_path.resolve()}")


class Pathways:
"""The Pathways class reads in a datapackage that contains scenario data,
mapping between scenario variables and LCA datasets, and LCA matrices.
Expand Down Expand Up @@ -259,8 +165,15 @@ def __init__(
# a mapping of geographies can be added
# to aggregate locations to a higher level
# e.g. from countries to regions
self.geography_mapping = load_mapping(geography_mapping) or None
self.activities_mapping = load_mapping(activities_mapping) or None
if geography_mapping:
self.geography_mapping = load_mapping(geography_mapping)
else:
self.geography_mapping = None

if activities_mapping:
self.activities_mapping = load_mapping(activities_mapping)
else:
self.activities_mapping = None

clean_cache_directory()

Expand Down Expand Up @@ -631,57 +544,3 @@ def export_results(self, filename: str = None) -> str:
:return: None
"""
return export_results_to_parquet(self.lca_results, filename)

def run_gsa(self, method: str = "delta") -> None:
"""
Run a global sensitivity analysis (GSA) on the LCA results.
Updates Excel files with the GSA results.
:param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment.
:return: None.
"""
if method != "delta":
raise ValueError(f"Method {method} is not supported.")

for model in self.lca_results.coords["model"].values:
for scenario in self.lca_results.coords["scenario"].values:
for year in self.lca_results.coords["year"].values:
export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"

# load content of "Monte Carlo values" sheet into a pandas DataFrame
df_mc_vals = pd.read_excel(
export_path, sheet_name="Monte Carlo values"
)

# load content of "Technology shares" sheet into a pandas DataFrame
# if it exists

try:
df_technology_shares = pd.read_excel(
export_path,
sheet_name="Technology shares",
)
except:
df_technology_shares = None

# load content of "Total impacts" sheet into a pandas DataFrame

df_sum_impacts = pd.read_excel(
export_path, sheet_name="Total impacts"
)

# open Excel workbook
with pd.ExcelWriter(
export_path, engine="openpyxl", mode="a"
) as writer:

df_GSA_results = run_GSA_delta(
total_impacts=df_sum_impacts,
uncertainty_values=df_mc_vals,
technology_shares=df_technology_shares,
)

df_GSA_results.to_excel(
writer, sheet_name=f"GSA {method.capitalize()}", index=False
)

print(f"GSA results added to: {export_path.resolve()}")
145 changes: 145 additions & 0 deletions pathways/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from openpyxl import Workbook, load_workbook
from SALib.analyze import delta

from pathways.filesystem_constants import STATS_DIR


def log_double_accounting(
filtered_names: Dict[Tuple[str, ...], Set[str]],
Expand Down Expand Up @@ -323,3 +325,146 @@ def run_GSA_delta(
results,
columns=["LCIA method", "Parameter", "Delta", "Delta Conf", "S1", "S1 Conf"],
)


def log_mc_parameters_to_excel(
model: str,
scenario: str,
year: int,
methods: list,
result: dict,
uncertainty_parameters: dict,
uncertainty_values: dict,
tehnosphere_indices: dict,
iteration_results: dict,
shares: dict = None,
):
export_path = STATS_DIR / f"{model}_{scenario}_{year}.xlsx"

# create Excel workbook using openpyxl
with pd.ExcelWriter(export_path, engine="openpyxl") as writer:

df_sum_impacts = pd.DataFrame()
df_uncertainty_values = pd.DataFrame()
df_technology_shares = pd.DataFrame()
writer.book.create_sheet("Indices mapping")
writer.book.create_sheet("Monte Carlo values")
writer.book.create_sheet("Technology shares")
writer.book.create_sheet("Total impacts")

for region, data in result.items():

total_impacts = np.sum(iteration_results[region], axis=(0, 2, 3))

df_sum_impacts = pd.concat(
[
df_sum_impacts,
log_results(
total_impacts=total_impacts,
methods=methods,
region=region,
),
]
)

uncertainty_indices = uncertainty_parameters[region]
uncertainty_vals = uncertainty_values[region]

df_uncertainty_values = pd.concat(
[
df_uncertainty_values,
log_uncertainty_values(
region=region,
uncertainty_indices=uncertainty_indices,
uncertainty_values=uncertainty_vals,
),
],
)

if shares:
sub_shares = {}
for k, v in shares.items():
for x, y in v.items():
if x == year:
for z, w in y.items():
sub_shares[f"{k} - {z}"] = w

df_technology_shares = pd.concat(
[
df_technology_shares,
log_subshares(
shares=sub_shares,
region=region,
),
],
)

indices = tehnosphere_indices[region]

df_technosphere_indices = create_mapping_sheet(indices=indices)

df_sum_impacts.to_excel(writer, sheet_name="Total impacts", index=False)
df_uncertainty_values.to_excel(
writer, sheet_name="Monte Carlo values", index=False
)
df_technology_shares.to_excel(
writer, sheet_name="Technology shares", index=False
)
df_technosphere_indices.to_excel(
writer, sheet_name="Indices mapping", index=False
)

print(f"Monte Carlo parameters added to: {export_path.resolve()}")


def run_gsa(directory: [str, None] = STATS_DIR, method: str = "delta") -> None:
"""
Run a global sensitivity analysis (GSA) on the LCA results.
Updates Excel files with the GSA results.
:param method: str. The method used for the GSA. Default is 'delta'. Only 'delta' is supported at the moment.
:param directory: str. The directory where the Excel files are stored. Default is 'stats'.
:return: None.
"""
if method != "delta":
raise ValueError(f"Method {method} is not supported.")

# iterate through the Excel files in the directory

for file in Path(directory).rglob("*.xlsx"):
# load content of "Monte Carlo values" sheet into a pandas DataFrame
df_mc_vals = pd.read_excel(
file, sheet_name="Monte Carlo values"
)

# load content of "Technology shares" sheet into a pandas DataFrame
# if it exists

try:
df_technology_shares = pd.read_excel(
file,
sheet_name="Technology shares",
)
except:
df_technology_shares = None

# load content of "Total impacts" sheet into a pandas DataFrame
df_sum_impacts = pd.read_excel(
file, sheet_name="Total impacts"
)

# open Excel workbook
with pd.ExcelWriter(
file, engine="openpyxl", mode="a"
) as writer:

df_GSA_results = run_GSA_delta(
total_impacts=df_sum_impacts,
uncertainty_values=df_mc_vals,
technology_shares=df_technology_shares,
)

df_GSA_results.to_excel(
writer, sheet_name=f"GSA {method.capitalize()}", index=False
)

print(f"GSA results added to: {file.resolve()}")

0 comments on commit 2f78b8a

Please sign in to comment.