Skip to content

Commit

Permalink
Fixing data packaging
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Dec 22, 2024
1 parent f0fdc7b commit 138a2ed
Showing 1 changed file with 42 additions and 40 deletions.
82 changes: 42 additions & 40 deletions premise/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,45 +234,44 @@ def add_variables_mapping(self):
if "configurations" in scenario:
configurations = scenario["configurations"]
for key, val in configurations.items():
for variables in val.get("production pathways", {}).values():
for variable in variables:
if variable not in mapping:
variable_name = variable["production volume"]["variable"]
mapping[variable] = {"scenario variable": variable_name}
filters = variable.get("ecoinvent alias")
mask = variable.get("ecoinvent alias").get("mask")

mapping[variable]["dataset"] = self.find_activities(
filters=filters,
database=scenario["database"],
mask=mask,
)

mapping[variable]["dataset"] = [
dict(t)
for t in {
tuple(sorted(d.items()))
for d in mapping[variable]["dataset"]
}
]

if len(mapping[variable]["dataset"]) == 0:
print(f"No dataset found for {variable} in {variable_name}")
print(f"Filters: {filters}")
print(f"Mask: {mask}")
continue

variables = list(
val["production pathways"].keys()
)
variables.remove(variable)
# remove datasets whose names contain any of the other variables
# (every variable of this configuration except the current one)
mapping[variable]["dataset"] = [
d
for variable, variable_details in val.get("production pathways", {}).items():
if variable not in mapping:
variable_scenario_name = variable_details.get("production volume", {}).get("variable", 0)
mapping[variable] = {"scenario variable": variable_scenario_name}
filters = variable_details.get("ecoinvent alias")
mask = variable_details.get("ecoinvent alias").get("mask")

mapping[variable]["dataset"] = self.find_activities(
filters=filters,
database=scenario["database"],
mask=mask,
)

mapping[variable]["dataset"] = [
dict(t)
for t in {
tuple(sorted(d.items()))
for d in mapping[variable]["dataset"]
if not any(v in d["name"] for v in variables)
]
}
]

if len(mapping[variable]["dataset"]) == 0:
print(f"No dataset found for {variable} in {variable_scenario_name}")
print(f"Filters: {filters}")
print(f"Mask: {mask}")
continue

variables = list(
val["production pathways"].keys()
)
variables.remove(variable)
# remove datasets whose names contain any of the other variables
# (every variable of this configuration except the current one)
mapping[variable]["dataset"] = [
d
for d in mapping[variable]["dataset"]
if not any(v in d["name"] for v in variables)
]

with open(Path.cwd() / "pathways" / "mapping" / "mapping.yaml", "w") as f:
yaml.dump(mapping, f)
Expand Down Expand Up @@ -306,7 +305,6 @@ def add_scenario_data(self):
# add a scenario dimension
data = data.expand_dims("scenario")
data.coords["scenario"] = [scenario_name]
self.scenario_names.append(scenario_name)

data_list.append(data)

Expand All @@ -323,8 +321,12 @@ def add_scenario_data(self):

# split the columns "scenarios" into "model" and "pathway"
df[["model", "pathway"]] = df["scenario"].str.split(" - ", n=1, expand=True)
# remove any spaces in the "pathway" column
#df["pathway"] = df["pathway"].str.replace(" ", "")
df = df.drop(columns=["scenario"])

self.scenario_names = df["pathway"].unique().tolist()

# remove rows with empty values under "value"
df = df.dropna(subset=["value"])

Expand All @@ -351,7 +353,7 @@ def build_datapackage(self, name: str, contributors: list = None):
f"Data package generated by premise {__version__}."
)
package.descriptor["premise version"] = str(__version__)
package.descriptor["scenarios"] = list(set(self.scenario_names))
package.descriptor["scenarios"] = self.scenario_names
package.descriptor["keywords"] = [
"ecoinvent",
"scenario",
Expand Down

0 comments on commit 138a2ed

Please sign in to comment.