
Commit

cafriedb committed Aug 7, 2024
2 parents 3c75401 + 4b15b3f commit a50ad27
Showing 7 changed files with 430 additions and 276 deletions.
22 changes: 10 additions & 12 deletions dopo/__init__.py
@@ -5,14 +5,12 @@
"__version__",
'generate_sets_from_filters',
'MethodFinder',
'find_and_create_method',
'get_all_methods',
'compare_activities_multiple_methods',
'lvl1_plot',
'lvl21_plot_stacked_absolute',
'lvl22_plot_input_comparison_with_method',
'lvl23_plot_input_comparison_plot_no_method',
'lvl3_plot_relative_changes'
'scores_across_activities',
'inputs_contributions',
'inputs_contribution',
'input_contribution_across_activities',
'activities_across_databases'
)

__version__ = "0.0.1"
@@ -22,9 +20,9 @@
from dopo.sector_score_dict import compare_activities_multiple_methods
from dopo.sector_score_dict import small_inputs_to_other_column
from dopo.plots import (
lvl1_plot,
lvl21_plot_stacked_absolute,
lvl22_plot_input_comparison_with_method,
lvl23_plot_input_comparison_plot_no_method,
lvl3_plot_relative_changes
scores_across_activities,
inputs_contributions,
inputs_contribution,
input_contribution_across_activities,
activities_across_databases
)
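
For orientation, a minimal sketch of what a downstream import might look like after this rename (the package and function names are taken from the diff above; the example itself is not part of the commit):

from dopo import (
    generate_sets_from_filters,
    MethodFinder,
    scores_across_activities,
    inputs_contributions,
    inputs_contribution,
    input_contribution_across_activities,
    activities_across_databases,
)

# Judging by position in the old tuple, scores_across_activities appears to
# replace lvl1_plot, and activities_across_databases lvl3_plot_relative_changes.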
50 changes: 19 additions & 31 deletions dopo/activity_filter.py
@@ -6,10 +6,11 @@
# Sector filter functions from premise
# ---------------------------------------------------


def _act_fltr(
database: list,
fltr = None,
mask = None,
fltr=None,
mask=None,
):
"""Filter `database` for activities_list matching field contents given by `fltr` excluding strings in `mask`.
`fltr`: string, list of strings or dictionary.
@@ -49,25 +50,22 @@ def _act_fltr(
if isinstance(value, list):
for val in value:
filters = [a for a in filters if val in a[field]]
#filters.extend([ws.either(*[ws.contains(field, v) for v in value])])

# filters.extend([ws.either(*[ws.contains(field, v) for v in value])])
else:
filters = [
a for a in filters if value in a[field]
]
filters = [a for a in filters if value in a[field]]

#filters.append(ws.contains(field, value))

# filters.append(ws.contains(field, value))

if mask:
for field, value in mask.items():
if isinstance(value, list):
for val in value:
filters = [f for f in filters if val not in f[field]]
#filters.extend([ws.exclude(ws.contains(field, v)) for v in value])
# filters.extend([ws.exclude(ws.contains(field, v)) for v in value])
else:
filters = [f for f in filters if value not in f[field]]
#filters.append(ws.exclude(ws.contains(field, value)))
# filters.append(ws.exclude(ws.contains(field, value)))

return filters
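
To make the include/exclude semantics above concrete, here is a self-contained toy version of what _act_fltr does with a fltr and a mask dict (the database entries and field values are invented for illustration):

database = [
    {"name": "steel production, electric arc furnace"},
    {"name": "steel production, converter"},
    {"name": "cement production"},
]

fltr = {"name": "steel production"}  # keep entries whose name contains this
mask = {"name": "converter"}         # then drop entries whose name contains this

filters = [a for a in database if fltr["name"] in a["name"]]
filters = [f for f in filters if mask["name"] not in f["name"]]
assert filters == [{"name": "steel production, electric arc furnace"}]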

@@ -84,7 +82,7 @@ def generate_sets_from_filters(yaml_filepath, database=None) -> dict:
:rtype: dict
"""

filtr=_get_mapping(yaml_filepath, var='ecoinvent_aliases')
filtr = _get_mapping(yaml_filepath, var="ecoinvent_aliases")

names = []

@@ -98,35 +96,26 @@ def generate_sets_from_filters(yaml_filepath, database=None) -> dict:
else:
names.append(entry["fltr"])

#subset = list(
# subset = list(
# ws.get_many(
# database,
# ws.either(*[ws.contains("name", name) for name in names]),
# )
#)

subset=[
a for a in database if any(


x in a["name"] for x in names
)
]
# )

subset = [a for a in database if any(x in a["name"] for x in names)]

techs = {
tech: _act_fltr(subset, fltr.get("fltr"), fltr.get("mask"))
for tech, fltr in filtr.items()
}

mapping = {
tech: {act for act in actlst} for tech, actlst in techs.items()
}

mapping = {tech: {act for act in actlst} for tech, actlst in techs.items()}

return mapping

def _get_mapping(filepath, var):
"""
Load a YAML file and return a dictionary given a variable.
:param filepath: YAML file path
Expand All @@ -142,10 +131,9 @@ def _get_mapping(filepath, var):
for key, val in techs.items():
if var in val:
mapping[key] = val[var]

return mapping

# Example on how to call the functions to create a set of filtered activities_list
# set_from_fltrs = generate_sets_from_filters(yaml_filepath, database=ei39SSP)
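
Because the example call above is commented out, here is a hedged, self-contained sketch of the YAML layout that _get_mapping(filepath, var="ecoinvent_aliases") appears to expect (the sector names and filter strings are invented, and PyYAML is assumed since the module loads YAML files):

import yaml

yaml_text = """
steel:
  ecoinvent_aliases:
    fltr:
      name: steel production
    mask:
      name: market for
cement:
  ecoinvent_aliases:
    fltr: cement production
"""

techs = yaml.safe_load(yaml_text)
# mirrors _get_mapping's selection step
mapping = {
    key: val["ecoinvent_aliases"]
    for key, val in techs.items()
    if "ecoinvent_aliases" in val
}
assert mapping["steel"]["fltr"] == {"name": "steel production"}
assert mapping["cement"]["fltr"] == "cement production"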
104 changes: 60 additions & 44 deletions dopo/cpc_inputs.py
@@ -1,104 +1,120 @@

# Imports
# --------

#brightway
# brightway
import brightway2 as bw
import bw2analyzer as ba

#common
# common
import pandas as pd
import numpy as np

#to be completed
# to be completed
import ast

# Function to generate dataframes of non-characterized inputs (grouped by CPC class) from an activity list
# Level 2.3 plot dependency
# ------------------------------------------------------------------------------------------------------------------------------------

def _get_cpc_inputs_of_activities(activities_list, input_type='list'):

'''
def _get_cpc_inputs_of_activities(activities_list, input_type="list"):
"""
for param description see function lvl23_plot_input_comparison_plot_no_method
NOTE: could adapt this function to get the outputs, or create another one. At the moment only inputs are considered.
'''
"""

def _activity_list_inputs_cpc(activities_list, input_type):
all_inputs = []
if input_type == 'list':

if input_type == "list":
activity_iterator = activities_list
elif input_type == 'dict':
elif input_type == "dict":
activity_iterator = activities_list.values()
else:
raise ValueError("input_type must be either 'list' or 'dict'")

for activity in activity_iterator:
inputs_keys = pd.Series({bw.get_activity(exc.input).key: exc.amount for exc in activity.technosphere()},
name=activity['name'] + ', ' + activity['location'])

inputs_keys = pd.Series(
{
bw.get_activity(exc.input).key: exc.amount
for exc in activity.technosphere()
},
name=activity["name"] + ", " + activity["location"],
)

# Adjust the way the key is presented
inputs_keys = inputs_keys.reset_index()
inputs_keys['full_key'] = inputs_keys.apply(lambda row: f"('{row['level_0']}', '{row['level_1']}')", axis=1)
inputs_keys = inputs_keys.drop(['level_0', 'level_1'], axis=1).set_index('full_key')

inputs_keys["full_key"] = inputs_keys.apply(
lambda row: f"('{row['level_0']}', '{row['level_1']}')", axis=1
)
inputs_keys = inputs_keys.drop(["level_0", "level_1"], axis=1).set_index(
"full_key"
)

# Add empty cpc column and activity information
inputs_keys.insert(0, 'identifier', activity['name'] + ', ' + activity['location'])
inputs_keys.insert(1, 'activity', activity['name'])
inputs_keys.insert(2, 'location', activity['location'])
inputs_keys.insert(3, 'unit', activity['unit'])
inputs_keys.insert(4, 'cpc', None)

inputs_keys.insert(
0, "identifier", activity["name"] + ", " + activity["location"]
)
inputs_keys.insert(1, "activity", activity["name"])
inputs_keys.insert(2, "location", activity["location"])
inputs_keys.insert(3, "unit", activity["unit"])
inputs_keys.insert(4, "cpc", None)

all_inputs.append(inputs_keys)

# Combine all inputs into a single DataFrame
combined_inputs = pd.concat(all_inputs, axis=0)

return combined_inputs

def _update_cpc_information(combined_inputs):
for index, row in combined_inputs.iterrows():
# Transform each key to tuple
tuple_key = ast.literal_eval(index)

# Get input activity for the key
input_activity = bw.get_activity(tuple_key)

# Get cpc name for activity
cpc_name = ba.comparisons.get_cpc(input_activity)

# Store cpc_name in the 'cpc' column of the combined_inputs dataframe
combined_inputs.at[index, 'cpc'] = cpc_name
combined_inputs.at[index, "cpc"] = cpc_name

return combined_inputs

def _transform_dataframe(combined_inputs):
# Set 'identifier' as the new index and drop the 'full_key' index
combined_inputs = combined_inputs.reset_index().set_index('identifier').drop('full_key', axis=1)

combined_inputs = (
combined_inputs.reset_index()
.set_index("identifier")
.drop("full_key", axis=1)
)

# Determine the index of the 'unit' column
unit_index = combined_inputs.columns.get_loc('unit')
unit_index = combined_inputs.columns.get_loc("unit")

# Split the dataframe into two parts
combined_inputs_identifier = combined_inputs.iloc[:, :unit_index+1]
combined_inputs_cpc = combined_inputs.iloc[:, unit_index+1:]
#set index of to 'cpc' in combined_input_cpc
combined_inputs_cpc = combined_inputs_cpc.set_index('cpc')
combined_inputs_identifier = combined_inputs.iloc[:, : unit_index + 1]
combined_inputs_cpc = combined_inputs.iloc[:, unit_index + 1 :]
# set index to 'cpc' in combined_inputs_cpc
combined_inputs_cpc = combined_inputs_cpc.set_index("cpc")

# Combine rows with the same index value in combined_inputs_cpc
combined_inputs_cpc = combined_inputs_cpc.groupby(level=0).agg(lambda x: np.sum(x) if x.dtype.kind in 'biufc' else x.iloc[0])
combined_inputs_cpc = combined_inputs_cpc.groupby(level=0).agg(
lambda x: np.sum(x) if x.dtype.kind in "biufc" else x.iloc[0]
)
# Transpose combined_inputs_cpc
combined_inputs_cpc_trans = combined_inputs_cpc.T

# Merge combined_inputs_identifier and combined_inputs_cpc_trans
result = combined_inputs_identifier.join(combined_inputs_cpc_trans)
result = result.drop_duplicates()

# Sort dataframe by activity and location alphabetically and reset the index
result = result.sort_values(by=['activity', 'location'])
result = result.sort_values(by=["activity", "location"])
result = result.reset_index(drop=True)
return result

(diff truncated; remaining changed files not loaded)
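
The groupby/agg step in _transform_dataframe above sums numeric columns per CPC class while keeping the first value of non-numeric ones; a standalone illustration with made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {"amount": [1.0, 2.0, 0.5], "unit": ["kg", "kg", "kg"]},
    index=pd.Index(["41: basic metals", "41: basic metals", "18: water"], name="cpc"),
)

aggregated = df.groupby(level=0).agg(
    lambda x: np.sum(x) if x.dtype.kind in "biufc" else x.iloc[0]
)
# "41: basic metals" rows collapse to amount 3.0; "unit" keeps its first value
print(aggregated)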
