Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor footprint changes #167

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
22 changes: 12 additions & 10 deletions viz_scripts/generic_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"if survey_info.get('trip-labels', None) == 'ENKETO':\n",
" ipython = get_ipython()\n",
" ipython._showtraceback = scaffolding.no_traceback_handler\n",
" raise Exception(\"The plots in this notebook are not relecant for ENKETO trip-labels\")"
" raise Exception(\"The plots in this notebook are not relevant for ENKETO trip-labels\")"
]
},
{
Expand Down Expand Up @@ -121,7 +121,8 @@
" program,\n",
" study_type,\n",
" dynamic_labels,\n",
" include_test_users=include_test_users)"
" include_test_users=include_test_users,\n",
" add_footprint=True)"
]
},
{
Expand Down Expand Up @@ -166,7 +167,8 @@
" program,\n",
" study_type,\n",
" dynamic_labels,\n",
" include_test_users=include_test_users)"
" include_test_users=include_test_users,\n",
" add_footprint=True)"
]
},
{
Expand Down Expand Up @@ -244,7 +246,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good text change. More concise & clearer

" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
Expand Down Expand Up @@ -297,7 +299,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -332,7 +334,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -431,7 +433,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
Expand Down Expand Up @@ -466,8 +468,8 @@
"try:\n",
" ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
" ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
" labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
" inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
" labeled_land_trips_df = expanded_ct[expanded_ct['base_mode'] != \"AIR\"] if \"base_mode\" in expanded_ct.columns else None\n",
" inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['base_mode'] != \"AIR\"] if \"base_mode\" in expanded_ct_inferred.columns else None\n",
" sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
" \n",
" sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
Expand All @@ -478,7 +480,7 @@
" plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
Expand Down
6 changes: 3 additions & 3 deletions viz_scripts/mode_specific_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
" f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
" f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
" f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
" plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down Expand Up @@ -301,7 +301,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" \"Inferred from prior labels\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" plot_title = plot_title_no_quality\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down Expand Up @@ -338,7 +338,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down
35 changes: 15 additions & 20 deletions viz_scripts/scaffolding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import defaultdict
from collections import OrderedDict
import difflib
import logging

import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.tcquery as esttc
Expand Down Expand Up @@ -59,6 +60,7 @@ async def add_base_mode_footprint(trip_list):
labels = await emcu.read_json_resource("label-options.default.json")
value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]}

counter_trip_error = 0
for trip in trip_list:
#format so emffc can get id for metadata
trip['data']['_id'] = trip['_id']
Expand All @@ -75,13 +77,14 @@ async def add_base_mode_footprint(trip_list):
trip['data']['replaced_base_mode'] = "UNKNOWN"
trip['data']['replaced_mode_footprint'] = {}

except:
print("hit exception")
except Exception as e:
counter_trip_error = counter_trip_error + 1
logging.exception(f"Exception in add_base_mode_footprint for trip - {trip['data']['_id']}")
trip['data']['base_mode'] = "UNKNOWN"
trip['data']['replaced_base_mode'] = "UNKNOWN"
trip['data']['mode_confirm_footprint'] = {}
trip['data']['replaced_mode_footprint'] = {}
logging.debug(f"There are {counter_trip_error} trip errors")
return trip_list

async def load_all_confirmed_trips(tq, add_footprint):
Expand Down Expand Up @@ -254,7 +257,7 @@ async def map_trip_data(expanded_trip_df, study_type, dynamic_labels):

return expanded_trip_df

async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False):
async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False, add_footprint=False):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my understanding, is this cleanup from something that was meant to be added in a previous PR?

I saw that load_viz_notebook_data already had the add_footprint parameter, but load_viz_notebook_inferred_data was missing it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is not cleanup that was meant to be part of the previous PR. We were already using the add_footprint parameter to compute footprints for labeled trips in the energy_calculations notebook. Previously, the add_footprint parameter was not used for load_viz_notebook_inferred_data.

We need to pass the add_footprint parameter as True to enable extraction of base_mode. This change is therefore required so that base_mode is added as a column in the expanded_ct_inferred data frame. That lets us filter air trips using AIR as the base_mode, instead of using mode_confirm_w_other = 'air'.

""" Inputs:
year/month/program/study_type = parameters from the visualization notebook
dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose
Expand All @@ -263,7 +266,7 @@ async def load_viz_notebook_inferred_data(year, month, program, study_type, dyna
"""
# Access database
tq = get_time_query(year, month)
participant_ct_df = await load_all_participant_trips(program, tq, include_test_users)
participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, add_footprint)
inferred_ct = filter_inferred_trips(participant_ct_df)
expanded_it = expand_inferredlabels(inferred_ct)
expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels)
Expand Down Expand Up @@ -501,29 +504,21 @@ def unit_conversions(df):
df['distance_miles']= df["distance"]*0.00062 #meters to miles
df['distance_kms'] = df["distance"] / 1000 #meters to kms

def extract_kwh(footprint_dict):
if 'kwh' in footprint_dict.keys():
return footprint_dict['kwh']
else:
print("missing kwh", footprint_dict)
return np.nan

def extract_co2(footprint_dict):
if 'kg_co2' in footprint_dict.keys():
return footprint_dict['kg_co2']
def extract_footprint(footprint_dict, footprint_key):
if footprint_key in footprint_dict.keys():
return footprint_dict[footprint_key]
Comment on lines +507 to +509
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good refactor

else:
print("missing co2", footprint_dict)
return np.nan

def unpack_energy_emissions(expanded_ct):
expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_co2)
expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kg_co2')
expanded_ct['Mode_confirm_lb_CO2'] = kg_to_lb(expanded_ct['Mode_confirm_kg_CO2'])
expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_co2)
expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kg_co2')
expanded_ct['Replaced_mode_lb_CO2'] = kg_to_lb(expanded_ct['Replaced_mode_kg_CO2'])
CO2_impact(expanded_ct)

expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_kwh)
expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_kwh)
expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kwh')
expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kwh')
energy_impact(expanded_ct)

return expanded_ct
Expand Down