diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 5e8d1e5..9a50314 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -68,7 +68,7 @@ "if survey_info.get('trip-labels', None) == 'ENKETO':\n", " ipython = get_ipython()\n", " ipython._showtraceback = scaffolding.no_traceback_handler\n", - " raise Exception(\"The plots in this notebook are not relecant for ENKETO trip-labels\")" + " raise Exception(\"The plots in this notebook are not relevant for ENKETO trip-labels\")" ] }, { @@ -121,7 +121,8 @@ " program,\n", " study_type,\n", " dynamic_labels,\n", - " include_test_users=include_test_users)" + " include_test_users=include_test_users,\n", + " add_footprint=True)" ] }, { @@ -166,7 +167,8 @@ " program,\n", " study_type,\n", " dynamic_labels,\n", - " include_test_users=include_test_users)" + " include_test_users=include_test_users,\n", + " add_footprint=True)" ] }, { @@ -244,7 +246,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", + " \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", @@ -297,7 +299,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", + " \"Inferred from prior labels\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -332,7 +334,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", + " \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -431,7 +433,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", + " \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", @@ -466,8 +468,8 @@ "try:\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n", - " inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['base_mode'] != \"AIR\"] if \"base_mode\" in expanded_ct.columns else None\n", + " inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['base_mode'] != \"AIR\"] if \"base_mode\" in expanded_ct_inferred.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", @@ -478,7 +480,7 @@ " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", + " \"Inferred from prior labels\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index a165630..6e50b2a 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -264,7 +264,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", " f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n", + " f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -301,7 +301,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n", + " \"Inferred from prior labels\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -338,7 +338,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n", + " f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 3e46eba..d79e8c8 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -5,6 +5,7 @@ from collections import defaultdict from collections import OrderedDict import difflib +import logging import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.timeseries.tcquery as esttc @@ -59,6 +60,7 @@ async def add_base_mode_footprint(trip_list): labels = await emcu.read_json_resource("label-options.default.json") value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]} + counter_trip_error = 0 for trip in trip_list: #format so emffc can get id for metadata trip['data']['_id'] = trip['_id'] @@ -75,13 +77,14 @@ async def add_base_mode_footprint(trip_list): trip['data']['replaced_base_mode'] = "UNKNOWN" trip['data']['replaced_mode_footprint'] = {} - except: - print("hit exception") + except Exception as e: + counter_trip_error = counter_trip_error + 1 + logging.exception(f"Exception in add_base_mode_footprint for trip - {trip['data']['_id']}") trip['data']['base_mode'] = "UNKNOWN" trip['data']['replaced_base_mode'] = "UNKNOWN" trip['data']['mode_confirm_footprint'] = {} trip['data']['replaced_mode_footprint'] = {} - + logging.debug(f"There are {counter_trip_error} trip errors") return trip_list async def load_all_confirmed_trips(tq, add_footprint): @@ -254,7 +257,7 @@ async def map_trip_data(expanded_trip_df, study_type, dynamic_labels): return expanded_trip_df -async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False): +async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False, add_footprint=False): """ Inputs: year/month/program/study_type = parameters from the visualization notebook dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose @@ -263,7 +266,7 @@ async def load_viz_notebook_inferred_data(year, month, program, study_type, dyna """ # Access database tq = get_time_query(year, month) - participant_ct_df = await load_all_participant_trips(program, tq, include_test_users) + participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, add_footprint) inferred_ct = filter_inferred_trips(participant_ct_df) expanded_it = expand_inferredlabels(inferred_ct) expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels) @@ -501,29 +504,21 @@ def unit_conversions(df): df['distance_miles']= df["distance"]*0.00062 #meters to miles df['distance_kms'] = df["distance"] / 1000 #meters to kms -def extract_kwh(footprint_dict): - if 'kwh' in footprint_dict.keys(): - return footprint_dict['kwh'] - else: - print("missing kwh", footprint_dict) - return np.nan - -def extract_co2(footprint_dict): - if 'kg_co2' in footprint_dict.keys(): - return footprint_dict['kg_co2'] +def extract_footprint(footprint_dict, footprint_key): + if footprint_key in footprint_dict.keys(): + return footprint_dict[footprint_key] else: - print("missing co2", footprint_dict) return np.nan def unpack_energy_emissions(expanded_ct): - expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_co2) + expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kg_co2') expanded_ct['Mode_confirm_lb_CO2'] = kg_to_lb(expanded_ct['Mode_confirm_kg_CO2']) - expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_co2) + expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kg_co2') expanded_ct['Replaced_mode_lb_CO2'] = kg_to_lb(expanded_ct['Replaced_mode_kg_CO2']) CO2_impact(expanded_ct) - expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_kwh) - expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_kwh) + expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kwh') + expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kwh') energy_impact(expanded_ct) return expanded_ct