From 61ab4b9e0e2b869916087d9f289186c4f002eda4 Mon Sep 17 00:00:00 2001 From: $aTyam Date: Fri, 11 Aug 2023 04:20:29 -0400 Subject: [PATCH 1/2] Update greedy_similarity_binning.py Added a new list attribute (`tripLabels`) that stores the bin label assigned to each trip in one place, in the order the trips are processed. --- .../analysis/modelling/trip_model/greedy_similarity_binning.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py index d750a451e..352ebc20f 100644 --- a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py +++ b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py @@ -121,6 +121,7 @@ class label to apply: self.is_incremental = config['incremental_evaluation'] self.bins: Dict[str, Dict] = {} + self.tripLabels=[] def fit(self, trips: List[ecwc.Confirmedtrip]): @@ -184,9 +185,11 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]): logging.debug(f"adding trip to bin {bin_id} with features {trip_features}") self.bins[bin_id]['feature_rows'].append(trip_features) self.bins[bin_id]['labels'].append(trip_labels) + self.tripLabels.append(bin_id) else: # create new bin new_bin_id = str(len(self.bins)) + self.tripLabels.append(new_bin_id) new_bin_record = { 'feature_rows': [trip_features], 'labels': [trip_labels], From 2b5b06a058baf50aaebfe808f1ccff8e03d635b9 Mon Sep 17 00:00:00 2001 From: $aTyam Date: Tue, 15 Aug 2023 20:19:54 -0400 Subject: [PATCH 2/2] Updated builtin_timeseries.py These changes were made to return `entry`-type data (alongside the dataframe) to clustering_example.ipynb. 
--- emission/storage/timeseries/builtin_timeseries.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/emission/storage/timeseries/builtin_timeseries.py b/emission/storage/timeseries/builtin_timeseries.py index 64898dcef..64e4d189c 100644 --- a/emission/storage/timeseries/builtin_timeseries.py +++ b/emission/storage/timeseries/builtin_timeseries.py @@ -23,6 +23,9 @@ INVALID_QUERY = {'metadata.key': 'invalid'} class BuiltinTimeSeries(esta.TimeSeries): + + entryList=[] + def __init__(self, user_id): super(BuiltinTimeSeries, self).__init__(user_id) self.key_query = lambda key: {"metadata.key": key} @@ -261,6 +264,9 @@ def get_entry_at_ts(self, key, ts_key, ts): logging.debug("get_entry_at_ts result = %s" % retValue) return retValue + def getEntryList(self): + return self.entryList + def get_data_df(self, key, time_query = None, geo_query = None, extra_query_list=None, map_fn = None): @@ -290,7 +296,11 @@ def to_data_df(key, entry_it, map_fn = None): if map_fn is None: map_fn = BuiltinTimeSeries._to_df_entry # Dataframe doesn't like to work off an iterator - it wants everything in memory - df = pd.DataFrame([map_fn(e) for e in entry_it]) + + for e in entry_it: + BuiltinTimeSeries.entryList.append(map_fn(e)) + df = pd.DataFrame(BuiltinTimeSeries.entryList) + logging.debug("Found %s results" % len(df)) if len(df) > 0: dedup_check_list = [item for item in ecwe.Entry.get_dedup_list(key)