diff --git a/TRB_label_assist/SVM_decision_boundaries.ipynb b/TRB_label_assist/SVM_decision_boundaries.ipynb
index 5ed5376..407aee3 100644
--- a/TRB_label_assist/SVM_decision_boundaries.ipynb
+++ b/TRB_label_assist/SVM_decision_boundaries.ipynb
@@ -30,6 +30,7 @@
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
                 "import emission.core.get_database as edb\n",
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
                 "\n",
                 "import data_wrangling\n",
                 "from clustering import add_loc_clusters"
@@ -60,10 +61,12 @@
                 "uuids = [suburban_uuid, college_campus_uuid]\n",
                 "confirmed_trip_df_map = {}\n",
                 "labeled_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "expanded_trip_df_map = {}\n",
                 "for u in uuids:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)\n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])\n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_trip_df_map[u] = esdtq.expand_userinputs(labeled_trip_df_map[u])"
@@ -110,6 +113,8 @@
                 "    df_for_cluster = all_trips_df if cluster_unlabeled else labeled_trips_df\n",
                 "\n",
                 "    df_for_cluster = add_loc_clusters(df_for_cluster,\n",
+                "                                      ct_entry,\n",
+                "                                      clustering_way='destination',\n",
                 "                                      radii=radii,\n",
                 "                                      alg=alg,\n",
                 "                                      loc_type=loc_type,\n",
diff --git a/TRB_label_assist/classification_performance.ipynb b/TRB_label_assist/classification_performance.ipynb
index 60d1300..6e61d79 100644
--- a/TRB_label_assist/classification_performance.ipynb
+++ b/TRB_label_assist/classification_performance.ipynb
@@ -19,7 +19,6 @@
                 "import pandas as pd\n",
                 "import numpy as np\n",
                 "from uuid import UUID\n",
-                "\n",
                 "import matplotlib.pyplot as plt\n",
                 "\n",
                 "# import logging\n",
@@ -27,7 +26,7 @@
                 "\n",
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
-                "\n",
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
                 "from performance_eval import get_clf_metrics, cv_for_all_algs, PREDICTORS"
             ]
         },
@@ -49,10 +48,11 @@
                 "labeled_trip_df_map = {}\n",
                 "expanded_labeled_trip_df_map = {}\n",
                 "expanded_all_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "for u in all_users:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
-                "\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)\n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])\n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_labeled_trip_df_map[u] = esdtq.expand_userinputs(\n",
@@ -132,6 +132,7 @@
                 "# load in all runs\n",
                 "model_names = list(PREDICTORS.keys())\n",
                 "cv_results = cv_for_all_algs(\n",
+                "    ct_entry,\n",
                 "    uuid_list=all_users,\n",
                 "    expanded_trip_df_map=expanded_labeled_trip_df_map,\n",
                 "    model_names=model_names,\n",
diff --git a/TRB_label_assist/cluster_performance.ipynb b/TRB_label_assist/cluster_performance.ipynb
index b6eed6d..81c088d 100644
--- a/TRB_label_assist/cluster_performance.ipynb
+++ b/TRB_label_assist/cluster_performance.ipynb
@@ -15,11 +15,10 @@
             "source": [
                 "%load_ext autoreload\n",
                 "%autoreload 2\n",
-                "\n",
                 "import pandas as pd\n",
                 "import matplotlib.pyplot as plt\n",
                 "from matplotlib.gridspec import GridSpec\n",
-                "\n",
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
                 "import performance_eval\n",
@@ -45,10 +44,11 @@
                 "labeled_trip_df_map = {}\n",
                 "expanded_labeled_trip_df_map = {}\n",
                 "expanded_all_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "for u in all_users:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
-                "\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)    \n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])    \n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_labeled_trip_df_map[u] = esdtq.expand_userinputs(\n",
@@ -87,6 +87,8 @@
                 "\n",
                 "    all_results_df = performance_eval.run_eval_cluster_metrics(\n",
                 "        expanded_labeled_trip_df_map,\n",
+                "        ct_entry,\n",
+                "        clustering_way='destination',\n",
                 "        user_list=all_users,\n",
                 "        radii=radii,\n",
                 "        loc_type='end',\n",
@@ -265,6 +267,8 @@
                 "\n",
                 "SVM_results_df = performance_eval.run_eval_cluster_metrics(\n",
                 "    expanded_labeled_trip_df_map,\n",
+                "    ct_entry,\n",
+                "    clustering_way=\"destination\",\n",
                 "    user_list=all_users,\n",
                 "    radii=radii,\n",
                 "    loc_type='end',\n",
diff --git a/TRB_label_assist/clustering.py b/TRB_label_assist/clustering.py
index fbe8a3b..d3924f3 100644
--- a/TRB_label_assist/clustering.py
+++ b/TRB_label_assist/clustering.py
@@ -16,8 +16,8 @@
 # our imports
 # NOTE: this requires changing the branch of e-mission-server to
 # eval-private-data-compatibility
-import emission.analysis.modelling.tour_model_extended.similarity as eamts
 import emission.storage.decorations.trip_queries as esdtq
+import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
 
 EARTH_RADIUS = 6371000
 ALG_OPTIONS = [
@@ -28,9 +28,27 @@
     'mean_shift'
 ]
 
+def cleanEntryTypeData(loc_df,trip_entry):
+
+    """
+    Keep only the entries whose '_id' still appears in loc_df, i.e. drop the entries
+    removed from the df by esdtq.filter_labeled_trips() and esdtq.expand_userinputs().
+
+    loc_df : dataframe made from entry type data; must have an '_id' column
+    trip_entry : the entry type equivalent of loc_df,
+                which was passed alongside the dataframe while loading the data
+
+    Returns a list of the matching entries, in their original order.
+    """
+    # hash the ids once so each membership test is O(1) rather than a scan of the column
+    ids_in_df = set(loc_df['_id'])
+    return [entry for entry in trip_entry if entry['_id'] in ids_in_df]
+
 
 def add_loc_clusters(
         loc_df,
+        trip_entry,
+        clustering_way,
         radii,
         loc_type,
         alg,
@@ -53,6 +71,9 @@ def add_loc_clusters(
         Args:
             loc_df (dataframe): must have columns 'start_lat' and 'start_lon' 
                 or 'end_lat' and 'end_lon'
+            trip_entry (list of Entry/confirmedTrip): all of the entries that were loaded
+                together with the data. loc_df was obtained from them by converting to a df,
+                then applying filter_labeled_trips() and expand_userinputs().
             radii (int list): list of radii to run the clustering algs with
             loc_type (str): 'start' or 'end'
             alg (str): 'DBSCAN', 'naive', 'OPTICS', 'SVM', 'fuzzy', or
@@ -98,19 +119,25 @@ def add_loc_clusters(
             loc_df.loc[:, f"{loc_type}_DBSCAN_clusters_{r}_m"] = labels
 
     elif alg == 'naive':
+
+        cleaned_trip_entry= cleanEntryTypeData(loc_df,trip_entry)
+
         for r in radii:
             # this is using a modified Similarity class that bins start/end
             # points separately before creating trip-level bins
-            sim_model = eamts.Similarity(loc_df,
-                                         radius_start=r,
-                                         radius_end=r,
-                                         shouldFilter=False,
-                                         cutoff=False)
-            # we only bin the loc_type points to speed up the alg. avoid
-            # unnecessary binning since this is really slow
-            sim_model.bin_helper(loc_type=loc_type)
-            labels = sim_model.data_df[loc_type + '_bin'].to_list()
 
+            model_config = {
+                "metric": "od_similarity",
+                "similarity_threshold_meters": r,  # meters,
+                "apply_cutoff": False,
+                "clustering_way": clustering_way,
+                "shouldFilter":False,
+                "incremental_evaluation": False
+            }    
+
+            sim_model = eamtg.GreedySimilarityBinning(model_config)       
+            sim_model.fit(cleaned_trip_entry)
+            labels = [int(l) for l in sim_model.tripLabels]
             # # pd.Categorical converts the type from int to category (so
             # # numerical operations aren't possible)
             # loc_df.loc[:, f"{loc_type}_{alg}_clusters_{r}_m"] = pd.Categorical(
diff --git a/TRB_label_assist/clustering_examples.ipynb b/TRB_label_assist/clustering_examples.ipynb
index 4eb8a67..998abab 100644
--- a/TRB_label_assist/clustering_examples.ipynb
+++ b/TRB_label_assist/clustering_examples.ipynb
@@ -26,12 +26,11 @@
                 "%autoreload 2\n",
                 "\n",
                 "from uuid import UUID\n",
-                "\n",
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
                 "import emission.core.get_database as edb\n",
-                "\n",
-                "import mapping"
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
+                "import mapping\n"
             ]
         },
         {
@@ -60,9 +59,11 @@
                 "confirmed_trip_df_map = {}\n",
                 "labeled_trip_df_map = {}\n",
                 "expanded_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "for u in uuids:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)    \n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])    \n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_trip_df_map[u] = esdtq.expand_userinputs(labeled_trip_df_map[u])"
@@ -83,8 +84,10 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[suburban_uuid],\n",
+                "                                 ct_entry[suburban_uuid],\n",
                 "                                 alg='naive',\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150])\n",
@@ -98,8 +101,10 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[college_campus_uuid],\n",
+                "                                 ct_entry[college_campus_uuid],\n",
                 "                                 alg='naive',\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150])\n",
@@ -121,9 +126,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[suburban_uuid],\n",
+                "                                 ct_entry[suburban_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=False,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150, 200])\n",
@@ -137,9 +144,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[college_campus_uuid],\n",
+                "                                 ct_entry[college_campus_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=False,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150, 200])\n",
@@ -161,9 +170,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[suburban_uuid],\n",
+                "                                 ct_entry[suburban_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=True,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150, 200])\n",
@@ -177,9 +188,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[college_campus_uuid],\n",
+                "                                 ct_entry[college_campus_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=True,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way=\"destination\",\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150, 200])\n",
diff --git a/TRB_label_assist/generate_figs_for_poster.ipynb b/TRB_label_assist/generate_figs_for_poster.ipynb
index f89ec7c..bc508fa 100644
--- a/TRB_label_assist/generate_figs_for_poster.ipynb
+++ b/TRB_label_assist/generate_figs_for_poster.ipynb
@@ -29,7 +29,6 @@
                 "import numpy as np\n",
                 "import matplotlib.pyplot as plt\n",
                 "import matplotlib\n",
-                "\n",
                 "from sklearn.pipeline import make_pipeline\n",
                 "from sklearn.preprocessing import StandardScaler\n",
                 "from sklearn import svm\n",
@@ -37,7 +36,7 @@
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
                 "import emission.core.get_database as edb\n",
-                "\n",
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
                 "import mapping\n",
                 "import data_wrangling\n",
                 "from clustering import add_loc_clusters"
@@ -67,9 +66,11 @@
                 "confirmed_trip_df_map = {}\n",
                 "labeled_trip_df_map = {}\n",
                 "expanded_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "for u in uuids:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)    \n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])    \n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_trip_df_map[u] = esdtq.expand_userinputs(labeled_trip_df_map[u])"
@@ -98,8 +99,10 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[user1_uuid],\n",
+                "                                 ct_entry[user1_uuid],\n",
                 "                                 alg='naive',\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way='destination',\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[50, 100, 150])\n",
@@ -137,9 +140,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[user2_uuid],\n",
+                "                                 ct_entry[user2_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=False,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way='destination',\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[150])\n",
@@ -161,9 +166,11 @@
             "outputs": [],
             "source": [
                 "fig = mapping.find_plot_clusters(expanded_trip_df_map[user2_uuid],\n",
+                "                                 ct_entry[user2_uuid],\n",
                 "                                 alg='DBSCAN',\n",
                 "                                 SVM=True,\n",
                 "                                 loc_type='end',\n",
+                "                                 clustering_way='destination',\n",
                 "                                 plot_unlabeled=False,\n",
                 "                                 cluster_unlabeled=False,\n",
                 "                                 radii=[150])\n",
@@ -289,8 +296,14 @@
                 "\n",
                 "    labeled_trips_df = all_trips_df.loc[all_trips_df.user_input != {}]\n",
                 "    df_for_cluster = all_trips_df if cluster_unlabeled else labeled_trips_df\n",
-                "\n",
+                "    if loc_type=='start':\n",
+                "        clustering_way='origin'\n",
+                "    else:\n",
+                "        clustering_way='destination'\n",
+                "    \n",
                 "    df_for_cluster = add_loc_clusters(df_for_cluster,\n",
+                "                                      ct_entry,\n",
+                "                                      clustering_way=clustering_way,\n",
                 "                                      radii=radii,\n",
                 "                                      alg=alg,\n",
                 "                                      loc_type=loc_type,\n",
diff --git a/TRB_label_assist/get_performance_for_poster.ipynb b/TRB_label_assist/get_performance_for_poster.ipynb
index cfacc5e..063a6e6 100644
--- a/TRB_label_assist/get_performance_for_poster.ipynb
+++ b/TRB_label_assist/get_performance_for_poster.ipynb
@@ -25,6 +25,7 @@
                 "\n",
                 "import emission.storage.timeseries.abstract_timeseries as esta\n",
                 "import emission.storage.decorations.trip_queries as esdtq\n",
+                "import emission.analysis.modelling.trip_model.run_model as eamtr\n",
                 "\n",
                 "from performance_eval import get_clf_metrics, cv_for_all_algs, PREDICTORS"
             ]
@@ -48,10 +49,11 @@
                 "labeled_trip_df_map = {}\n",
                 "expanded_labeled_trip_df_map = {}\n",
                 "expanded_all_trip_df_map = {}\n",
+                "ct_entry={}\n",
                 "for u in all_users:\n",
                 "    ts = esta.TimeSeries.get_time_series(u)\n",
-                "    ct_df = ts.get_data_df(\"analysis/confirmed_trip\")\n",
-                "\n",
+                "    ct_entry[u]=eamtr._get_training_data(u,None)    \n",
+                "    ct_df = ts.to_data_df(\"analysis/confirmed_trip\",ct_entry[u])   \n",
                 "    confirmed_trip_df_map[u] = ct_df\n",
                 "    labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)\n",
                 "    expanded_labeled_trip_df_map[u] = esdtq.expand_userinputs(\n",
@@ -113,7 +115,7 @@
                 "    'random forests (O-D, destination clusters)',\n",
                 "    'random forests (coordinates)'\n",
                 "]\n",
-                "cv_results = cv_for_all_algs(\n",
+                "cv_results = cv_for_all_algs(ct_entry,\n",
                 "    uuid_list=all_users,\n",
                 "    expanded_trip_df_map=expanded_labeled_trip_df_map,\n",
                 "    model_names=model_names,\n",
diff --git a/TRB_label_assist/mapping.py b/TRB_label_assist/mapping.py
index 2ef54de..cd2d117 100644
--- a/TRB_label_assist/mapping.py
+++ b/TRB_label_assist/mapping.py
@@ -37,8 +37,10 @@
 
 
 def find_plot_clusters(user_df,
+                       user_entry,
                        loc_type,
                        alg,
+                       clustering_way,
                        SVM=False,
                        radii=[50, 100, 150, 200],
                        cluster_unlabeled=False,
@@ -64,6 +66,8 @@ def find_plot_clusters(user_df,
             alg (str): the clustering algorithm to be used. must be one of the 
                 following: 'DBSCAN', 'naive', 'OPTICS', 'SVM', 'fuzzy' or
                 'mean_shift'
+            clustering_way (str): 'origin', 'destination' or 'origin-destination'.
+                Decides how the trips are clustered geospatially.
             SVM (bool): whether or not to sub-divide clusters with SVM
             radii (int list): list of radii to pass to the clustering alg
             cluster_unlabeled (bool): whether or not unlabeled points are used 
@@ -91,6 +95,7 @@ def find_plot_clusters(user_df,
     assert 'start_loc' in user_df.columns
     assert 'end_loc' in user_df.columns
     assert 'user_input' in user_df.columns
+    assert clustering_way in ['origin','destination','origin-destination']
     assert alg in ALG_OPTIONS
 
     fig = bre.Figure(figsize=(20, 20))
@@ -116,6 +121,8 @@ def find_plot_clusters(user_df,
 
     df_for_cluster = add_loc_clusters(
         df_for_cluster,
+        user_entry,
+        clustering_way,
         radii=radii,
         alg=alg,
         SVM=SVM,
diff --git a/TRB_label_assist/models.py b/TRB_label_assist/models.py
index 6f02277..f3026b6 100644
--- a/TRB_label_assist/models.py
+++ b/TRB_label_assist/models.py
@@ -19,11 +19,16 @@
 from clustering import get_distance_matrix, single_cluster_purity
 import data_wrangling
 import emission.storage.decorations.trip_queries as esdtq
-import emission.analysis.modelling.tour_model_first_only.build_save_model as bsm
-import emission.analysis.modelling.tour_model_first_only.evaluation_pipeline as ep
 from emission.analysis.classification.inference.labels.inferrers import predict_cluster_confidence_discounting
 import emission.core.wrapper.entry as ecwe
-import emission.analysis.modelling.tour_model_extended.similarity as eamts
+import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
+import emission.core.common as ecc
+import emission.analysis.modelling.trip_model.model_storage as eamums
+import emission.analysis.modelling.trip_model.model_type as eamumt
+import emission.analysis.modelling.trip_model.run_model as eamur
+
+
+import clustering
 # NOTE: tour_model_extended.similarity is on the
 # eval-private-data-compatibility branch in e-mission-server
 
@@ -116,12 +121,12 @@ class Cluster(SetupMixin, metaclass=ABCMeta):
     """ blueprint for clustering models. """
 
     @abstractmethod
-    def fit(self, train_df):
+    def fit(self, train_df,train_entry_list):
         """ Fit the clustering algorithm.  
         
             Args: 
                 train_df (DataFrame): dataframe of labeled trips
-            
+                train_entry_list (List) : A list of trips where each element is of Entry type
             Returns:
                 self
         """
@@ -159,12 +164,13 @@ def fit_predict(self, train_df):
 class TripClassifier(SetupMixin, metaclass=ABCMeta):
 
     @abstractmethod
-    def fit(self, train_df):
+    def fit(self, train_df,unused=None):
         """ Fit a classification model.  
         
             Args: 
                 train_df (DataFrame): dataframe of labeled trips
-            
+                unused (List) : A list of Entry-type labeled and unlabeled trips; not used by this function.
+                                Accepted only to keep the fit() signature generic.
             Returns:
                 self
         """
@@ -293,10 +299,10 @@ def set_params(self, params):
 
         return self
 
-    def fit(self, train_df):
+    def fit(self, unused,train_entry_list=None):
         # clean data
-        logging.info("PERF: Fitting RefactoredNaiveCluster with size %s" % len(train_df))
-        self.train_df = self._clean_data(train_df)
+        logging.info("PERF: Fitting RefactoredNaiveCluster with size %s" % len(unused))
+        self.train_df = self._clean_data(unused)
 
         # we can use all trips as long as they have purpose labels. it's ok if
         # they're missing mode/replaced-mode labels, because they aren't as
@@ -315,17 +321,23 @@ def fit(self, train_df):
         if len(self.train_df) == 0:
             # i.e. no valid trips after removing all nans
             raise Exception('no valid trips; nothing to fit')
-
+        
+        model_config = {
+                "metric": "od_similarity",
+                "similarity_threshold_meters": self.radius,  # meters,
+                "apply_cutoff": False,
+                "clustering_way":'origin' if self.loc_type=='start' 
+                                        else 'destination' if self.loc_type =='end' 
+                                        else 'origin-destination',
+                "incremental_evaluation": False
+            }   
+          
         # fit the bins
-        self.sim_model = eamts.Similarity(self.train_df,
-                                          radius_start=self.radius,
-                                          radius_end=self.radius,
-                                          shouldFilter=False,
-                                          cutoff=False)
-        # we only bin the loc_type points to speed up the alg. avoid
-        # unnecessary binning since this is really slow
-        self.sim_model.bin_helper(loc_type=self.loc_type)
-        labels = self.sim_model.data_df[self.loc_type + '_bin'].to_list()
+        self.sim_model= eamtg.GreedySimilarityBinning(model_config)
+        cleaned_trip_entry= clustering.cleanEntryTypeData(self.train_df,train_entry_list)
+        self.sim_model.fit(cleaned_trip_entry)
+
+        labels = [int(l) for l in self.sim_model.tripLabels]
         self.train_df.loc[:, f'{self.loc_type}_cluster_idx'] = labels
         return self
 
@@ -334,10 +346,32 @@ def predict(self, test_df):
         self.test_df = self._clean_data(test_df)
 
         if self.loc_type == 'start':
-            bins = self.sim_model.start_bins
+            bins = self.sim_model.bins
         elif self.loc_type == 'end':
-            bins = self.sim_model.end_bins
-
+            bins = self.sim_model.bins
+
+        # The bin keys are strings, so convert them to integers:
+        # >>> x = [('1', 'a'), ('2', 'b'), ('3', 'c')]
+        # >>> {int(key):value for key,value in x}
+        # {1: 'a', 2: 'b', 3: 'c'}
+        #
+        # bins = { '1': { 'key1': [] , 'key2': [], ... },
+        #          '2': { 'key1': [] , 'key2': [], ... },
+        #          '3': { 'key1': [] , 'key2': [], ... }, ... }
+        #
+        # the code below converts the above to
+        #
+        # bins = { 1: { 'key1': [] , 'key2': [], ... },
+        #          2: { 'key1': [] , 'key2': [], ... },
+        #          3: { 'key1': [] , 'key2': [], ... }, ... }
+        #
+        # This is why it works:
+        # 1. Iterate over the (key, value) pairs in 'bins.items()'.
+        # 2. For each pair, 'key' is a string, so use int(key) to convert it into an integer.
+        # 3. Create a new dictionary (using {} within the dictionary comprehension)
+        #     where the keys are now integers and the values are the same.
+
+        bins = {int(key):value for key,value in bins.items()}        
         labels = []
 
         # for each trip in the test list:
@@ -346,10 +380,15 @@ def predict(self, test_df):
                 logging.info("PERF: RefactoredNaiveCluster Working on trip %s/%s" % (idx, len(self.test_df)))
             # iterate over all bins
             trip_binned = False
-            for i, bin in enumerate(bins):
+            for i in bins:
                 # check if the trip can fit in the bin
-                # if so, get the bin index
-                if self._match(row, bin, self.loc_type):
+                # if so, get the bin index.
+                #
+                # 'feature_rows' is the key that holds a list of lists, where
+                #  each inner list takes the form:
+                #
+                #            [start_lon, start_lat, end_lon, end_lat]
+                if self._match(row, bins[i]['feature_rows'], self.loc_type):
                     labels += [i]
                     trip_binned = True
                     break
@@ -366,8 +405,7 @@ def _match(self, trip, bin, loc_type):
         
             copied from the Similarity class on the e-mission-server. 
         """
-        for t_idx in bin:
-            trip_in_bin = self.train_df.iloc[t_idx]
+        for trip_in_bin in bin:            
             if not self._distance_helper(trip, trip_in_bin, loc_type):
                 return False
         return True
@@ -375,16 +413,20 @@ def _match(self, trip, bin, loc_type):
     def _distance_helper(self, tripa, tripb, loc_type):
         """ Check if two trips have start/end points within the distance 
             threshold. 
-        
-            copied from the Similarity class on the e-mission-server. 
         """
+        # tripa is taken from the test dataframe.
+        # tripb is taken from the stored bin list.
         pta_lat = tripa[[loc_type + '_lat']]
         pta_lon = tripa[[loc_type + '_lon']]
-        ptb_lat = tripb[[loc_type + '_lat']]
-        ptb_lon = tripb[[loc_type + '_lon']]
+        if loc_type == 'start':
+            ptb_lat = tripb[1]
+            ptb_lon = tripb[0]
+        elif loc_type == 'end':
+            ptb_lat = tripb[3]
+            ptb_lon = tripb[2]
 
-        return eamts.within_radius(pta_lat, pta_lon, ptb_lat, ptb_lon,
-                                   self.radius)
+        dist= ecc.calDistance([pta_lon,pta_lat],[ptb_lon,ptb_lat])                                   
+        return dist <= self.radius
 
 
 class DBSCANSVMCluster(Cluster):
@@ -444,7 +486,7 @@ def set_params(self, params):
 
         return self
 
-    def fit(self, train_df):
+    def fit(self, train_df,unused=None):
         """ Creates clusters of trip points. 
             self.train_df will be updated with columns containing base and 
             final clusters. 
@@ -455,7 +497,8 @@ def fit(self, train_df):
 
             Args:
                 train_df (dataframe): dataframe of labeled trips
-        """
+                unused (list): Entry objects for labeled and unlabeled trips. Not used by this
+                    method; accepted only to keep the fit() signature generic across models."""
         ##################
         ### clean data ###
         ##################
@@ -648,7 +691,7 @@ def set_params(self, params):
 
         return self
 
-    def fit(self, train_df):
+    def fit(self, train_df,unused=None):
         logging.info("PERF: Fitting NaiveBinningClassifier")
         # (copied from bsm.build_user_model())
 
@@ -656,21 +699,29 @@ def fit(self, train_df):
         # only accepts lists of Entry objects
         train_trips = self._trip_df_to_list(train_df)
 
-        sim, bins, bin_trips, train_trips = ep.first_round(
-            train_trips, self.radius)
-
+        
+        model_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": self.radius,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin-destination",  # because that is what performance_eval.py sets for this model
+            "incremental_evaluation": False
+        }    
+
+        sim_model = eamtg.GreedySimilarityBinning(model_config)
+        sim_model.fit(train_trips)
         # set instance variables so we can access results later as well
-        self.sim = sim
-        self.bins = bins
+        self.sim = sim_model
+        self.bins = sim_model.bins
 
         # save all user labels
         user_id = train_df.user_id.iloc[0]
-        bsm.save_models('user_labels',
-                        bsm.create_user_input_map(train_trips, bins), user_id)
+        model_type=eamumt.ModelType.GREEDY_SIMILARITY_BINNING
+        model_storage=eamums.ModelStorage.DOCUMENT_DATABASE
+        model_data_next=sim_model.to_dict()
+        last_done_ts = eamur._latest_timestamp(train_trips)
+        eamums.save_model(user_id, model_type, model_data_next, last_done_ts, model_storage)
 
-        # save location features of all bins
-        bsm.save_models('locations', bsm.create_location_map(train_trips, bins),
-                        user_id)
         return self
 
     def predict_proba(self, test_df):
@@ -880,13 +931,13 @@ def set_params(self, params):
 
         return self
 
-    def fit(self, train_df):
+    def fit(self, train_df,train_entry_list=None):
         # fit clustering model
-        self.end_cluster_model.fit(train_df)
+        self.end_cluster_model.fit(train_df,train_entry_list)
         self.train_df = self.end_cluster_model.train_df
 
         if self.cluster_method in ['trip', 'combination']:
-            self.start_cluster_model.fit(train_df)
+            self.start_cluster_model.fit(train_df,train_entry_list)
             self.train_df.loc[:, ['start_cluster_idx'
                                   ]] = self.start_cluster_model.train_df[[
                                       'start_cluster_idx'
@@ -1049,7 +1100,7 @@ class EnsembleClassifier(TripClassifier, metaclass=ABCMeta):
     replaced_predictor = NotImplemented
 
     # required methods
-    def fit(self, train_df):
+    def fit(self, train_df,unused=None):
         # get location features
         if self.loc_feature == 'cluster':
             # fit clustering model(s) and one-hot encode their indices
diff --git a/TRB_label_assist/performance_eval.py b/TRB_label_assist/performance_eval.py
index e63a576..aee5ad2 100644
--- a/TRB_label_assist/performance_eval.py
+++ b/TRB_label_assist/performance_eval.py
@@ -9,6 +9,7 @@
 import os
 import time
 from datetime import datetime
+import pathlib
 
 import sklearn.metrics as sm
 from sklearn.metrics.cluster import contingency_matrix
@@ -18,8 +19,6 @@
 import models
 from data_wrangling import expand_coords
 from clustering import add_loc_clusters, ALG_OPTIONS, purity_score
-import emission.analysis.modelling.tour_model_first_only.get_users as gu
-import emission.analysis.modelling.tour_model_first_only.data_preprocessing as pp
 
 # TODO: these may require further updating
 DEFAULT_MODES = [
@@ -120,6 +119,7 @@
 
 
 def cross_val_predict(model,
+                      ct_entry,
                       model_params=None,
                       user_df=None,
                       k=5,
@@ -171,8 +171,7 @@ def cross_val_predict(model,
 
         # train the model
         logging.info("About to fit the model %s" % model)
-        model_.fit(train_trips)
-
+        model_.fit(train_trips,ct_entry)
         logging.info("About to generate predictions for the model %s" % model)
         # generate predictions
         pred_df = model_.predict(test_trips)
@@ -216,6 +215,7 @@ def cross_val_predict(model,
 
 
 def cv_for_all_users(model,
+                     ct_entry,
                      uuid_list,
                      expanded_trip_df_map=None,
                      model_params=None,
@@ -233,6 +233,7 @@ def cv_for_all_users(model,
         logging.info("------ START: predictions for user %s and model %s" % (user, model))
         try:
             results = cross_val_predict(model,
+                                        ct_entry[user],
                                         model_params,
                                         user_df=expanded_trip_df_map[user],
                                         k=k,
@@ -265,7 +266,8 @@ def cv_for_all_users(model,
     return cross_val_all
 
 
-def cv_for_all_algs(uuid_list,
+def cv_for_all_algs(ct_entry,
+                    uuid_list,
                     expanded_trip_df_map,
                     model_names=list(PREDICTORS.keys()),
                     override_prior_runs=True,
@@ -274,6 +276,7 @@ def cv_for_all_algs(uuid_list,
                     min_samples=False,
                     raise_errors=False):
     cv_results = {}
+    pathlib.Path('first_trial_results').mkdir(parents=True,exist_ok=True) #needed first time
     for model_name in model_names:
         csv_path = f'first_trial_results/cv results {model_name}.csv'
         if not override_prior_runs and os.path.exists(csv_path):
@@ -289,6 +292,7 @@ def cv_for_all_algs(uuid_list,
             start_time = datetime.now()
             model, model_params = PREDICTORS[model_name]
             cv_df = cv_for_all_users(model,
+                                     ct_entry,
                                      uuid_list=uuid_list,
                                      expanded_trip_df_map=expanded_trip_df_map,
                                      model_params=model_params,
@@ -627,6 +631,8 @@ def get_cluster_metrics(trip_df):
 
 
 def run_eval_cluster_metrics(expanded_all_trip_df_map,
+                             ct_entry,
+                             clustering_way,
                              user_list,
                              radii,
                              loc_type,
@@ -730,6 +736,8 @@ def run_eval_cluster_metrics(expanded_all_trip_df_map,
 
                     user_trips = add_loc_clusters(
                         user_trips,
+                        ct_entry,
+                        clustering_way,
                         radii=radii,
                         alg=alg,
                         SVM=SVM,
diff --git a/TRB_label_assist/regenerate_classification_performance_results.py b/TRB_label_assist/regenerate_classification_performance_results.py
index 2f524c3..b549cc1 100644
--- a/TRB_label_assist/regenerate_classification_performance_results.py
+++ b/TRB_label_assist/regenerate_classification_performance_results.py
@@ -1,9 +1,9 @@
 import pandas as pd
 import numpy as np
 from uuid import UUID
-
 import emission.storage.timeseries.abstract_timeseries as esta
 import emission.storage.decorations.trip_queries as esdtq
+import emission.analysis.modelling.trip_model.run_model as eamtr
 
 from performance_eval import get_clf_metrics, cv_for_all_algs, PREDICTORS
 
@@ -15,10 +15,11 @@
 labeled_trip_df_map = {}
 expanded_labeled_trip_df_map = {}
 expanded_all_trip_df_map = {}
+ct_entry={}
 for u in all_users:
     ts = esta.TimeSeries.get_time_series(u)
-    ct_df = ts.get_data_df("analysis/confirmed_trip")
-
+    ct_entry[u]=eamtr._get_training_data(u,None)    
+    ct_df = ts.to_data_df("analysis/confirmed_trip",ct_entry[u])   
     confirmed_trip_df_map[u] = ct_df
     labeled_trip_df_map[u] = esdtq.filter_labeled_trips(ct_df)
     expanded_labeled_trip_df_map[u] = esdtq.expand_userinputs(
@@ -47,6 +48,7 @@
 # load in all runs
 model_names = list(PREDICTORS.keys())
 cv_results = cv_for_all_algs(
+    ct_entry,
     uuid_list=all_users,
     expanded_trip_df_map=expanded_labeled_trip_df_map,
     model_names=model_names,