From a269e74525a40b723aba60647506f66236f7d857 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Sun, 19 Nov 2023 18:42:36 +0200 Subject: [PATCH 1/3] Fixed the issue with the dest_sites list An error was occurring if the dest_sites list wasn't initialized and the page tried to access it in the other callbacks. --- src/pages/packet-loss-ml.py | 261 ++++++++++++++++++----------------- src/pages/throughput-ml.py | 265 ++++++++++++++++++------------------ 2 files changed, 270 insertions(+), 256 deletions(-) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index c82381d..203e751 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -421,143 +421,150 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): # creating the first plot plotly_fig = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[ - (plsDf_onehot_plot['src_site_' + allsites] == 1) | (plsDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[ - (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - - fig = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - plotly_fig = mpl_to_plotly(fig) - plotly_fig.update_layout(layout) - - plotly_fig = dcc.Graph(figure=plotly_fig) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[ + (plsDf_onehot_plot['src_site_' + allsites] == 1) | (plsDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[ + (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] + + fig = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig = mpl_to_plotly(fig) + plotly_fig.update_layout(layout) + + plotly_fig = dcc.Graph(figure=plotly_fig) + 
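# [editor's note -- illustrative sketch, not part of the patch above]
# Why the nesting in this hunk matters: `&` is Python's bitwise AND and does not
# short-circuit, so in the old one-line condition the `allsites in dest_sites`
# operand was still evaluated when no sites were selected and the dest_sites list
# had not yet been built elsewhere on the page -- which is the failure the commit
# message describes. Moving the membership tests under the outer `if` defers them
# until the guard holds. Hypothetical, self-contained illustration (all names
# below are invented for the demo):
dest_sites_demo = None      # stands in for a dest_sites list that was never initialized
sites_state_demo = []       # stands in for an empty sitesState

try:
    # old style: both operands of `&` are evaluated, so the membership test raises
    _ = (sites_state_demo is not None and len(sites_state_demo) > 0) & ("SITE-A" in dest_sites_demo)
except TypeError as exc:
    print("eager `&` condition failed:", exc)

# new style: the membership test is never reached while the outer guard is false
if sites_state_demo is not None and len(sites_state_demo) > 0:
    if "SITE-A" in dest_sites_demo:
        print("site appears as a destination")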
elif (sitesState is not None and len(sitesState) > 0): + plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean = mpl_to_plotly(fig_mean) - plotly_fig_mean.update_layout(layout_mean) - - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean = mpl_to_plotly(fig_mean) + plotly_fig_mean.update_layout(layout_mean) + + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['src_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - - fig_src = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site as a source only') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - plotly_fig_src = mpl_to_plotly(fig_src) - plotly_fig_src.update_layout(layout) - - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['src_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] + + fig_src = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site as a source only') + 
plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig_src = mpl_to_plotly(fig_src) + plotly_fig_src.update_layout(layout) + + plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean_src = mpl_to_plotly(fig_mean) - plotly_fig_mean_src.update_layout(layout_mean) - - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean_src = mpl_to_plotly(fig_mean) + plotly_fig_mean_src.update_layout(layout_mean) + + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - - - fig_dest = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site as a destination only') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - 
plotly_fig_dest = mpl_to_plotly(fig_dest) - plotly_fig_dest.update_layout(layout) - - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] + + fig_dest = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site as a destination only') + plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig_dest = mpl_to_plotly(fig_dest) + plotly_fig_dest.update_layout(layout) + + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean_dest = mpl_to_plotly(fig_mean) - plotly_fig_mean_dest.update_layout(layout_mean) - - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean_dest = mpl_to_plotly(fig_mean) + plotly_fig_mean_dest.update_layout(layout_mean) + + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) return [plotly_fig, plotly_fig_src, plotly_fig_dest, plotly_fig_mean, diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index ca04a99..16dca94 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -414,151 +414,158 @@ def update_analysis(start_date, end_date, allsites, 
src_sites, sitesState): # creating plots for the site both as a source and dest plotly_fig = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[ - (rawDf_onehot_plot['src_site_' + allsites] == 1) | (rawDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[ - (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - - fig = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig = mpl_to_plotly(fig) - plotly_fig.update_layout(layout) - - plotly_fig = dcc.Graph(figure=plotly_fig) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[ + (rawDf_onehot_plot['src_site_' + allsites] == 1) | (rawDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[ + (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] + + fig = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig = mpl_to_plotly(fig) + plotly_fig.update_layout(layout) + + plotly_fig = dcc.Graph(figure=plotly_fig) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean = {} - if (sitesState 
is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean = mpl_to_plotly(fig_mean) - plotly_fig_mean.update_layout(layout_mean) - - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean = mpl_to_plotly(fig_mean) + plotly_fig_mean.update_layout(layout_mean) + + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) # creating plots for the site as a source only plotly_fig_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['src_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - - fig_src = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a source only') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig_src = mpl_to_plotly(fig_src) - plotly_fig_src.update_layout(layout) - - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['src_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] + + fig_src = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + 
' site as a source only') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig_src = mpl_to_plotly(fig_src) + plotly_fig_src.update_layout(layout) + + plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean_src = mpl_to_plotly(fig_mean) - plotly_fig_mean_src.update_layout(layout_mean) - - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean_src = mpl_to_plotly(fig_mean) + plotly_fig_mean_src.update_layout(layout_mean) + + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) # creating plots for the site as a dest only plotly_fig_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - - - fig_dest = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a destination only') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - 
rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig_dest = mpl_to_plotly(fig_dest) - plotly_fig_dest.update_layout(layout) - - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_dest = html.H4('No measurements for this site as a destination', + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] + + fig_dest = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a destination only') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig_dest = mpl_to_plotly(fig_dest) + plotly_fig_dest.update_layout(layout) + + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean_dest = mpl_to_plotly(fig_mean) - plotly_fig_mean_dest.update_layout(layout_mean) - - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + 
plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean_dest = mpl_to_plotly(fig_mean) + plotly_fig_mean_dest.update_layout(layout_mean) + + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) return [plotly_fig, - plotly_fig_src,plotly_fig_dest, plotly_fig_mean, + plotly_fig_src, plotly_fig_dest, plotly_fig_mean, plotly_fig_mean_src, plotly_fig_mean_dest] # a callback for the third section of a page with two plots for a chosen destination-source pair From 3dda29b4a7d304954362a9e1aa44d78a714874ea Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 21 Nov 2023 19:55:27 +0200 Subject: [PATCH 2/3] Add the ml_model preloading in a file --- src/ml/thrpt_dataset_model_train.py | 7 ++++-- src/model/Updater.py | 38 +++++++++++++++++++++++------ src/pages/packet-loss-ml.py | 21 ++++++++++------ src/pages/throughput-ml.py | 14 +++++++++-- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/ml/thrpt_dataset_model_train.py b/src/ml/thrpt_dataset_model_train.py index 5601238..8a421a0 100644 --- a/src/ml/thrpt_dataset_model_train.py +++ b/src/ml/thrpt_dataset_model_train.py @@ -65,6 +65,11 @@ def trainMLmodel(rawDf): # disp = disp.plot(cmap=plt.cm.YlGnBu,values_format='g') # plt.show() + return rawDf_onehot, model + +def predictData(rawDf_onehot, model): + rawDf_custom_x = rawDf_onehot.drop(['alarm_created'], axis=1) + #preparing final datasets for further analysis y = model.predict(rawDf_custom_x) df_to_plot = rawDf_custom_x.copy() @@ -83,5 +88,3 @@ def trainMLmodel(rawDf): - - diff --git a/src/model/Updater.py b/src/model/Updater.py index 7d26992..607ff34 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -10,9 +10,13 @@ from utils.helpers import timer import model.queries as qrs import pandas as pd +import pickle from ml.create_thrpt_dataset import createThrptDataset +from ml.thrpt_dataset_model_train import trainMLmodel from ml.create_packet_loss_dataset import createPcktDataset +from ml.packet_loss_one_month_onehot import one_month_data +from ml.packet_loss_train_model import packet_loss_train_model import os from datetime import datetime, timedelta @@ -34,8 +38,8 @@ def __init__(self): self.cacheIndexData() self.storeAlarms() self.storePathChangeDescDf() - self.storeThroughputData() - self.storePacketLossData() + self.storeThroughputDataAndModel() + self.storePacketLossDataAndModel() try: Scheduler(3600, self.cacheIndexData) @@ -43,8 +47,8 @@ def __init__(self): Scheduler(1800, self.storePathChangeDescDf) # Store the data for the Major Alarms analysis - Scheduler(int(60*60*12), self.storeThroughputData) - Scheduler(int(60*60*12), self.storePacketLossData) + Scheduler(int(60*60*12), self.storeThroughputDataAndModel) + Scheduler(int(60*60*12), self.storePacketLossDataAndModel) except Exception as e: print(traceback.format_exc()) @@ -194,7 +198,7 @@ def createLocation(location): os.mkdir(location) @timer - def storeThroughputData(self): + def storeThroughputDataAndModel(self): now = hp.defaultTimeRange(days=60, datesOnly=True) start_date = now[0] end_date = now[1] @@ -204,17 +208,37 @@ def storeThroughputData(self): self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/rawDf.parquet') + # train the ML model on the loaded dataset + rawDf_onehot, model = trainMLmodel(rawDf) + 
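# [editor's note -- minimal sketch of the persistence pattern this hunk introduces,
# not code from the repository] The scheduled Updater job trains the classifier once
# on the cached dataset, pickles it next to the parquet files, and the page callbacks
# can later unpickle it instead of retraining on every page view. The file name below
# is hypothetical; the patch itself uses XGB_Classifier_model_throughput.pkl.
import pickle

def save_model(model, path='ml-datasets/classifier_demo.pkl'):
    # serialize the fitted classifier produced by trainMLmodel
    with open(path, 'wb') as file:
        pickle.dump(model, file)

def load_model(path='ml-datasets/classifier_demo.pkl'):
    # deserialize it later, e.g. inside a Dash callback
    with open(path, 'rb') as file:
        return pickle.load(file)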
del rawDf + + self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/rawDf_onehot.parquet') + # save the classification model as a pickle file + model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_throughput.pkl' + with open(model_pkl_file, 'wb') as file: + pickle.dump(model, file) + + @timer - def storePacketLossData(self): + def storePacketLossDataAndModel(self): now = hp.defaultTimeRange(days=60, datesOnly=True) start_date = now[0] end_date = now[1] start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T23:59:59.000Z'] plsDf = createPcktDataset(start_date, end_date) - self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/plsDf.parquet') + # onehot encode the whole dataset and leave only one month for further ML training + plsDf_onehot_month = one_month_data(plsDf) + # train the model on one month data + model = packet_loss_train_model(plsDf_onehot_month) + del plsDf_onehot_month + + # save the classification model as a pickle file + model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_packet_loss.pkl' + with open(model_pkl_file, 'wb') as file: + pickle.dump(model, file) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 203e751..45f8411 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -10,6 +10,7 @@ import pandas as pd import matplotlib.pyplot as plt from elasticsearch.helpers import scan +import pickle import utils.helpers as hp from utils.parquet import Parquet @@ -231,7 +232,7 @@ def layout(**other_unknown_query_strings): def colorMap(eventTypes): colors = ['#75cbe6', '#3b6d8f', '#75E6DA', '#189AB4', '#2E8BC0', '#145DA0', '#05445E', '#0C2D48', - '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', + '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', '#2894f8', '#fee838', '#3e6595', '#4adfe1', '#b14ae1' '#1f77b4', '#ff7f0e', '#2ca02c','#00224e', '#123570', '#3b496c', '#575d6d', '#707173', '#8a8678', '#a59c74', ] @@ -239,7 +240,7 @@ def colorMap(eventTypes): paletteDict = {} for i,e in enumerate(eventTypes): paletteDict[e] = colors[i] - + return paletteDict # a callback for the first section of a page with the list of Major alarms @@ -285,16 +286,20 @@ def update_output(start_date, end_date, sensitivity, sitesState): if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() plsDf = pq.readFile(f'parquet/ml-datasets/plsDf.parquet') + + model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_packet_loss.pkl' + with open(model_pkl_file, 'rb') as file: + model = pickle.load(file) else: plsDf = createPcktDataset(start_date, end_date) - # plsDf = pd.read_csv('plsDf_sep_oct.csv') + # onehot encode the whole dataset and leave only one month for further ML training + plsDf_onehot_month = one_month_data(plsDf) - # onehot encode the whole dataset and leave only one month for further ML training - plsDf_onehot_month = one_month_data(plsDf) + # train the model on one month data + model = packet_loss_train_model(plsDf_onehot_month) + del plsDf_onehot_month - #train the model on one month data - model = packet_loss_train_model(plsDf_onehot_month) - del plsDf_onehot_month + # plsDf = pd.read_csv('plsDf_sep_oct.csv') # predict the alarms using ML model and return the dataset with original alarms and the ML alarms global plsDf_onehot_plot, df_to_plot diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 16dca94..7a14116 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -9,6 +9,7 @@ from datetime import date import pandas as pd import 
matplotlib.pyplot as plt +import pickle import utils.helpers as hp from utils.parquet import Parquet @@ -18,6 +19,7 @@ from ml.create_thrpt_dataset import createThrptDataset from ml.thrpt_dataset_model_train import trainMLmodel +from ml.thrpt_dataset_model_train import predictData def title(): return f"Search & explore" @@ -289,14 +291,22 @@ def update_output(start_date, end_date, sensitivity, sitesState): if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() rawDf = pq.readFile('parquet/ml-datasets/rawDf.parquet') + rawDf_onehot = pq.readFile('parquet/ml-datasets/rawDf_onehot.parquet') + + model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_throughput.pkl' + with open(model_pkl_file, 'rb') as file: + model = pickle.load(file) else: rawDf = createThrptDataset(start_date, end_date) + # train the ML model on the loaded dataset + rawDf_onehot, model = trainMLmodel(rawDf) + del rawDf # rawDf = pd.read_csv('rawDf_sep_oct.csv') - # train the ML model on the loaded dataset and return the dataset with original alarms and the ML alarms + # predict the data on the model and return the dataset with original alarms and the ML alarms global rawDf_onehot_plot, df_to_plot - rawDf_onehot_plot, df_to_plot = trainMLmodel(rawDf) + rawDf_onehot_plot, df_to_plot = predictData(rawDf_onehot, model) # create a list with all sites as sources src_sites = rawDf_onehot_plot.loc[:, rawDf_onehot_plot.columns.str.startswith("src_site")].columns.values.tolist() From 7623d58e3f1ddf4adc06ca4f84ace0c3e4321059 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 21 Nov 2023 19:58:43 +0200 Subject: [PATCH 3/3] Change presaved datasets names --- src/model/Updater.py | 6 +++--- src/pages/packet-loss-ml.py | 2 +- src/pages/throughput-ml.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/model/Updater.py b/src/model/Updater.py index 607ff34..4f05316 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -206,13 +206,13 @@ def storeThroughputDataAndModel(self): rawDf = createThrptDataset(start_date, end_date) - self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/rawDf.parquet') + self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/throughput_Df.parquet') # train the ML model on the loaded dataset rawDf_onehot, model = trainMLmodel(rawDf) del rawDf - self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/rawDf_onehot.parquet') + self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/throughput_onehot_Df.parquet') # save the classification model as a pickle file model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_throughput.pkl' with open(model_pkl_file, 'wb') as file: @@ -227,7 +227,7 @@ def storePacketLossDataAndModel(self): start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T23:59:59.000Z'] plsDf = createPcktDataset(start_date, end_date) - self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/plsDf.parquet') + self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/packet_loss_Df.parquet') # onehot encode the whole dataset and leave only one month for further ML training plsDf_onehot_month = one_month_data(plsDf) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 45f8411..631ec3f 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -285,7 +285,7 @@ def update_output(start_date, end_date, sensitivity, sitesState): # query for the dataset if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() - plsDf = 
pq.readFile(f'parquet/ml-datasets/plsDf.parquet')
+        plsDf = pq.readFile(f'parquet/ml-datasets/packet_loss_Df.parquet')
 
         model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_packet_loss.pkl'
         with open(model_pkl_file, 'rb') as file:
diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py
index 7a14116..5c4479e 100644
--- a/src/pages/throughput-ml.py
+++ b/src/pages/throughput-ml.py
@@ -290,8 +290,8 @@ def update_output(start_date, end_date, sensitivity, sitesState):
     # query for the dataset
     if (start_date, end_date) == (start_date_check, end_date_check):
         pq = Parquet()
-        rawDf = pq.readFile('parquet/ml-datasets/rawDf.parquet')
-        rawDf_onehot = pq.readFile('parquet/ml-datasets/rawDf_onehot.parquet')
+        rawDf = pq.readFile('parquet/ml-datasets/throughput_Df.parquet')
+        rawDf_onehot = pq.readFile('parquet/ml-datasets/throughput_onehot_Df.parquet')
 
         model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_throughput.pkl'
         with open(model_pkl_file, 'rb') as file:
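For reference, the caching flow that patches 2 and 3 give the throughput page can be condensed as the sketch below. It is an editor's illustration under stated assumptions, not repository code: pd.read_parquet stands in for the project's Parquet.readFile helper, and build_dataset/train_model stand in for createThrptDataset/trainMLmodel; only the file names under parquet/ml-datasets mirror the patch.

import pickle
from pathlib import Path

import pandas as pd

CACHE_DIR = Path('parquet/ml-datasets')   # written by the scheduled Updater job

def load_or_build(start_date, end_date, cached_range, build_dataset, train_model):
    """Return (onehot_df, model), preferring the pre-built cache.

    If the requested date range equals the range the Updater last stored, read the
    one-hot dataset and unpickle the classifier; otherwise rebuild and retrain.
    """
    if (start_date, end_date) == cached_range:
        onehot_df = pd.read_parquet(CACHE_DIR / 'throughput_onehot_Df.parquet')
        with open(CACHE_DIR / 'XGB_Classifier_model_throughput.pkl', 'rb') as file:
            model = pickle.load(file)
    else:
        raw_df = build_dataset(start_date, end_date)   # e.g. createThrptDataset
        onehot_df, model = train_model(raw_df)         # e.g. trainMLmodel
    return onehot_df, model

Either branch can then feed predictData(onehot_df, model) to produce the ML alarms to plot, which is how the second patch rewires update_output in throughput-ml.py.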