From a269e74525a40b723aba60647506f66236f7d857 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Sun, 19 Nov 2023 18:42:36 +0200 Subject: [PATCH 1/3] Fixed the issue with the dest_sites list An error was occurring if the dest_sites list wasn't initialized and the page tried to access it in the other callbacks. --- src/pages/packet-loss-ml.py | 261 ++++++++++++++++++----------------- src/pages/throughput-ml.py | 265 ++++++++++++++++++------------------ 2 files changed, 270 insertions(+), 256 deletions(-) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index c82381d..203e751 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -421,143 +421,150 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): # creating the first plot plotly_fig = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[ - (plsDf_onehot_plot['src_site_' + allsites] == 1) | (plsDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[ - (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - - fig = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - plotly_fig = mpl_to_plotly(fig) - plotly_fig.update_layout(layout) - - plotly_fig = dcc.Graph(figure=plotly_fig) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[ + (plsDf_onehot_plot['src_site_' + allsites] == 1) | (plsDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[ + (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] + + fig = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig = mpl_to_plotly(fig) + plotly_fig.update_layout(layout) + + plotly_fig = dcc.Graph(figure=plotly_fig) + 
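# [editor's note -- illustrative sketch, not part of the patch above]
# Why the nesting in this hunk matters: `&` is Python's bitwise AND and does not
# short-circuit, so in the old one-line condition the `allsites in dest_sites`
# operand was still evaluated when no sites were selected and the dest_sites list
# had not yet been built elsewhere on the page -- which is the failure the commit
# message describes. Moving the membership tests under the outer `if` defers them
# until the guard holds. Hypothetical, self-contained illustration (all names
# below are invented for the demo):
dest_sites_demo = None      # stands in for a dest_sites list that was never initialized
sites_state_demo = []       # stands in for an empty sitesState

try:
    # old style: both operands of `&` are evaluated, so the membership test raises
    _ = (sites_state_demo is not None and len(sites_state_demo) > 0) & ("SITE-A" in dest_sites_demo)
except TypeError as exc:
    print("eager `&` condition failed:", exc)

# new style: the membership test is never reached while the outer guard is false
if sites_state_demo is not None and len(sites_state_demo) > 0:
    if "SITE-A" in dest_sites_demo:
        print("site appears as a destination")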
elif (sitesState is not None and len(sitesState) > 0): + plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean = mpl_to_plotly(fig_mean) - plotly_fig_mean.update_layout(layout_mean) - - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean = mpl_to_plotly(fig_mean) + plotly_fig_mean.update_layout(layout_mean) + + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['src_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - - fig_src = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site as a source only') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - plotly_fig_src = mpl_to_plotly(fig_src) - plotly_fig_src.update_layout(layout) - - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['src_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] + + fig_src = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site as a source only') + 
plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig_src = mpl_to_plotly(fig_src) + plotly_fig_src.update_layout(layout) + + plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean_src = mpl_to_plotly(fig_mean) - plotly_fig_mean_src.update_layout(layout_mean) - - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean_src = mpl_to_plotly(fig_mean) + plotly_fig_mean_src.update_layout(layout_mean) + + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - - plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - - - fig_dest = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms for the ' + allsites + ' site as a destination only') - plt.xlabel('timestamp') - plt.ylabel('packet loss') - - plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', - label="all packet loss measurements") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', - label="complete loss alarms using ML") - plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', - label="partial loss alarms using ML") - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), - label='daily packet loss mean') - - 
plotly_fig_dest = mpl_to_plotly(fig_dest) - plotly_fig_dest.update_layout(layout) - - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] + + fig_dest = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms for the ' + allsites + ' site as a destination only') + plt.xlabel('timestamp') + plt.ylabel('packet loss') + + plt.plot(plsDf_onehot_site_plot['dt'], plsDf_onehot_site_plot['avg_value'], 'co', + label="all packet loss measurements") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 2, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 2, 'avg_value'], 'go', + label="complete loss alarms using ML") + plt.plot(df_to_plot_site.loc[df_to_plot['flag'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['flag'] == 1, 'avg_value'], 'ro', + label="partial loss alarms using ML") + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["avg_value"].mean(), + label='daily packet loss mean') + + plotly_fig_dest = mpl_to_plotly(fig_dest) + plotly_fig_dest.update_layout(layout) + + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)["flag"].sum()) - plotly_fig_mean_dest = mpl_to_plotly(fig_mean) - plotly_fig_mean_dest.update_layout(layout_mean) - - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[(df_to_plot_site['flag'] == 1), 'dt'].dt.date)[ + "flag"].sum()) + plotly_fig_mean_dest = mpl_to_plotly(fig_mean) + plotly_fig_mean_dest.update_layout(layout_mean) + + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) return [plotly_fig, plotly_fig_src, plotly_fig_dest, plotly_fig_mean, diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index ca04a99..16dca94 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -414,151 +414,158 @@ def update_analysis(start_date, end_date, allsites, 
src_sites, sitesState): # creating plots for the site both as a source and dest plotly_fig = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[ - (rawDf_onehot_plot['src_site_' + allsites] == 1) | (rawDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[ - (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - - fig = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig = mpl_to_plotly(fig) - plotly_fig.update_layout(layout) - - plotly_fig = dcc.Graph(figure=plotly_fig) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[ + (rawDf_onehot_plot['src_site_' + allsites] == 1) | (rawDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[ + (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] + + fig = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig = mpl_to_plotly(fig) + plotly_fig.update_layout(layout) + + plotly_fig = dcc.Graph(figure=plotly_fig) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean = {} - if (sitesState 
is not None and len(sitesState) > 0) & (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean = mpl_to_plotly(fig_mean) - plotly_fig_mean.update_layout(layout_mean) - - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites) & (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean = mpl_to_plotly(fig_mean) + plotly_fig_mean.update_layout(layout_mean) + + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', + style={"padding-bottom": "1%", "padding-top": "1%"}) # creating plots for the site as a source only plotly_fig_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['src_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - - fig_src = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a source only') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig_src = mpl_to_plotly(fig_src) - plotly_fig_src.update_layout(layout) - - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['src_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] + + fig_src = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + 
' site as a source only') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig_src = mpl_to_plotly(fig_src) + plotly_fig_src.update_layout(layout) + + plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_src = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean_src = mpl_to_plotly(fig_mean) - plotly_fig_mean_src.update_layout(layout_mean) - - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_src = html.H4('No measurements for this site as a source', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in src_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean_src = mpl_to_plotly(fig_mean) + plotly_fig_mean_src.update_layout(layout_mean) + + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_src = html.H4('No measurements for this site as a source', + style={"padding-bottom": "1%", "padding-top": "1%"}) # creating plots for the site as a dest only plotly_fig_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - - rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['dest_site_' + allsites] == 1)] - df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - - - fig_dest = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a destination only') - plt.xlabel('timestamp') - plt.ylabel('throughput (Mbps)') - - plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', - label="all throughput measurements") - plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], - 
rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', markersize=8.5, - label="alarms using alarms system") - plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], - df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") - plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), - label='daily throughput mean') - plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ - "value"].mean(), label='daily alarm measurements mean') - - plotly_fig_dest = mpl_to_plotly(fig_dest) - plotly_fig_dest.update_layout(layout) - - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_dest = html.H4('No measurements for this site as a destination', + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['dest_site_' + allsites] == 1)] + df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] + + fig_dest = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a destination only') + plt.xlabel('timestamp') + plt.ylabel('throughput (Mbps)') + + plt.plot(rawDf_onehot_site_plot['dt'], rawDf_onehot_site_plot['value'], 'o', color='lightblue', + label="all throughput measurements") + plt.plot(rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'dt'], + rawDf_onehot_site_plot.loc[rawDf_onehot_site_plot['alarm_created'] == 1, 'value'], 'go', + markersize=8.5, + label="alarms using alarms system") + plt.plot(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'], + df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'value'], 'ro', label="alarms using ML") + plt.plot(rawDf_onehot_site_plot.groupby(rawDf_onehot_site_plot['dt'].dt.date)["value"].mean(), + label='daily throughput mean') + plt.plot(df_to_plot_site.groupby(df_to_plot_site.loc[df_to_plot_site['alarm_created'] == 1, 'dt'].dt.date)[ + "value"].mean(), label='daily alarm measurements mean') + + plotly_fig_dest = mpl_to_plotly(fig_dest) + plotly_fig_dest.update_layout(layout) + + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) plotly_fig_mean_dest = {} - if (sitesState is not None and len(sitesState) > 0) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) - plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') - plt.xlabel('timestamp') - plt.ylabel('number of daily alarms') - - plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) - plotly_fig_mean_dest = mpl_to_plotly(fig_mean) - plotly_fig_mean_dest.update_layout(layout_mean) - - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) - elif (sitesState is not None and len(sitesState) > 0): - plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', - style={"padding-bottom": "1%", "padding-top": "1%"}) + if (sitesState is not None and len(sitesState) > 0): + if (allsites in dest_sites): + fig_mean = plt.figure(figsize=(14, 4)) + plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') + plt.xlabel('timestamp') + 
plt.ylabel('number of daily alarms') + + plt.plot(df_to_plot_site.groupby(df_to_plot_site['dt'].dt.date)["alarm_created"].sum()) + plotly_fig_mean_dest = mpl_to_plotly(fig_mean) + plotly_fig_mean_dest.update_layout(layout_mean) + + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + elif (sitesState is not None and len(sitesState) > 0): + plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', + style={"padding-bottom": "1%", "padding-top": "1%"}) return [plotly_fig, - plotly_fig_src,plotly_fig_dest, plotly_fig_mean, + plotly_fig_src, plotly_fig_dest, plotly_fig_mean, plotly_fig_mean_src, plotly_fig_mean_dest] # a callback for the third section of a page with two plots for a chosen destination-source pair From 3dda29b4a7d304954362a9e1aa44d78a714874ea Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 21 Nov 2023 19:55:27 +0200 Subject: [PATCH 2/3] Add the ml_model preloading in a file --- src/ml/thrpt_dataset_model_train.py | 7 ++++-- src/model/Updater.py | 38 +++++++++++++++++++++++------ src/pages/packet-loss-ml.py | 21 ++++++++++------ src/pages/throughput-ml.py | 14 +++++++++-- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/ml/thrpt_dataset_model_train.py b/src/ml/thrpt_dataset_model_train.py index 5601238..8a421a0 100644 --- a/src/ml/thrpt_dataset_model_train.py +++ b/src/ml/thrpt_dataset_model_train.py @@ -65,6 +65,11 @@ def trainMLmodel(rawDf): # disp = disp.plot(cmap=plt.cm.YlGnBu,values_format='g') # plt.show() + return rawDf_onehot, model + +def predictData(rawDf_onehot, model): + rawDf_custom_x = rawDf_onehot.drop(['alarm_created'], axis=1) + #preparing final datasets for further analysis y = model.predict(rawDf_custom_x) df_to_plot = rawDf_custom_x.copy() @@ -83,5 +88,3 @@ def trainMLmodel(rawDf): - - diff --git a/src/model/Updater.py b/src/model/Updater.py index 7d26992..607ff34 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -10,9 +10,13 @@ from utils.helpers import timer import model.queries as qrs import pandas as pd +import pickle from ml.create_thrpt_dataset import createThrptDataset +from ml.thrpt_dataset_model_train import trainMLmodel from ml.create_packet_loss_dataset import createPcktDataset +from ml.packet_loss_one_month_onehot import one_month_data +from ml.packet_loss_train_model import packet_loss_train_model import os from datetime import datetime, timedelta @@ -34,8 +38,8 @@ def __init__(self): self.cacheIndexData() self.storeAlarms() self.storePathChangeDescDf() - self.storeThroughputData() - self.storePacketLossData() + self.storeThroughputDataAndModel() + self.storePacketLossDataAndModel() try: Scheduler(3600, self.cacheIndexData) @@ -43,8 +47,8 @@ def __init__(self): Scheduler(1800, self.storePathChangeDescDf) # Store the data for the Major Alarms analysis - Scheduler(int(60*60*12), self.storeThroughputData) - Scheduler(int(60*60*12), self.storePacketLossData) + Scheduler(int(60*60*12), self.storeThroughputDataAndModel) + Scheduler(int(60*60*12), self.storePacketLossDataAndModel) except Exception as e: print(traceback.format_exc()) @@ -194,7 +198,7 @@ def createLocation(location): os.mkdir(location) @timer - def storeThroughputData(self): + def storeThroughputDataAndModel(self): now = hp.defaultTimeRange(days=60, datesOnly=True) start_date = now[0] end_date = now[1] @@ -204,17 +208,37 @@ def storeThroughputData(self): self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/rawDf.parquet') + # train the ML model on the loaded dataset + rawDf_onehot, model = trainMLmodel(rawDf) + 
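# [editor's note -- minimal sketch of the persistence pattern this hunk introduces,
# not code from the repository] The scheduled Updater job trains the classifier once
# on the cached dataset, pickles it next to the parquet files, and the page callbacks
# can later unpickle it instead of retraining on every page view. The file name below
# is hypothetical; the patch itself uses XGB_Classifier_model_throughput.pkl.
import pickle

def save_model(model, path='ml-datasets/classifier_demo.pkl'):
    # serialize the fitted classifier produced by trainMLmodel
    with open(path, 'wb') as file:
        pickle.dump(model, file)

def load_model(path='ml-datasets/classifier_demo.pkl'):
    # deserialize it later, e.g. inside a Dash callback
    with open(path, 'rb') as file:
        return pickle.load(file)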
del rawDf + + self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/rawDf_onehot.parquet') + # save the classification model as a pickle file + model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_throughput.pkl' + with open(model_pkl_file, 'wb') as file: + pickle.dump(model, file) + + @timer - def storePacketLossData(self): + def storePacketLossDataAndModel(self): now = hp.defaultTimeRange(days=60, datesOnly=True) start_date = now[0] end_date = now[1] start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T23:59:59.000Z'] plsDf = createPcktDataset(start_date, end_date) - self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/plsDf.parquet') + # onehot encode the whole dataset and leave only one month for further ML training + plsDf_onehot_month = one_month_data(plsDf) + # train the model on one month data + model = packet_loss_train_model(plsDf_onehot_month) + del plsDf_onehot_month + + # save the classification model as a pickle file + model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_packet_loss.pkl' + with open(model_pkl_file, 'wb') as file: + pickle.dump(model, file) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 203e751..45f8411 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -10,6 +10,7 @@ import pandas as pd import matplotlib.pyplot as plt from elasticsearch.helpers import scan +import pickle import utils.helpers as hp from utils.parquet import Parquet @@ -231,7 +232,7 @@ def layout(**other_unknown_query_strings): def colorMap(eventTypes): colors = ['#75cbe6', '#3b6d8f', '#75E6DA', '#189AB4', '#2E8BC0', '#145DA0', '#05445E', '#0C2D48', - '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', + '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', '#2894f8', '#fee838', '#3e6595', '#4adfe1', '#b14ae1' '#1f77b4', '#ff7f0e', '#2ca02c','#00224e', '#123570', '#3b496c', '#575d6d', '#707173', '#8a8678', '#a59c74', ] @@ -239,7 +240,7 @@ def colorMap(eventTypes): paletteDict = {} for i,e in enumerate(eventTypes): paletteDict[e] = colors[i] - + return paletteDict # a callback for the first section of a page with the list of Major alarms @@ -285,16 +286,20 @@ def update_output(start_date, end_date, sensitivity, sitesState): if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() plsDf = pq.readFile(f'parquet/ml-datasets/plsDf.parquet') + + model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_packet_loss.pkl' + with open(model_pkl_file, 'rb') as file: + model = pickle.load(file) else: plsDf = createPcktDataset(start_date, end_date) - # plsDf = pd.read_csv('plsDf_sep_oct.csv') + # onehot encode the whole dataset and leave only one month for further ML training + plsDf_onehot_month = one_month_data(plsDf) - # onehot encode the whole dataset and leave only one month for further ML training - plsDf_onehot_month = one_month_data(plsDf) + # train the model on one month data + model = packet_loss_train_model(plsDf_onehot_month) + del plsDf_onehot_month - #train the model on one month data - model = packet_loss_train_model(plsDf_onehot_month) - del plsDf_onehot_month + # plsDf = pd.read_csv('plsDf_sep_oct.csv') # predict the alarms using ML model and return the dataset with original alarms and the ML alarms global plsDf_onehot_plot, df_to_plot diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 16dca94..7a14116 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -9,6 +9,7 @@ from datetime import date import pandas as pd import 
matplotlib.pyplot as plt +import pickle import utils.helpers as hp from utils.parquet import Parquet @@ -18,6 +19,7 @@ from ml.create_thrpt_dataset import createThrptDataset from ml.thrpt_dataset_model_train import trainMLmodel +from ml.thrpt_dataset_model_train import predictData def title(): return f"Search & explore" @@ -289,14 +291,22 @@ def update_output(start_date, end_date, sensitivity, sitesState): if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() rawDf = pq.readFile('parquet/ml-datasets/rawDf.parquet') + rawDf_onehot = pq.readFile('parquet/ml-datasets/rawDf_onehot.parquet') + + model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_throughput.pkl' + with open(model_pkl_file, 'rb') as file: + model = pickle.load(file) else: rawDf = createThrptDataset(start_date, end_date) + # train the ML model on the loaded dataset + rawDf_onehot, model = trainMLmodel(rawDf) + del rawDf # rawDf = pd.read_csv('rawDf_sep_oct.csv') - # train the ML model on the loaded dataset and return the dataset with original alarms and the ML alarms + # predict the data on the model and return the dataset with original alarms and the ML alarms global rawDf_onehot_plot, df_to_plot - rawDf_onehot_plot, df_to_plot = trainMLmodel(rawDf) + rawDf_onehot_plot, df_to_plot = predictData(rawDf_onehot, model) # create a list with all sites as sources src_sites = rawDf_onehot_plot.loc[:, rawDf_onehot_plot.columns.str.startswith("src_site")].columns.values.tolist() From 7623d58e3f1ddf4adc06ca4f84ace0c3e4321059 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 21 Nov 2023 19:58:43 +0200 Subject: [PATCH 3/3] Change presaved datasets names --- src/model/Updater.py | 6 +++--- src/pages/packet-loss-ml.py | 2 +- src/pages/throughput-ml.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/model/Updater.py b/src/model/Updater.py index 607ff34..4f05316 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -206,13 +206,13 @@ def storeThroughputDataAndModel(self): rawDf = createThrptDataset(start_date, end_date) - self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/rawDf.parquet') + self.pq.writeToFile(rawDf, f'{self.location}ml-datasets/throughput_Df.parquet') # train the ML model on the loaded dataset rawDf_onehot, model = trainMLmodel(rawDf) del rawDf - self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/rawDf_onehot.parquet') + self.pq.writeToFile(rawDf_onehot, f'{self.location}ml-datasets/throughput_onehot_Df.parquet') # save the classification model as a pickle file model_pkl_file = f'{self.location}ml-datasets/XGB_Classifier_model_throughput.pkl' with open(model_pkl_file, 'wb') as file: @@ -227,7 +227,7 @@ def storePacketLossDataAndModel(self): start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T23:59:59.000Z'] plsDf = createPcktDataset(start_date, end_date) - self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/plsDf.parquet') + self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/packet_loss_Df.parquet') # onehot encode the whole dataset and leave only one month for further ML training plsDf_onehot_month = one_month_data(plsDf) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 45f8411..631ec3f 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -285,7 +285,7 @@ def update_output(start_date, end_date, sensitivity, sitesState): # query for the dataset if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() - plsDf = 
pq.readFile(f'parquet/ml-datasets/plsDf.parquet')
+        plsDf = pq.readFile(f'parquet/ml-datasets/packet_loss_Df.parquet')
 
         model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_packet_loss.pkl'
         with open(model_pkl_file, 'rb') as file:
diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py
index 7a14116..5c4479e 100644
--- a/src/pages/throughput-ml.py
+++ b/src/pages/throughput-ml.py
@@ -290,8 +290,8 @@ def update_output(start_date, end_date, sensitivity, sitesState):
     # query for the dataset
     if (start_date, end_date) == (start_date_check, end_date_check):
         pq = Parquet()
-        rawDf = pq.readFile('parquet/ml-datasets/rawDf.parquet')
-        rawDf_onehot = pq.readFile('parquet/ml-datasets/rawDf_onehot.parquet')
+        rawDf = pq.readFile('parquet/ml-datasets/throughput_Df.parquet')
+        rawDf_onehot = pq.readFile('parquet/ml-datasets/throughput_onehot_Df.parquet')
 
         model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_throughput.pkl'
         with open(model_pkl_file, 'rb') as file:
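For reference, the caching flow that patches 2 and 3 give the throughput page can be condensed as the sketch below. It is an editor's illustration under stated assumptions, not repository code: pd.read_parquet stands in for the project's Parquet.readFile helper, and build_dataset/train_model stand in for createThrptDataset/trainMLmodel; only the file names under parquet/ml-datasets mirror the patch.

import pickle
from pathlib import Path

import pandas as pd

CACHE_DIR = Path('parquet/ml-datasets')   # written by the scheduled Updater job

def load_or_build(start_date, end_date, cached_range, build_dataset, train_model):
    """Return (onehot_df, model), preferring the pre-built cache.

    If the requested date range equals the range the Updater last stored, read the
    one-hot dataset and unpickle the classifier; otherwise rebuild and retrain.
    """
    if (start_date, end_date) == cached_range:
        onehot_df = pd.read_parquet(CACHE_DIR / 'throughput_onehot_Df.parquet')
        with open(CACHE_DIR / 'XGB_Classifier_model_throughput.pkl', 'rb') as file:
            model = pickle.load(file)
    else:
        raw_df = build_dataset(start_date, end_date)   # e.g. createThrptDataset
        onehot_df, model = train_model(raw_df)         # e.g. trainMLmodel
    return onehot_df, model

Either branch can then feed predictData(onehot_df, model) to produce the ML alarms to plot, which is how the second patch rewires update_output in throughput-ml.py.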