From d6cb0ae61ad19a636563c04c3ee4d529161306a6 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Thu, 30 Nov 2023 19:40:39 +0200 Subject: [PATCH 1/5] Added responsive width of the plots --- src/pages/packet-loss-ml.py | 32 ++++++++++++++++-------------- src/pages/throughput-ml.py | 39 +++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 631ec3f..56ecac7 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -411,6 +411,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): # creating a global layout for the plots global layout, layout_mean layout = dict(xaxis_range=[start_date - timedelta(days=2), end_date + timedelta(days=2)], + height = 400, showlegend=True, margin=dict(l=5, r=5, t=50, b=20), paper_bgcolor='rgba(0,0,0,0)', @@ -433,7 +434,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): df_to_plot_site = df_to_plot.loc[ (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - fig = plt.figure(figsize=(14, 4)) + fig = plt.figure() plt.title('Packet loss alarms for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('packet loss') @@ -452,7 +453,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig = mpl_to_plotly(fig) plotly_fig.update_layout(layout) - plotly_fig = dcc.Graph(figure=plotly_fig) + plotly_fig = dcc.Graph(figure=plotly_fig, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -460,7 +461,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -470,7 +471,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean = mpl_to_plotly(fig_mean) plotly_fig_mean.update_layout(layout_mean) - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -481,7 +482,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['src_site_' + allsites] == 1)] df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - fig_src = plt.figure(figsize=(14, 4)) + fig_src = plt.figure() plt.title('Packet loss alarms for the ' + allsites + ' site as a source only') plt.xlabel('timestamp') plt.ylabel('packet loss') @@ -500,7 +501,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_src = mpl_to_plotly(fig_src) plotly_fig_src.update_layout(layout) - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + plotly_fig_src = dcc.Graph(figure=plotly_fig_src, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_src = html.H4('No measurements for this site as a source', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -508,7 +509,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_src = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -518,7 +519,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_src = mpl_to_plotly(fig_mean) plotly_fig_mean_src.update_layout(layout_mean) - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean_src = html.H4('No measurements for this site as a source', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -529,7 +530,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plsDf_onehot_site_plot = plsDf_onehot_plot.loc[(plsDf_onehot_plot['dest_site_' + allsites] == 1)] df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - fig_dest = plt.figure(figsize=(14, 4)) + fig_dest = plt.figure() plt.title('Packet loss alarms for the ' + allsites + ' site as a destination only') plt.xlabel('timestamp') plt.ylabel('packet loss') @@ -548,7 +549,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_dest = mpl_to_plotly(fig_dest) plotly_fig_dest.update_layout(layout) - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -556,7 +557,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_dest = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Packet loss alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -566,7 +567,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_dest = mpl_to_plotly(fig_mean) plotly_fig_mean_dest.update_layout(layout_mean) - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -599,7 +600,7 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): df_to_plot_site = df_to_plot.loc[ (df_to_plot['src_site_' + src_site] == 1) & (df_to_plot['dest_site_' + dest_site] == 1)] - fig = plt.figure(figsize=(14, 4)) + fig = plt.figure() plt.title('Measurements and packet loss alarms for the ' + src_site + ' and ' + dest_site + ' sites pair') plt.xlabel('timestamp') plt.ylabel('packet loss') @@ -620,7 +621,7 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): plotly_fig_mean_src_dest = {} if (sites_src_State is not None and len(sites_src_State) > 0) & (sites_dest_State is not None and len(sites_dest_State) > 0): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Packet loss alarms aggregated by days for the ' + src_site + ' and ' + dest_site + ' sites pair') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -629,7 +630,8 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): plotly_fig_mean_src_dest = mpl_to_plotly(fig_mean) plotly_fig_mean_src_dest.update_layout(layout_mean) - return [dcc.Graph(figure=plotly_fig_scr_dest),dcc.Graph(figure=plotly_fig_mean_src_dest), + return [dcc.Graph(figure=plotly_fig_scr_dest, responsive=True, style= {'height':'400'}) + ,dcc.Graph(figure=plotly_fig_mean_src_dest, responsive=True, style= {'height':'400'}), False if (sites_src_State is not None and len(sites_src_State) > 0) else True] # a callback for the third section of a page. Filters out the dest sites with measurements to the source site selected diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 5c4479e..54deaad 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -237,7 +237,7 @@ def layout(**other_unknown_query_strings): def colorMap(eventTypes): colors = ['#75cbe6', '#3b6d8f', '#75E6DA', '#189AB4', '#2E8BC0', '#145DA0', '#05445E', '#0C2D48', - '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', + '#5EACE0', '#d6ebff', '#498bcc', '#82cbf9', '#2894f8', '#fee838', '#3e6595', '#4adfe1', '#b14ae1' '#1f77b4', '#ff7f0e', '#2ca02c','#00224e', '#123570', '#3b496c', '#575d6d', '#707173', '#8a8678', '#a59c74', ] @@ -245,7 +245,7 @@ def colorMap(eventTypes): paletteDict = {} for i,e in enumerate(eventTypes): paletteDict[e] = colors[i] - + return paletteDict # a callback for the first section of a page with the list of Major alarms @@ -298,12 +298,11 @@ def update_output(start_date, end_date, sensitivity, sitesState): model = pickle.load(file) else: rawDf = createThrptDataset(start_date, end_date) + # rawDf = pd.read_csv('rawDf_sep_nov.csv') # train the ML model on the loaded dataset rawDf_onehot, model = trainMLmodel(rawDf) del rawDf - # rawDf = pd.read_csv('rawDf_sep_oct.csv') - # predict the data on the model and return the dataset with original alarms and the ML alarms global rawDf_onehot_plot, df_to_plot rawDf_onehot_plot, df_to_plot = predictData(rawDf_onehot, model) @@ -409,6 +408,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): # creating a global layout for the plots global layout, layout_mean layout = dict(xaxis_range=[start_date - timedelta(days=2), end_date + timedelta(days=2)], + height=400, showlegend=True, margin=dict(l=5, r=5, t=50, b=20), paper_bgcolor='rgba(0,0,0,0)', @@ -431,7 +431,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): df_to_plot_site = df_to_plot.loc[ (df_to_plot['src_site_' + allsites] == 1) | (df_to_plot['dest_site_' + allsites] == 1)] - fig = plt.figure(figsize=(14, 4)) + fig = plt.figure() plt.title('Bandwidth decreased alarms for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('throughput (Mbps)') @@ -452,7 +452,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig = mpl_to_plotly(fig) plotly_fig.update_layout(layout) - plotly_fig = dcc.Graph(figure=plotly_fig) + plotly_fig = dcc.Graph(figure= plotly_fig, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -460,7 +460,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in src_sites) & (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -469,7 +469,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean = mpl_to_plotly(fig_mean) plotly_fig_mean.update_layout(layout_mean) - plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean) + plotly_fig_mean = dcc.Graph(figure=plotly_fig_mean, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean = html.H4('Measurements for this site are present as a source or destination ONLY', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -481,7 +481,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['src_site_' + allsites] == 1)] df_to_plot_site = df_to_plot.loc[(df_to_plot['src_site_' + allsites] == 1)] - fig_src = plt.figure(figsize=(14, 4)) + fig_src = plt.figure() plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a source only') plt.xlabel('timestamp') plt.ylabel('throughput (Mbps)') @@ -502,7 +502,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_src = mpl_to_plotly(fig_src) plotly_fig_src.update_layout(layout) - plotly_fig_src = dcc.Graph(figure=plotly_fig_src) + plotly_fig_src = dcc.Graph(figure=plotly_fig_src, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_src = html.H4('No measurements for this site as a source', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -510,7 +510,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_src = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in src_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -519,7 +519,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_src = mpl_to_plotly(fig_mean) plotly_fig_mean_src.update_layout(layout_mean) - plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src) + plotly_fig_mean_src = dcc.Graph(figure=plotly_fig_mean_src, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean_src = html.H4('No measurements for this site as a source', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -531,7 +531,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): rawDf_onehot_site_plot = rawDf_onehot_plot.loc[(rawDf_onehot_plot['dest_site_' + allsites] == 1)] df_to_plot_site = df_to_plot.loc[(df_to_plot['dest_site_' + allsites] == 1)] - fig_dest = plt.figure(figsize=(14, 4)) + fig_dest = plt.figure() plt.title('Bandwidth decreased alarms for the ' + allsites + ' site as a destination only') plt.xlabel('timestamp') plt.ylabel('throughput (Mbps)') @@ -552,7 +552,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_dest = mpl_to_plotly(fig_dest) plotly_fig_dest.update_layout(layout) - plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest) + plotly_fig_dest = dcc.Graph(figure=plotly_fig_dest, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -560,7 +560,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_dest = {} if (sitesState is not None and len(sitesState) > 0): if (allsites in dest_sites): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Bandwidth decreased alarms aggregated by days for the ' + allsites + ' site') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -569,7 +569,7 @@ def update_analysis(start_date, end_date, allsites, src_sites, sitesState): plotly_fig_mean_dest = mpl_to_plotly(fig_mean) plotly_fig_mean_dest.update_layout(layout_mean) - plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest) + plotly_fig_mean_dest = dcc.Graph(figure=plotly_fig_mean_dest, responsive=True, style= {'height':'400'}) elif (sitesState is not None and len(sitesState) > 0): plotly_fig_mean_dest = html.H4('No measurements for this site as a destination', style={"padding-bottom": "1%", "padding-top": "1%"}) @@ -602,7 +602,7 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): df_to_plot_site = df_to_plot.loc[ (df_to_plot['src_site_' + src_site] == 1) & (df_to_plot['dest_site_' + dest_site] == 1)] - fig = plt.figure(figsize=(14, 4)) + fig = plt.figure() plt.title('Measurements and bandwidth decreased alarms for the ' + src_site + ' and ' + dest_site + ' sites pair') plt.xlabel('timestamp') plt.ylabel('throughput (Mbps)') @@ -624,7 +624,7 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): plotly_fig_mean_src_dest = {} if (sites_src_State is not None and len(sites_src_State) > 0) & (sites_dest_State is not None and len(sites_dest_State) > 0): - fig_mean = plt.figure(figsize=(14, 4)) + fig_mean = plt.figure() plt.title('Bandwidth decreased alarms aggregated by days for the ' + src_site + ' and ' + dest_site + ' sites pair') plt.xlabel('timestamp') plt.ylabel('number of daily alarms') @@ -632,7 +632,8 @@ def update_output(src_site, dest_site, sites_src_State, sites_dest_State): plotly_fig_mean_src_dest = mpl_to_plotly(fig_mean) plotly_fig_mean_src_dest.update_layout(layout_mean) - return [dcc.Graph(figure=plotly_fig_scr_dest),dcc.Graph(figure=plotly_fig_mean_src_dest), + return [dcc.Graph(figure=plotly_fig_scr_dest, responsive=True, style= {'height':'400'}) + ,dcc.Graph(figure=plotly_fig_mean_src_dest, responsive=True, style= {'height':'400'}), False if (sites_src_State is not None and len(sites_src_State) > 0) else True] # a callback for the third section of a page. Filters out the dest sites with measurements to the source site selected From 251d46b52a2666134bff5b41fe53767c50971884 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Thu, 30 Nov 2023 21:17:03 +0200 Subject: [PATCH 2/5] Added sorting of the dest sites Added sorting by the number of the (alarmed) measures in the third section of the page --- src/pages/packet-loss-ml.py | 3 +++ src/pages/throughput-ml.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 56ecac7..2180c58 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -656,6 +656,9 @@ def update_output(src_site, check, sites_src_State, sites_dest_State): is_src_sites = is_src.drop(['from', 'to', 'avg_value', 'doc_count_x', 'doc_count_y', 'tests_done', 'dt'], axis=1).sum( axis=0) is_src_sites = is_src_sites[is_src_sites.values != 0] + + # sort dest sites by the number of (alarmed) measurements + is_src_sites = is_src_sites.sort_values(ascending=False) commonprefix = 'dest_site_' dest_dropdown_items = [x[len(commonprefix):] for x, y in is_src_sites.items() if x.startswith(commonprefix)] diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 54deaad..8755220 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -657,6 +657,9 @@ def update_output(src_site, check, sites_src_State, sites_dest_State): is_src_sites = is_src.drop(['from', 'to', 'dt', 'ipv6', 'value', 'doc_count', 'ipv_ipv4', 'ipv_ipv6'], axis=1).sum( axis=0) is_src_sites = is_src_sites[is_src_sites.values != 0] + + # sort dest sites by the number of (alarmed) measurements + is_src_sites = is_src_sites.sort_values(ascending=False) commonprefix = 'dest_site_' dest_dropdown_items = [x[len(commonprefix):] for x, y in is_src_sites.items() if x.startswith(commonprefix)] From fe6fdfdf55271f1458e809c84f9a9713c44a8b42 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Thu, 30 Nov 2023 21:45:42 +0200 Subject: [PATCH 3/5] Put back 60 days preloading for packetloss --- src/model/Updater.py | 2 +- src/pages/packet-loss-ml.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/model/Updater.py b/src/model/Updater.py index 4f05316..7ab64a0 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -199,7 +199,7 @@ def createLocation(location): @timer def storeThroughputDataAndModel(self): - now = hp.defaultTimeRange(days=60, datesOnly=True) + now = hp.defaultTimeRange(days=90, datesOnly=True) start_date = now[0] end_date = now[1] start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T23:59:59.000Z'] diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 2180c58..15020e7 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -37,7 +37,7 @@ def description(q=None): ) def layout(**other_unknown_query_strings): - now = hp.defaultTimeRange(days=90, datesOnly=True) + now = hp.defaultTimeRange(days=60, datesOnly=True) #Packet loss alarms page return \ @@ -275,11 +275,11 @@ def update_output(start_date, end_date, sensitivity, sitesState): if start_date and end_date: start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T00:01:00.000Z'] else: - start_date, end_date = hp.defaultTimeRange(days=90, datesOnly=True) + start_date, end_date = hp.defaultTimeRange(days=60, datesOnly=True) start_date, end_date = [f'{start_date}T00:01:00.000Z', f'{end_date}T00:01:00.000Z'] # check if the date range is default - start_date_check, end_date_check = hp.defaultTimeRange(days=90, datesOnly=True) + start_date_check, end_date_check = hp.defaultTimeRange(days=60, datesOnly=True) start_date_check, end_date_check = [f'{start_date_check}T00:01:00.000Z', f'{end_date_check}T00:01:00.000Z'] # query for the dataset From 2bde29e63c74caa0994328a57c804120c71c78df Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Thu, 30 Nov 2023 22:31:06 +0200 Subject: [PATCH 4/5] Preload onehot dataset Preload onehot dataset for packet loss page so it loads quicker --- src/ml/packet_loss_one_month_onehot.py | 2 +- src/ml/packet_loss_preprocess_data.py | 9 --------- src/model/Updater.py | 5 ++++- src/pages/packet-loss-ml.py | 9 +++++---- src/pages/throughput-ml.py | 3 ++- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/ml/packet_loss_one_month_onehot.py b/src/ml/packet_loss_one_month_onehot.py index 7de11fa..e95e23d 100644 --- a/src/ml/packet_loss_one_month_onehot.py +++ b/src/ml/packet_loss_one_month_onehot.py @@ -24,5 +24,5 @@ def one_month_data(plsDf_custom): first_month_n = round(len(plsDf_onehot.index)*percentile) - return plsDf_onehot.iloc[:first_month_n] + return plsDf_onehot.iloc[:first_month_n], plsDf_onehot diff --git a/src/ml/packet_loss_preprocess_data.py b/src/ml/packet_loss_preprocess_data.py index 313737b..343d714 100644 --- a/src/ml/packet_loss_preprocess_data.py +++ b/src/ml/packet_loss_preprocess_data.py @@ -3,15 +3,6 @@ @timer def packet_loss_preprocess(plsDf_custom_x, model): - #Preprocessing - plsDf_custom_x = plsDf_custom_x.drop(['src', 'dest', 'pair', 'src_host', 'dest_host'], axis=1) - plsDf_custom_x['dt'] = plsDf_custom_x['to'] - print('plsDf_custom_x', plsDf_custom_x.shape) - - plsDf_custom_x['tests_done'] = plsDf_custom_x['tests_done'].str.rstrip('%').astype('float') / 100.0 - - #one hot encode the dataset 'case loading a pre-encoded dataset apparently crashes a jupyter notebook kernel - plsDf_custom_x = pd.get_dummies(plsDf_custom_x, dtype=int) plsDf_custom_y = plsDf_custom_x['flag'] plsDf_custom_x = plsDf_custom_x.drop(['flag'], axis=1) diff --git a/src/model/Updater.py b/src/model/Updater.py index 7ab64a0..17d5047 100644 --- a/src/model/Updater.py +++ b/src/model/Updater.py @@ -230,7 +230,10 @@ def storePacketLossDataAndModel(self): self.pq.writeToFile(plsDf, f'{self.location}ml-datasets/packet_loss_Df.parquet') # onehot encode the whole dataset and leave only one month for further ML training - plsDf_onehot_month = one_month_data(plsDf) + plsDf_onehot_month, plsDf_onehot = one_month_data(plsDf) + self.pq.writeToFile(plsDf_onehot, f'{self.location}ml-datasets/packet_loss_onehot_Df.parquet') + del plsDf_onehot + # train the model on one month data model = packet_loss_train_model(plsDf_onehot_month) del plsDf_onehot_month diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 15020e7..9255e52 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -285,7 +285,7 @@ def update_output(start_date, end_date, sensitivity, sitesState): # query for the dataset if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() - plsDf = pq.readFile(f'parquet/ml-datasets/packet_loss_Df.parquet') + plsDf_onehot = pq.readFile(f'parquet/ml-datasets/packet_loss_onehot_Df.parquet') model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_packet_loss.pkl' with open(model_pkl_file, 'rb') as file: @@ -293,7 +293,8 @@ def update_output(start_date, end_date, sensitivity, sitesState): else: plsDf = createPcktDataset(start_date, end_date) # onehot encode the whole dataset and leave only one month for further ML training - plsDf_onehot_month = one_month_data(plsDf) + plsDf_onehot_month, plsDf_onehot = one_month_data(plsDf) + del plsDf # train the model on one month data model = packet_loss_train_model(plsDf_onehot_month) @@ -303,8 +304,8 @@ def update_output(start_date, end_date, sensitivity, sitesState): # predict the alarms using ML model and return the dataset with original alarms and the ML alarms global plsDf_onehot_plot, df_to_plot - df_to_plot, plsDf_onehot_plot = packet_loss_preprocess(plsDf, model) - del plsDf, model + df_to_plot, plsDf_onehot_plot = packet_loss_preprocess(plsDf_onehot, model) + del model, plsDf_onehot print('+++++++ plsDf_onehot_plot', plsDf_onehot_plot.shape) # create a list with all sites as sources diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 8755220..8b6cbcc 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -290,7 +290,7 @@ def update_output(start_date, end_date, sensitivity, sitesState): # query for the dataset if (start_date, end_date) == (start_date_check, end_date_check): pq = Parquet() - rawDf = pq.readFile('parquet/ml-datasets/throughput_Df.parquet') + # rawDf = pq.readFile('parquet/ml-datasets/throughput_Df.parquet') rawDf_onehot = pq.readFile('parquet/ml-datasets/throughput_onehot_Df.parquet') model_pkl_file = f'parquet/ml-datasets/XGB_Classifier_model_throughput.pkl' @@ -306,6 +306,7 @@ def update_output(start_date, end_date, sensitivity, sitesState): # predict the data on the model and return the dataset with original alarms and the ML alarms global rawDf_onehot_plot, df_to_plot rawDf_onehot_plot, df_to_plot = predictData(rawDf_onehot, model) + del rawDf_onehot # create a list with all sites as sources src_sites = rawDf_onehot_plot.loc[:, rawDf_onehot_plot.columns.str.startswith("src_site")].columns.values.tolist() From 1f9331aaab27d1ddb2e6a23a415429870c94f9ab Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Fri, 1 Dec 2023 12:58:26 +0200 Subject: [PATCH 5/5] Fixed a possible bug with the last two plots Fixed a possible bug with the plots on the third section of a page, where they could overlap each other. --- src/pages/packet-loss-ml.py | 4 ++-- src/pages/throughput-ml.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pages/packet-loss-ml.py b/src/pages/packet-loss-ml.py index 9255e52..c95544f 100644 --- a/src/pages/packet-loss-ml.py +++ b/src/pages/packet-loss-ml.py @@ -215,7 +215,7 @@ def layout(**other_unknown_query_strings): html.Hr(className="my-2"), html.Br(), dcc.Loading( - html.Div(id='results-table-pl-dest-src'), + html.Div(id='results-table-pl-dest-src', style={'height':'400px'}), style={'height': '0.5rem'}, color='#00245A') ], className="m-2", style={"padding-top": "1%"}), dbc.Row([ @@ -223,7 +223,7 @@ def layout(**other_unknown_query_strings): html.Hr(className="my-2"), html.Br(), dcc.Loading( - html.Div(id='results-table-pl-mean-dest-src'), + html.Div(id='results-table-pl-mean-dest-src', style={'height':'400px'}), style={'height': '0.5rem'}, color='#00245A') ], className="m-2"), ], className="p-2 site boxwithshadow page-cont mb-2 g-0", align="center"), diff --git a/src/pages/throughput-ml.py b/src/pages/throughput-ml.py index 8b6cbcc..6a75386 100644 --- a/src/pages/throughput-ml.py +++ b/src/pages/throughput-ml.py @@ -218,7 +218,7 @@ def layout(**other_unknown_query_strings): html.Hr(className="my-2"), html.Br(), dcc.Loading( - html.Div(id='results-table-thrpt-dest-src'), + html.Div(id='results-table-thrpt-dest-src', style={'height':'400px'}), style={'height': '0.5rem'}, color='#00245A') ], className="m-2", style={"padding-top": "1%"}), dbc.Row([ @@ -226,7 +226,7 @@ def layout(**other_unknown_query_strings): html.Hr(className="my-2"), html.Br(), dcc.Loading( - html.Div(id='results-table-thrpt-mean-dest-src'), + html.Div(id='results-table-thrpt-mean-dest-src', style={'height':'400px'}), style={'height': '0.5rem'}, color='#00245A') ], className="m-2"), ], className="p-2 site boxwithshadow page-cont mb-2 g-0", align="center"),