From 57e6c1cef70aea7307e8a9eefd4f8b18596220f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20M=C3=A9ndez=20Hern=C3=A1ndez?= Date: Tue, 3 Oct 2023 12:57:17 +0200 Subject: [PATCH 1/4] feat(tests/test_data.py): Add histogram test --- tests/test_data.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_data.py b/tests/test_data.py index 36e0898..88f40db 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -42,6 +42,24 @@ def test_data_stats_datetime(self): self.assertEqual(stats['min'], datetime.datetime.fromisoformat('2021-03-22T11:00:00.000000+00:00')) self.assertEqual(stats['range'].total_seconds(), 3600) + def test_get_hist(self): + hist = opl.data.get_hist([0, 1, 1, 2, 2, 1, 1, 0]) + self.assertEqual( + hist, + [ + ((0.0, 0.2), 2.0), + ((0.2, 0.4), 0.0), + ((0.4, 0.6000000000000001), 0.0), + ((0.6000000000000001, 0.8), 0.0), + ((0.8, 1.0), 0.0), + ((1.0, 1.2000000000000002), 4.0), + ((1.2000000000000002, 1.4000000000000001), 0.0), + ((1.4000000000000001, 1.6), 0.0), + ((1.6, 1.8), 0.0), + ((1.8, 2.0), 2.0), + ], + ) + def test_get_rps(self): rps_vals = opl.data.get_rps([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], bucket_size=10, granularity=1) self.assertEqual(len(rps_vals), 10) From d6f47f5bb51d68ce56740ac6568e977a38c8695e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20M=C3=A9ndez=20Hern=C3=A1ndez?= Date: Tue, 3 Oct 2023 12:59:33 +0200 Subject: [PATCH 2/4] feat(opl/data.py): Provide and use non-NumPy histogram function --- core/opl/data.py | 35 ++++++++++++++++++++++++++++++++++- opl/data.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/core/opl/data.py b/core/opl/data.py index efb5f16..fd81651 100644 --- a/core/opl/data.py +++ b/core/opl/data.py @@ -156,6 +156,39 @@ def percentile(data, percent): return d0 + d1 +def create_bins(data, precision, bins_number=10): + bins = [] + borders = [] + min_data = min(data) + max_data = max(data) + bin_size = (max_data - min_data) / bins_number + + borders.append(min_data) + for x in range(bins_number): + bins.append((min_data + (bin_size * x), min_data + (bin_size * (x + 1)))) + borders.append(min_data + (bin_size * (x + 1))) + + return bins, borders + + +def find_bin(value, bins): + for i in range(0, len(bins)): + if bins[i][0] <= value < bins[i][1]: + return i + return -1 + + +def histogram(data, precision=1): + bins, borders = create_bins(data, precision) + counts = [0] * len(bins) + + for value in data: + bin_index = find_bin(value, bins) + counts[bin_index] += 1 + + return counts, borders + + def data_stats(data): if len(data) == 0: return {"samples": 0} @@ -201,7 +234,7 @@ def data_stats(data): def get_hist(data): - hist_counts, hist_borders = numpy.histogram(data) + hist_counts, hist_borders = histogram(data) hist_counts = [float(i) for i in hist_counts] hist_borders = [float(i) for i in hist_borders] out = [] diff --git a/opl/data.py b/opl/data.py index efb5f16..fd81651 100644 --- a/opl/data.py +++ b/opl/data.py @@ -156,6 +156,39 @@ def percentile(data, percent): return d0 + d1 +def create_bins(data, precision, bins_number=10): + bins = [] + borders = [] + min_data = min(data) + max_data = max(data) + bin_size = (max_data - min_data) / bins_number + + borders.append(min_data) + for x in range(bins_number): + bins.append((min_data + (bin_size * x), min_data + (bin_size * (x + 1)))) + borders.append(min_data + (bin_size * (x + 1))) + + return bins, borders + + +def find_bin(value, bins): + for i in range(0, len(bins)): + if bins[i][0] <= value < bins[i][1]: + return i + return -1 + + +def histogram(data, precision=1): + bins, borders = create_bins(data, precision) + counts = [0] * len(bins) + + for value in data: + bin_index = find_bin(value, bins) + counts[bin_index] += 1 + + return counts, borders + + def data_stats(data): if len(data) == 0: return {"samples": 0} @@ -201,7 +234,7 @@ def data_stats(data): def get_hist(data): - hist_counts, hist_borders = numpy.histogram(data) + hist_counts, hist_borders = histogram(data) hist_counts = [float(i) for i in hist_counts] hist_borders = [float(i) for i in hist_borders] out = [] From 7ebc37de15f52f4122b9129c8fc7ce6e9b34af7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20M=C3=A9ndez=20Hern=C3=A1ndez?= Date: Tue, 3 Oct 2023 13:32:34 +0200 Subject: [PATCH 3/4] feat(opl/data.py): Remove `numpy` as dependency --- core/opl/data.py | 1 - opl/data.py | 1 - 2 files changed, 2 deletions(-) diff --git a/core/opl/data.py b/core/opl/data.py index fd81651..69bc3a7 100644 --- a/core/opl/data.py +++ b/core/opl/data.py @@ -3,7 +3,6 @@ import math import statistics import time -import numpy class WaitForDataAndSave: diff --git a/opl/data.py b/opl/data.py index fd81651..69bc3a7 100644 --- a/opl/data.py +++ b/opl/data.py @@ -3,7 +3,6 @@ import math import statistics import time -import numpy class WaitForDataAndSave: From 2f23c9411cf99900e6d6e1e671b201d53488f627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20M=C3=A9ndez=20Hern=C3=A1ndez?= Date: Wed, 4 Oct 2023 13:37:17 +0200 Subject: [PATCH 4/4] cosmetic(tests/test_data.py): Appease `flake8` and `black` style --- tests/test_data.py | 53 ++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/tests/test_data.py b/tests/test_data.py index 88f40db..ecb24c3 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -7,40 +7,45 @@ class TestSkelet(unittest.TestCase): - def test_data_stats(self): stats = opl.data.data_stats([0, 1, 1, 2, 2, 1, 1, 0]) - self.assertEqual(stats['samples'], 8) - self.assertEqual(stats['min'], 0) - self.assertEqual(stats['mean'], 1) - self.assertEqual(stats['max'], 2) - self.assertEqual(stats['range'], 2) - self.assertEqual(stats['percentile25'], 0.75) - self.assertEqual(stats['percentile75'], 1.25) - self.assertEqual(stats['iqr'], 0.5) + self.assertEqual(stats["samples"], 8) + self.assertEqual(stats["min"], 0) + self.assertEqual(stats["mean"], 1) + self.assertEqual(stats["max"], 2) + self.assertEqual(stats["range"], 2) + self.assertEqual(stats["percentile25"], 0.75) + self.assertEqual(stats["percentile75"], 1.25) + self.assertEqual(stats["iqr"], 0.5) def test_data_stats_empty(self): stats = opl.data.data_stats([]) - self.assertEqual(stats['samples'], 0) + self.assertEqual(stats["samples"], 0) self.assertEqual(len(stats), 1) def test_data_stats_short(self): stats = opl.data.data_stats([1]) - self.assertEqual(stats['samples'], 1) - self.assertEqual(stats['stdev'], 0.0) + self.assertEqual(stats["samples"], 1) + self.assertEqual(stats["stdev"], 0.0) def test_data_stats_datetime(self): data = [ - datetime.datetime.fromisoformat('2021-03-22T12:00:00.000000+00:00'), - datetime.datetime.fromisoformat('2021-03-22T11:50:00.000000+00:00'), - datetime.datetime.fromisoformat('2021-03-22T11:30:00.000000+00:00'), - datetime.datetime.fromisoformat('2021-03-22T11:00:00.000000+00:00'), + datetime.datetime.fromisoformat("2021-03-22T12:00:00.000000+00:00"), + datetime.datetime.fromisoformat("2021-03-22T11:50:00.000000+00:00"), + datetime.datetime.fromisoformat("2021-03-22T11:30:00.000000+00:00"), + datetime.datetime.fromisoformat("2021-03-22T11:00:00.000000+00:00"), ] stats = opl.data.data_stats(data) - self.assertEqual(stats['samples'], 4) - self.assertEqual(stats['max'], datetime.datetime.fromisoformat('2021-03-22T12:00:00.000000+00:00')) - self.assertEqual(stats['min'], datetime.datetime.fromisoformat('2021-03-22T11:00:00.000000+00:00')) - self.assertEqual(stats['range'].total_seconds(), 3600) + self.assertEqual(stats["samples"], 4) + self.assertEqual( + stats["max"], + datetime.datetime.fromisoformat("2021-03-22T12:00:00.000000+00:00"), + ) + self.assertEqual( + stats["min"], + datetime.datetime.fromisoformat("2021-03-22T11:00:00.000000+00:00"), + ) + self.assertEqual(stats["range"].total_seconds(), 3600) def test_get_hist(self): hist = opl.data.get_hist([0, 1, 1, 2, 2, 1, 1, 0]) @@ -61,10 +66,12 @@ def test_get_hist(self): ) def test_get_rps(self): - rps_vals = opl.data.get_rps([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], bucket_size=10, granularity=1) + rps_vals = opl.data.get_rps( + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], bucket_size=10, granularity=1 + ) self.assertEqual(len(rps_vals), 10) - self.assertEqual(sum(rps_vals)/len(rps_vals), 1.0) + self.assertEqual(sum(rps_vals) / len(rps_vals), 1.0) rps_vals = opl.data.get_rps([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) self.assertEqual(len(rps_vals), 5) - self.assertEqual(sum(rps_vals)/len(rps_vals), 1.0) + self.assertEqual(sum(rps_vals) / len(rps_vals), 1.0)