From 45ac0b2ed44dee8dbf720b5b349be1f720d36dab Mon Sep 17 00:00:00 2001 From: mohammedelgammal Date: Wed, 11 Dec 2024 03:55:19 +0200 Subject: [PATCH 1/4] refactor[DataTomeAnalysis]!: Optimize median method from O(N log N) to O(N) time complexity using the IntroSelect algorithm Median now uses the introSelect algorithm, which combines the quickSelect and medianOfMedians algorithms. feat[DataTomeUtils]: Add a dt_min helper function that returns the minimum of two values, and a swap helper that swaps two values. Resolves (#18) --- src/DataTomeAnalysis.h | 88 +++++++++++++++++++++++++++++++++--------- src/DataTomeUtils.h | 14 ++++++- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/src/DataTomeAnalysis.h b/src/DataTomeAnalysis.h index d74991f..5ef2572 100644 --- a/src/DataTomeAnalysis.h +++ b/src/DataTomeAnalysis.h @@ -35,26 +35,26 @@ class DataTomeAnalysis : public DataTomeMvAvg { return result; } - TypeOfArray median() { - TypeOfArray median = 0; - size_t current_size = this->point_count(); - - TypeOfArray *temp = - (typeof(temp))malloc(current_size * sizeof(typeof(temp))); - - memcpy(temp, this->_array, current_size * sizeof(TypeOfArray)); - - qsort(temp, current_size, sizeof(TypeOfArray), sort_ascend); - - if (current_size % 2 == 0) { - median = (temp[current_size / 2 - 1] + temp[current_size / 2]) / 2; - } else { - median = temp[current_size / 2]; + double median() { + double calculated_median; + size_t current_size = this->point_count(), + m = current_size / 2; + TypeOfArray *temp_array = (TypeOfArray *)malloc(sizeof(TypeOfArray) * current_size); + + memcpy(temp_array, this->_array, sizeof(TypeOfArray) * current_size); + + if (current_size % 2 == 0) + { + size_t m1 = quickSelect(0, current_size - 1, m - 1, temp_array), + m2 = quickSelect(0, current_size - 1, m, temp_array); + calculated_median = (m1 + m2) / 2.0; } - - free(temp); - - return median; + else + { + calculated_median = quickSelect(0, current_size - 1, m, temp_array); + } + free(temp_array); + return calculated_median; } 
TypeOfArray lowest_mode() { @@ -275,6 +275,56 @@ class DataTomeAnalysis : public DataTomeMvAvg { return sqrt(partial_var(partial_id) / this->partial_point_count(partial_id)); } + + private: + TypeOfArray medianOfMedians(int l, int r, TypeOfArray nums[]) { + int k = 5; + size_t size = r - l + 1, + medians_size = (size + k - 1) / k; + TypeOfArray medians[medians_size]; + for (int i = l, m_count = 0; i <= r; i += k, m_count++) + { + int left = i, right = min(i + k, r + 1), + mid = left + (right - left) / 2; + qsort(nums + left, right - left, sizeof(TypeOfArray), sort_ascend); + medians[m_count] = (nums[mid]); + } + qsort(medians, medians_size, sizeof(TypeOfArray), sort_ascend); + return medians[medians_size / 2]; + }; + + TypeOfArray quickSelect(int l, int r, int m, TypeOfArray nums[]) { + int pivot = medianOfMedians(l, r, nums), + index = r, + p = l; + for (int i = l; i <= r; i++) + { + if (nums[i] == pivot) + { + index = i; + swap(nums[index], nums[r]); + break; + } + } + for (int i = l; i < r; i++) + { + if (nums[i] < nums[r]) + { + swap(nums[i], nums[p]); + p += 1; + } + } + swap(nums[p], nums[r]); + if (p > m) + { + return quickSelect(l, p - 1, m, nums); + } + else if (p < m) + { + return quickSelect(p + 1, r, m, nums); + } + return nums[p]; + }; }; #endif // DATA_TOME_ANALYSIS_H \ No newline at end of file diff --git a/src/DataTomeUtils.h b/src/DataTomeUtils.h index eb5cb8c..309bdd0 100644 --- a/src/DataTomeUtils.h +++ b/src/DataTomeUtils.h @@ -14,4 +14,16 @@ int sort_ascend(const void *cmp1, const void *cmp2) { return (int)a - b; } -#endif // DATA_TOME_UTILS_H \ No newline at end of file +template +void swap(TypeOfArray &a, TypeOfArray &b) { + TypeOfArray temp = a; + a = b; + b = temp; +} + +template +auto dt_min(const T1 &a, const T2 &b) -> decltype(a < b ? a : b) { + return (a < b) ? 
a : b; +} + +#endif // DATA_TOME_UTILS_H \ No newline at end of file From 2fc4734d5354b1305077864b22c6108309b57192 Mon Sep 17 00:00:00 2001 From: mohammedelgammal Date: Mon, 16 Dec 2024 17:56:18 +0200 Subject: [PATCH 2/4] fix[DataTomeAnalysis]: Rename min to dt_min so the utility minimum function is used instead of the C++ standard library's min --- src/DataTomeAnalysis.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTomeAnalysis.h b/src/DataTomeAnalysis.h index 5ef2572..c6aab36 100644 --- a/src/DataTomeAnalysis.h +++ b/src/DataTomeAnalysis.h @@ -284,7 +284,7 @@ class DataTomeAnalysis : public DataTomeMvAvg { TypeOfArray medians[medians_size]; for (int i = l, m_count = 0; i <= r; i += k, m_count++) { - int left = i, right = min(i + k, r + 1), + int left = i, right = dt_min(i + k, r + 1), mid = left + (right - left) / 2; qsort(nums + left, right - left, sizeof(TypeOfArray), sort_ascend); medians[m_count] = (nums[mid]); From d074fbe318b20843b6b22139a6f3b4d9331eb3c6 Mon Sep 17 00:00:00 2001 From: mohammedelgammal Date: Mon, 16 Dec 2024 20:26:57 +0200 Subject: [PATCH 3/4] fix[DataTomeUtils]: Refactor dt_min utility function --- src/DataTomeUtils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTomeUtils.h b/src/DataTomeUtils.h index 309bdd0..fb4b380 100644 --- a/src/DataTomeUtils.h +++ b/src/DataTomeUtils.h @@ -21,8 +21,8 @@ void swap(TypeOfArray &a, TypeOfArray &b) { b = temp; } -template -auto dt_min(const T1 &a, const T2 &b) -> decltype(a < b ? a : b) { +template +const T &dt_min(const T &a, const T &b) { return (a < b) ? 
a : b; } From 0d7971e25c0fc2f57625a0de7d5bbe1a9704a39c Mon Sep 17 00:00:00 2001 From: mohammedelgammal Date: Fri, 27 Dec 2024 02:40:17 +0200 Subject: [PATCH 4/4] fix[DataTomeUtils]: Refactor sort_ascend Utility Function to precisely sort decimal numbers resolves #24 --- src/DataTomeUtils.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/DataTomeUtils.h b/src/DataTomeUtils.h index fb4b380..542675d 100644 --- a/src/DataTomeUtils.h +++ b/src/DataTomeUtils.h @@ -11,7 +11,11 @@ int sort_ascend(const void *cmp1, const void *cmp2) { TypeOfArray b = *((TypeOfArray *)cmp2); TypeOfArray a = *((TypeOfArray *)cmp1); - return (int)a - b; + if (a > b) + return 1; + else if (a < b) + return -1; + return 0; } template @@ -21,8 +25,8 @@ void swap(TypeOfArray &a, TypeOfArray &b) { b = temp; } -template -const T &dt_min(const T &a, const T &b) { +template +const TypeOfArray &dt_min(const TypeOfArray &a, const TypeOfArray &b) { return (a < b) ? a : b; }