From 25de26d346860a9330c109cdf0557c52f4a6cf2f Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Fri, 16 Jun 2023 19:34:48 +0000 Subject: [PATCH 1/8] max rank outside comprehensive list --- src/triage/component/catwalk/baselines/rankers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/triage/component/catwalk/baselines/rankers.py b/src/triage/component/catwalk/baselines/rankers.py index d8686e63b..3b2386bdb 100644 --- a/src/triage/component/catwalk/baselines/rankers.py +++ b/src/triage/component/catwalk/baselines/rankers.py @@ -181,7 +181,8 @@ def predict_proba(self, x): prev = rec # normalize to 0 to 1 range - x['score'] = [r/max(ranks) for r in ranks] + max_rank = ranks[-1] + x['score'] = [r/max_rank for r in ranks] # reset back to original sort order, calculate "score" for "0 class" scores_1 = x.sort_index()['score'].values From 44f78d7fb2829af66146798ada4e99fdcbc92544 Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Fri, 23 Jun 2023 20:47:33 +0000 Subject: [PATCH 2/8] update runs-on github workflow --- .github/workflows/build-mkdocs.yaml | 2 +- .github/workflows/publish-to-pypi.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-mkdocs.yaml b/.github/workflows/build-mkdocs.yaml index 817ed2692..4f7b12194 100644 --- a/.github/workflows/build-mkdocs.yaml +++ b/.github/workflows/build-mkdocs.yaml @@ -8,7 +8,7 @@ on: jobs: docs: name: Build Docs and Serve to Github Pages - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@master with: diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index bfd151b8f..e7745b4a0 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -5,7 +5,7 @@ on: push jobs: build-n-publish: name: Build and publish python distributions to PyPI - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@master - name: Set up Python 3.8 From c35fe98fd280882ee2158e396a8e03682ca6dffb Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Fri, 23 Jun 2023 20:52:05 +0000 Subject: [PATCH 3/8] updating package versions --- requirement/dev.txt | 2 +- requirement/include/build.txt | 2 +- requirement/include/test-management.txt | 2 +- requirement/main.txt | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirement/dev.txt b/requirement/dev.txt index 45f5418a4..9e3dfcb5a 100644 --- a/requirement/dev.txt +++ b/requirement/dev.txt @@ -1,7 +1,7 @@ -r include/build.txt bumpversion==0.6.0 mkdocs==1.3.0 -pymdown-extensions==9.4 +pymdown-extensions==10.0.1 mkdocs-material==8.2.12 mkdocstrings==0.18.1 black==22.3.0 diff --git a/requirement/include/build.txt b/requirement/include/build.txt index e75560c7c..03417781a 100644 --- a/requirement/include/build.txt +++ b/requirement/include/build.txt @@ -1 +1 @@ -wheel==0.37.1 +wheel==0.38.2 diff --git a/requirement/include/test-management.txt b/requirement/include/test-management.txt index b4b6c778c..a756109af 100644 --- a/requirement/include/test-management.txt +++ b/requirement/include/test-management.txt @@ -1,3 +1,3 @@ -codecov==2.1.12 +codecov==2.1.13 coverage>=4.4 tox==3.25.0 diff --git a/requirement/main.txt b/requirement/main.txt index d4044d746..f59266abf 100644 --- a/requirement/main.txt +++ b/requirement/main.txt @@ -13,11 +13,11 @@ Dickens==1.0.1 signalled-timeout==1.0.0 wrapt==1.14.0 argcmdr==0.7.0 -sqlparse==0.4.2 +sqlparse==0.4.4 pebble==4.6.3 adjustText==0.7.3 graphviz==0.20 -requests==2.27.1 +requests==2.31.0 coloredlogs==15.0.1 verboselogs==1.7 s3fs==0.4.2 # pyup: ignore From 6a4e1eb456ec7981773b669d3083e5ef312ea596 Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Tue, 25 Jul 2023 15:33:08 +0000 Subject: [PATCH 4/8] pytests for normalization multi rank feature --- src/tests/catwalk_tests/test_baselines.py | 51 ++++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/tests/catwalk_tests/test_baselines.py b/src/tests/catwalk_tests/test_baselines.py index db917b562..3e1a05f80 100644 --- a/src/tests/catwalk_tests/test_baselines.py +++ b/src/tests/catwalk_tests/test_baselines.py @@ -4,6 +4,7 @@ import pytest from unittest import TestCase +from numpy.testing import assert_array_equal from triage.component.catwalk.baselines.rankers import PercentileRankOneFeature from triage.component.catwalk.baselines.rankers import BaselineRankMultiFeature @@ -15,12 +16,14 @@ @pytest.fixture(scope="class") def data(request): - X_train = pd.DataFrame( + X_train = pd.DataFrame( { "x1": [0, 1, 2, 56, 25, 8, -3, 89], "x2": [0, 23, 1, 6, 5, 3, 18, 7], "x3": [1, 12, 5, -6, 2, 5, 3, -3], "x4": [6, 13, 4, 5, 35, 6, 43, 74], + "x6": [400, 400, 400, 300, 300, 300, 300, 300], + "x7": [1, 12, 12, -6, -6, 5, 5, -3], } ) y_train = [0, 1, 0, 1, 1, 1, 3, 0] @@ -30,6 +33,8 @@ def data(request): "x2": [6, -1, 1, 24, 5, 3, 18, 39], "x3": [1, 7, 4, 57, 2, 5, 3, 2], "x4": [7, 3, 6, 39, 35, 6, 43, -6], + "x6": [305, 305, 305, 305, 401, 401, 401, 401], + "x7": [1, 7, 7, 57, 2, 5, 3, 2], } ) y_test = [1, 3, 0, 0, 0, 0, 0, 1] @@ -41,7 +46,6 @@ def data(request): "y_test": y_test, } - @pytest.fixture(scope="class") def rules(request): request.cls.rules = ["x1 > 0", "x2 <= 1"] @@ -93,14 +97,16 @@ def test_scores_align_with_ranks(): assert not scores_align_with_ranks([1,2,2,3], [0,0.5,0.7,1.0]) + @pytest.mark.usefixtures("data") class TestRankOneFeature(TestCase): + def test_fit(self): ranker = PercentileRankOneFeature(feature="x3") assert ranker.feature_importances_ is None ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) np.testing.assert_array_equal( - ranker.feature_importances_, np.array([0, 0, 1, 0]) + ranker.feature_importances_, np.array([0, 0, 1, 0, 0, 0]) ) def test_ranking_on_unavailable_feature_raises_error(self): @@ -129,7 +135,7 @@ def test_fit(self): assert ranker.feature_importances_ is None ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) np.testing.assert_array_equal( - ranker.feature_importances_, np.array([0, 0, 1, 0]) + ranker.feature_importances_, np.array([0, 0, 1, 0, 0, 0]) ) def test_ranking_on_unavailable_feature_raises_error(self): @@ -165,6 +171,41 @@ def test_predict_proba_multi_feature(self): assert scores_align_with_ranks(expected_ranks, results[:,1]) + def test_predict_proba_no_ties(self): + for direction_value in [True, False]: + rules = [{'feature': 'x2', 'low_value_high_score': direction_value}] + + ranker = BaselineRankMultiFeature(rules=rules) + ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) + results_new = ranker.predict_proba(self.data["X_test"]) + results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + + assert_array_equal(results_new, results_deprecated) + + + def test_predict_proba_half_ties(self): + for direction_value in [True, False]: + rules = [{'feature': 'x6', 'low_value_high_score': direction_value}] + + ranker = BaselineRankMultiFeature(rules=rules) + ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) + results_new = ranker.predict_proba(self.data["X_test"]) + results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + + assert_array_equal(results_new, results_deprecated) + + + def test_predict_proba_some_ties(self): + for direction_value in [True, False]: + rules = [{'feature': 'x7', 'low_value_high_score': direction_value}] + + ranker = BaselineRankMultiFeature(rules=rules) + ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) + results_new = ranker.predict_proba(self.data["X_test"]) + results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + + assert_array_equal(results_new, results_deprecated) + @pytest.mark.parametrize('operator', OPERATOR_METHODS.keys()) def test_get_operator_method(operator): @@ -191,7 +232,7 @@ def test_fit(self): assert thresholder.feature_importances_ is None thresholder.fit(x=self.data["X_train"], y=self.data["y_train"]) np.testing.assert_array_equal( - thresholder.feature_importances_, np.array([1, 1, 0, 0]) + thresholder.feature_importances_, np.array([1, 1, 0, 0, 0, 0]) ) def test_rule_with_unavailable_feature_raises_error(self): From 882c58a0a87bb1c5d07271fd99a75319a31d19ef Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Tue, 25 Jul 2023 15:33:37 +0000 Subject: [PATCH 5/8] adding predict_proba_deprecated --- .../component/catwalk/baselines/rankers.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/triage/component/catwalk/baselines/rankers.py b/src/triage/component/catwalk/baselines/rankers.py index 3b2386bdb..385b5763e 100644 --- a/src/triage/component/catwalk/baselines/rankers.py +++ b/src/triage/component/catwalk/baselines/rankers.py @@ -189,3 +189,33 @@ def predict_proba(self, x): scores_0 = np.array([1-s for s in scores_1]) return np.array([scores_0, scores_1]).transpose() + + def predict_proba_deprecated(self, x): + """ Generate the rank scores and return these. + """ + # reduce x to the selected set of features + x = x[self.all_feature_names].reset_index(drop=True) + + x = x.sort_values(self.all_feature_names, ascending=self.all_sort_directions) + + # initialize curr_rank to -1 so the first record will have rank 0 (hence "score" + # will range from 0 to 1) + ranks = [] + curr_rank = -1 + prev = [] + + # calculate ranks over sorted records, giving ties the same rank + for rec in x.values: + if not np.array_equal(prev, rec): + curr_rank += 1 + ranks.append(curr_rank) + prev = rec + + # normalize to 0 to 1 range + x['score'] = [r/max(ranks) for r in ranks] + + # reset back to original sort order, calculate "score" for "0 class" + scores_1 = x.sort_index()['score'].values + scores_0 = np.array([1-s for s in scores_1]) + + return np.array([scores_0, scores_1]).transpose() \ No newline at end of file From cc0d5332c5b31779ff0d47e7dab1f08282f62f95 Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Tue, 25 Jul 2023 18:48:59 +0000 Subject: [PATCH 6/8] removing predict_proba_deprecated --- .../component/catwalk/baselines/rankers.py | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/src/triage/component/catwalk/baselines/rankers.py b/src/triage/component/catwalk/baselines/rankers.py index 385b5763e..3b2386bdb 100644 --- a/src/triage/component/catwalk/baselines/rankers.py +++ b/src/triage/component/catwalk/baselines/rankers.py @@ -189,33 +189,3 @@ def predict_proba(self, x): scores_0 = np.array([1-s for s in scores_1]) return np.array([scores_0, scores_1]).transpose() - - def predict_proba_deprecated(self, x): - """ Generate the rank scores and return these. - """ - # reduce x to the selected set of features - x = x[self.all_feature_names].reset_index(drop=True) - - x = x.sort_values(self.all_feature_names, ascending=self.all_sort_directions) - - # initialize curr_rank to -1 so the first record will have rank 0 (hence "score" - # will range from 0 to 1) - ranks = [] - curr_rank = -1 - prev = [] - - # calculate ranks over sorted records, giving ties the same rank - for rec in x.values: - if not np.array_equal(prev, rec): - curr_rank += 1 - ranks.append(curr_rank) - prev = rec - - # normalize to 0 to 1 range - x['score'] = [r/max(ranks) for r in ranks] - - # reset back to original sort order, calculate "score" for "0 class" - scores_1 = x.sort_index()['score'].values - scores_0 = np.array([1-s for s in scores_1]) - - return np.array([scores_0, scores_1]).transpose() \ No newline at end of file From 7ae7d2c37b8bdaa17c4f418328d04e88f22bb60a Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Tue, 25 Jul 2023 18:49:31 +0000 Subject: [PATCH 7/8] adding predict_proba_deprecated logic to test --- src/tests/catwalk_tests/test_baselines.py | 35 +++++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/tests/catwalk_tests/test_baselines.py b/src/tests/catwalk_tests/test_baselines.py index 3e1a05f80..17c0f81a2 100644 --- a/src/tests/catwalk_tests/test_baselines.py +++ b/src/tests/catwalk_tests/test_baselines.py @@ -50,6 +50,35 @@ def data(request): def rules(request): request.cls.rules = ["x1 > 0", "x2 <= 1"] +def predict_proba_deprecated(ranker, x): + """ Generate the rank scores and return these. + """ + # reduce x to the selected set of features + x = x[ranker.all_feature_names].reset_index(drop=True) + + x = x.sort_values(ranker.all_feature_names, ascending=ranker.all_sort_directions) + + # initialize curr_rank to -1 so the first record will have rank 0 (hence "score" + # will range from 0 to 1) + ranks = [] + curr_rank = -1 + prev = [] + + # calculate ranks over sorted records, giving ties the same rank + for rec in x.values: + if not np.array_equal(prev, rec): + curr_rank += 1 + ranks.append(curr_rank) + prev = rec + + # normalize to 0 to 1 range + x['score'] = [r/max(ranks) for r in ranks] + + # reset back to original sort order, calculate "score" for "0 class" + scores_1 = x.sort_index()['score'].values + scores_0 = np.array([1-s for s in scores_1]) + + return np.array([scores_0, scores_1]).transpose() def scores_align_with_ranks(expected_ranks, returned_scores): ''' @@ -178,7 +207,7 @@ def test_predict_proba_no_ties(self): ranker = BaselineRankMultiFeature(rules=rules) ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) results_new = ranker.predict_proba(self.data["X_test"]) - results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + results_deprecated = predict_proba_deprecated(ranker, self.data["X_test"]) assert_array_equal(results_new, results_deprecated) @@ -190,7 +219,7 @@ def test_predict_proba_half_ties(self): ranker = BaselineRankMultiFeature(rules=rules) ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) results_new = ranker.predict_proba(self.data["X_test"]) - results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + results_deprecated = predict_proba_deprecated(ranker, self.data["X_test"]) assert_array_equal(results_new, results_deprecated) @@ -202,7 +231,7 @@ def test_predict_proba_some_ties(self): ranker = BaselineRankMultiFeature(rules=rules) ranker.fit(x=self.data["X_train"], y=self.data["y_train"]) results_new = ranker.predict_proba(self.data["X_test"]) - results_deprecated = ranker.predict_proba_deprecated(self.data["X_test"]) + results_deprecated = predict_proba_deprecated(ranker, self.data["X_test"]) assert_array_equal(results_new, results_deprecated) From 531953a34801785eb8a7476af0a143ea7cb95785 Mon Sep 17 00:00:00 2001 From: Liliana Millan Date: Fri, 28 Jul 2023 17:25:51 +0000 Subject: [PATCH 8/8] specific ubuntu version --- .github/workflows/build-mkdocs.yaml | 2 +- .github/workflows/publish-to-pypi.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-mkdocs.yaml b/.github/workflows/build-mkdocs.yaml index 4f7b12194..817ed2692 100644 --- a/.github/workflows/build-mkdocs.yaml +++ b/.github/workflows/build-mkdocs.yaml @@ -8,7 +8,7 @@ on: jobs: docs: name: Build Docs and Serve to Github Pages - runs-on: ubuntu-latest + runs-on: ubuntu-18.04 steps: - uses: actions/checkout@master with: diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index e7745b4a0..bfd151b8f 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -5,7 +5,7 @@ on: push jobs: build-n-publish: name: Build and publish python distributions to PyPI - runs-on: ubuntu-latest + runs-on: ubuntu-18.04 steps: - uses: actions/checkout@master - name: Set up Python 3.8