From b207768fa0451857d827be1097ca4bd127f6f108 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Sun, 5 Nov 2023 18:34:42 -0800 Subject: [PATCH 1/9] Update load_boston in README.md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7db3459..1f221f5 100644 --- a/README.md +++ b/README.md @@ -32,11 +32,15 @@ Probabilistic regression example on the Boston housing dataset: ```python from ngboost import NGBRegressor -from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error -X, Y = load_boston(True) +#Load Boston housing dataset +data_url = "http://lib.stat.cmu.edu/datasets/boston" +raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) +X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) +Y = raw_df.values[1::2, 2] + X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) ngb = NGBRegressor().fit(X_train, Y_train) From 0154ac5880e159f73ef65519c653c11e80a846a5 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:00:47 -0800 Subject: [PATCH 2/9] Remove load_boston in regression.py --- examples/regression.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/regression.py b/examples/regression.py index ee06ade..50ed0dc 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -1,4 +1,3 @@ -from sklearn.datasets import load_boston from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split @@ -7,7 +6,12 @@ if __name__ == "__main__": - X, Y = load_boston(return_X_y=True) + #Load Boston housing dataset + data_url = "http://lib.stat.cmu.edu/datasets/boston" + raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) + X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) + Y = raw_df.values[1::2, 2] + X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) ngb = NGBRegressor(Dist=Normal).fit(X_train, Y_train) From 17108402129fcc98ffc2bf65877b869f6c1a8888 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:01:21 -0800 Subject: [PATCH 3/9] Remove load_boston in survival.py --- examples/survival.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/survival.py b/examples/survival.py index c830db2..90037a2 100644 --- a/examples/survival.py +++ b/examples/survival.py @@ -1,5 +1,4 @@ import numpy as np -from sklearn.datasets import load_boston from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split @@ -8,7 +7,12 @@ if __name__ == "__main__": - X, Y = load_boston(return_X_y=True) + #Load Boston housing dataset + data_url = "http://lib.stat.cmu.edu/datasets/boston" + raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) + X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) + Y = raw_df.values[1::2, 2] + X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) # introduce administrative censoring From ec669918677c47b01d0055856abbc6e914912ac9 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:07:26 -0800 Subject: [PATCH 4/9] Add import in regression.py --- examples/regression.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/regression.py b/examples/regression.py index 50ed0dc..735d47e 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -1,6 +1,9 @@ from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split +import numpy as np +import pandas as pd + from ngboost import NGBRegressor from ngboost.distns import Normal From 44b505835754b7f0272ac0b752bec02824a08c2f Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:07:49 -0800 Subject: [PATCH 5/9] Add import in survival.py --- examples/survival.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/survival.py b/examples/survival.py index 90037a2..4af3ea4 100644 --- a/examples/survival.py +++ b/examples/survival.py @@ -1,4 +1,6 @@ import numpy as np +import pandas as pd + from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split From 2ca3c9f298416adcf75a55acc95d084b79c4f445 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Mon, 6 Nov 2023 13:15:36 -0800 Subject: [PATCH 6/9] Add pandas in pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 233df06..c6f9f12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ numpy = ">=1.21.2" scipy = ">=1.7.2" tqdm = ">=4.3" lifelines = ">=0.25" +pandas = ">=1.3" [tool.poetry.dev-dependencies] pytest = "^6.1.2" From 36ffc514c520385f59184b1c5dd9986ac2c95398 Mon Sep 17 00:00:00 2001 From: Yixing <108294040+jyx-su@users.noreply.github.com> Date: Wed, 8 Nov 2023 13:10:02 -0800 Subject: [PATCH 7/9] Remove pandas from pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c6f9f12..233df06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ numpy = ">=1.21.2" scipy = ">=1.7.2" tqdm = ">=4.3" lifelines = ">=0.25" -pandas = ">=1.3" [tool.poetry.dev-dependencies] pytest = "^6.1.2" From 428d9e9e2a7cd5ead3ece6d2b2522874fae6b66b Mon Sep 17 00:00:00 2001 From: jyx-su <108294040+jyx-su@users.noreply.github.com> Date: Wed, 8 Nov 2023 13:21:05 -0800 Subject: [PATCH 8/9] black format --- examples/regression.py | 5 ++--- examples/survival.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/regression.py b/examples/regression.py index 735d47e..88122cd 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -8,13 +8,12 @@ from ngboost.distns import Normal if __name__ == "__main__": - - #Load Boston housing dataset + # Load Boston housing dataset data_url = "http://lib.stat.cmu.edu/datasets/boston" raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) Y = raw_df.values[1::2, 2] - + X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) ngb = NGBRegressor(Dist=Normal).fit(X_train, Y_train) diff --git a/examples/survival.py b/examples/survival.py index 4af3ea4..db59caf 100644 --- a/examples/survival.py +++ b/examples/survival.py @@ -8,13 +8,12 @@ from ngboost.distns import LogNormal if __name__ == "__main__": - - #Load Boston housing dataset + # Load Boston housing dataset data_url = "http://lib.stat.cmu.edu/datasets/boston" raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]) Y = raw_df.values[1::2, 2] - + X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) # introduce administrative censoring From c6e4c88427368902a4ed023a795958182ef9ec7f Mon Sep 17 00:00:00 2001 From: jyx-su <108294040+jyx-su@users.noreply.github.com> Date: Wed, 8 Nov 2023 22:01:57 -0800 Subject: [PATCH 9/9] make lint --- examples/regression.py | 5 ++--- examples/survival.py | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/regression.py b/examples/regression.py index 88122cd..971a7b2 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -1,8 +1,7 @@ -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import train_test_split - import numpy as np import pandas as pd +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split from ngboost import NGBRegressor from ngboost.distns import Normal diff --git a/examples/survival.py b/examples/survival.py index db59caf..32be7a5 100644 --- a/examples/survival.py +++ b/examples/survival.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd - from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split