Merge pull request #27 from Techtonique/clustering2

Clustering2

thierrymoudiki authored Apr 18, 2024
2 parents fe2b99b + 6950466 commit b2276fc
Showing 17 changed files with 440 additions and 60 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
@@ -1,3 +1,7 @@
# version 0.13.0

- add clustering to `LSBoostRegressor`, `LSBoostClassifier`, and `AdaOpt`
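
  An editorial illustration (not part of the CHANGES.md diff): in this commit's examples the new clustering options appear as constructor parameters, `n_clusters_input` for `AdaOpt` and `n_clusters` / `clustering_method` for the LSBoost estimators. A minimal sketch, assuming the `import mlsauce as ms` alias used in the repository's examples and only the parameter names visible in this diff:

  # Minimal sketch of the new clustering options; parameter names are taken
  # from the updated examples in this commit, not from a full API reference.
  import mlsauce as ms
  from sklearn.datasets import load_wine
  from sklearn.model_selection import train_test_split

  X, y = load_wine(return_X_y=True)
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                      random_state=123)

  # AdaOpt with clustering of the input data
  clf = ms.AdaOpt(n_clusters_input=3)
  clf.fit(X_train, y_train)
  print(clf.score(X_test, y_test))

  # LSBoostClassifier with clustering enabled, here via a Gaussian mixture
  clf = ms.LSBoostClassifier(n_clusters=3, clustering_method="gmm")
  clf.fit(X_train, y_train)
  print(clf.score(X_test, y_test))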

# version 0.12.3

- add prediction intervals to `LSBoostRegressor` (split conformal prediction,
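The 0.12.3 entry above is cut off in this view. For background only: split conformal prediction holds out a calibration set, takes a quantile of the absolute calibration residuals, and uses it as a symmetric half-width around point predictions. A generic scikit-learn sketch of that idea (not mlsauce's own interface, which this excerpt does not show):

# Generic split conformal prediction sketch; Ridge stands in for any regressor.
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
# split the training data into a proper training set and a calibration set
X_fit, X_calib, y_fit, y_calib = train_test_split(X_train, y_train,
                                                  test_size=0.5,
                                                  random_state=123)

model = Ridge().fit(X_fit, y_fit)
alpha = 0.05  # 95% nominal coverage
abs_resid = np.abs(y_calib - model.predict(X_calib))
# finite-sample-corrected quantile of the calibration residuals
n_calib = len(y_calib)
q = np.quantile(abs_resid,
                min(1.0, np.ceil((1 - alpha) * (n_calib + 1)) / n_calib))

preds = model.predict(X_test)
lower, upper = preds - q, preds + q
print(np.mean((y_test >= lower) & (y_test <= upper)))  # empirical coverage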
102 changes: 50 additions & 52 deletions examples/adaopt_classifier.py
@@ -35,6 +35,16 @@
print(obj.score(X_test, y_test))
print(time()-start)

# same data, now with clustering of the inputs (n_clusters_input)
obj = ms.AdaOpt(n_jobs=4, type_dist="euclidean", verbose=1,
                n_clusters_input=2)
#obj = ms.AdaOpt()
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)


# data 2
wine = load_wine()
@@ -52,6 +62,13 @@
print(obj.score(X_test, y_test))
print(time()-start)

obj = ms.AdaOpt(n_clusters_input=3)
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

# data 3
iris = load_iris()
@@ -70,6 +87,14 @@
print(obj.score(X_test, y_test))
print(time()-start)

obj = ms.AdaOpt(n_clusters_input=3)
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)


# data 4
digits = load_digits()
@@ -105,57 +130,30 @@
print(obj.score(X_test, y_test))
print(time()-start)

obj = ms.AdaOpt(n_iterations=50,
                learning_rate=0.3,
                reg_lambda=0.1,
                reg_alpha=0.5,
                eta=0.01,
                gamma=0.01,
                tolerance=1e-4,
                row_sample=1,
                k=1, n_clusters_input=5,
                n_jobs=3, type_dist="euclidean", verbose=1)
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

# # data 5

# zip_dir = "/Users/moudiki/Documents/Papers/adaopt/data/zip"
# data_train = pd.read_csv(zip_dir + "/zip_train.csv",
# index_col=0)
# data_test = pd.read_csv(zip_dir + "/zip_test.csv",
# index_col=0)

# y_train = data_train.y.values
# y_test = data_test.y.values
# X_train = np.ascontiguousarray(np.delete(data_train.values, 0, axis=1))
# X_test = np.ascontiguousarray(np.delete(data_test.values, 0, axis=1))

# obj = ms.AdaOpt(type_dist="euclidean-f",
# k=1, row_sample=1)
# start = time()
# obj.fit(X_train, y_train)
# print(time()-start)
# start = time()
# print(obj.score(X_test, y_test))
# print(time()-start)


# # data 6

# letter_dir = "/Users/moudiki/Documents/Papers/adaopt/data/letter"
# data_letter = pd.read_csv(letter_dir + "/letter_recognition.csv",
# index_col=0)


# y = data_letter.V1.values
# X = np.asarray(np.ascontiguousarray(np.delete(data_letter.values, 0,
# axis=1)), dtype='float64')

# np.random.seed(1323)
# X_train, X_test, y_train, y_test = train_test_split(X, y,
# test_size=0.3)


# obj = ms.AdaOpt(type_dist="euclidean-f",
# k=1, row_sample=1)
# start = time()
# obj.fit(X_train, y_train)
# print(time()-start)
# start = time()
# print(obj.score(X_test, y_test))
# print(time()-start)

# start = time()
# preds = obj.predict(X_test)
# print(time() - start)
# print(metrics.classification_report(preds, y_test))
# with clustering
obj = ms.AdaOpt(n_clusters=25, k=1,
                n_clusters_input=3)
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

55 changes: 53 additions & 2 deletions examples/lsboost_classifier.py
@@ -19,9 +19,12 @@
#ridge

print("\n")
print("ridge -----")
print("lsboost ridge -----")
print("\n")

print("\n")
print("breast_cancer data -----")

# data 1
breast_cancer = load_breast_cancer()
X = breast_cancer.data
@@ -42,6 +45,17 @@

print(obj.obj['loss'])

# with clustering (n_clusters=2)
obj = ms.LSBoostClassifier(tolerance=1e-2, n_clusters=2)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
obj = ms.LSBoostClassifier(backend="gpu")
print(obj.get_params())
@@ -55,6 +69,9 @@
print(obj.obj['loss'])

# data 2
print("\n")
print("wine data -----")

wine = load_wine()
Z = wine.data
t = wine.target
@@ -73,6 +90,17 @@

print(obj.obj['loss'])

obj = ms.LSBoostClassifier(n_clusters=3)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
obj = ms.LSBoostClassifier(backend="gpu")
print(obj.get_params())
@@ -86,6 +114,9 @@
print(obj.obj['loss'])

# data 3
print("\n")
print("iris data -----")

iris = load_iris()
Z = iris.data
t = iris.target
@@ -119,10 +150,13 @@
#lasso

print("\n")
print("lasso -----")
print("lsboost lasso -----")
print("\n")

# data 1
print("\n")
print("breast_cancer data -----")

breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
@@ -152,6 +186,9 @@
# print(time()-start)

# data 2
print("\n")
print("wine data -----")

wine = load_wine()
Z = wine.data
t = wine.target
@@ -179,6 +216,9 @@
# print(time()-start)

# data 3
print("\n")
print("iris data -----")

iris = load_iris()
Z = iris.data
t = iris.target
@@ -196,6 +236,17 @@
print(obj.score(X_test, y_test))
print(time()-start)

# with clustering, here using a Gaussian mixture model ("gmm")
obj = ms.LSBoostClassifier(solver="lasso",
                           n_clusters=3,
                           clustering_method="gmm")
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
# obj = ms.LSBoostClassifier(backend="gpu", solver="lasso")
# print(obj.get_params())
29 changes: 29 additions & 0 deletions examples/lsboost_regressor.py
@@ -23,6 +23,10 @@
print("\n")

# data 2

print("\n")
print("diabetes data -----")

diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
@@ -42,6 +46,17 @@

print(obj.obj['loss'])

# with clustering of the inputs (n_clusters=2)
obj = ms.LSBoostRegressor(col_sample=0.9, row_sample=0.9, n_clusters=2)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
obj = ms.LSBoostRegressor(backend="gpu")
print(obj.get_params())
@@ -61,6 +76,9 @@
print("\n")

# data 2
print("\n")
print("diabetes data -----")

diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
@@ -80,6 +98,17 @@

print(obj.obj['loss'])

obj = ms.LSBoostRegressor(solver="lasso", n_clusters=2)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
# obj = ms.LSBoostRegressor(backend="gpu", solver="lasso")
# print(obj.get_params())
36 changes: 36 additions & 0 deletions mlsauce.egg-info/PKG-INFO
@@ -0,0 +1,36 @@
Metadata-Version: 2.1
Name: mlsauce
Version: 0.13.0
Summary: Miscellaneous Statistical/Machine Learning tools
Maintainer: T. Moudiki
Maintainer-email: [email protected]
License: BSD3 Clause Clear
Platform: linux
Platform: macosx
Platform: windows
Classifier: Development Status :: 2 - Pre-Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Natural Language :: English
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Requires-Python: >=3.5
License-File: LICENSE
Requires-Dist: numpy
Requires-Dist: Cython
Requires-Dist: joblib
Requires-Dist: pandas
Requires-Dist: requests
Requires-Dist: scikit-learn
Requires-Dist: scipy
Requires-Dist: tqdm
Requires-Dist: jax
Requires-Dist: jaxlib
Provides-Extra: alldeps
Requires-Dist: numpy>=1.13.0; extra == "alldeps"
Requires-Dist: scipy>=0.19.0; extra == "alldeps"

Miscellaneous Statistical/Machine Learning tools