Skip to content
This repository has been archived by the owner on Aug 19, 2024. It is now read-only.

[WIP] benchmarks for nested parallelism in sklearn #3

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions scikit-learn/bench_cross-valid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals.joblib import Memory
from timeit import default_timer as timer

# Cache object used to memoize the generated dataset; it is configured with
# the relative directory name 'cache'.
# NOTE(review): newer joblib releases spell this argument `location` --
# confirm against the joblib version bundled with the installed sklearn.
mem = Memory(cachedir='cache')


@mem.cache
def make_data():
    """Build the synthetic classification dataset used by this benchmark.

    The function is wrapped by ``mem.cache`` so that the result can be
    reused instead of being regenerated on every run.

    Returns:
        The value of ``datasets.make_classification`` for 70000 samples
        with 200 features and a fixed ``random_state`` (the caller unpacks
        it as ``X, y``).
    """
    return datasets.make_classification(
        n_samples=70000, n_features=200, random_state=0)

# Load (or regenerate) the dataset and split it in two equal halves.
# Only the training half is used below; the test half is left unused.
X, y = make_data()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

# The forest asks for all cores (n_jobs=-1) while the cross-validation
# below asks for 5 workers: this nesting is what the benchmark measures.
clf = RandomForestClassifier(
    n_estimators=100,
    n_jobs=-1,
    random_state=0,
)

# Time a 5-fold cross-validation; the scores themselves are discarded.
t0 = timer()
cross_val_score(clf, X_train, y_train, n_jobs=5, cv=5)
t1 = timer()
elapsed = t1 - t0
print(elapsed)

33 changes: 33 additions & 0 deletions scikit-learn/bench_grid_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals.joblib import Memory
from timeit import default_timer as timer

# Cache object used to memoize the generated dataset; it is configured with
# the relative directory name 'cache'.
# NOTE(review): newer joblib releases spell this argument `location` --
# confirm against the joblib version bundled with the installed sklearn.
mem = Memory(cachedir='cache')


@mem.cache
def make_data():
    """Build the synthetic classification dataset used by this benchmark.

    The function is wrapped by ``mem.cache`` so that the result can be
    reused instead of being regenerated on every run.

    Returns:
        The value of ``datasets.make_classification`` for 700 samples with
        200 features and a fixed ``random_state`` (the caller unpacks it
        as ``X, y``).
    """
    return datasets.make_classification(
        n_samples=700, n_features=200, random_state=0)

# Load (or regenerate) the dataset and split it in two equal halves.
# Only the training half is used below; the test half is left unused.
X, y = make_data()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# Hyper-parameter grid explored by the cross-validated search
# (4 * 3 * 4 = 48 candidate settings).
parameters = {
    'n_estimators': [10, 50, 100, 500],
    'max_depth': [5, 10, None],
    'max_features': [.1, .5, .8, 1.],
}

# Both the search and the forest request all cores (n_jobs=-1): this nested
# parallelism is what the benchmark measures.
# The scoring name is passed as a plain string; the previous
# `'precision_macro' % score` referenced an undefined name `score` and had
# no format placeholder, so it could never evaluate.
clf = GridSearchCV(
    RandomForestClassifier(n_jobs=-1),
    parameters,
    cv=5,
    n_jobs=-1,
    scoring='precision_macro',
)

# Time the full grid search and print the elapsed wall-clock seconds.
t0 = timer()
clf.fit(X_train, y_train)
t1 = timer()
print(t1 - t0)