Skip to content

Commit

Permalink
ENH: OneDAL python ifaces for Random Forest in preview namespace [Bat…
Browse files Browse the repository at this point in the history
…ch] (#949)
  • Loading branch information
samir-nasibli authored Feb 24, 2023
1 parent ce2f5dd commit f7fc14a
Show file tree
Hide file tree
Showing 22 changed files with 2,613 additions and 153 deletions.
116 changes: 61 additions & 55 deletions daal4py/sklearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#===============================================================================
# ===============================================================================
# Copyright 2014 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#===============================================================================
# ===============================================================================

import numpy as np

Expand Down Expand Up @@ -64,16 +64,15 @@ def _to_absolute_max_features(
"explicitly set `max_features=1.0` or remove this "
"parameter as it is also the default value for "
"RandomForestRegressors and ExtraTreesRegressors.",
FutureWarning,
)
FutureWarning, )
return max(1, int(np.sqrt(n_features))
) if is_classification else n_features
if max_features == 'sqrt':
return max(1, int(np.sqrt(n_features)))
if max_features == "log2":
return max(1, int(np.log2(n_features)))
allowed_string_values = '"sqrt" or "log2"' if sklearn_check_version('1.3') \
else '"auto", "sqrt" or "log2"'
allowed_string_values = '"sqrt" or "log2"' if sklearn_check_version(
'1.3') else '"auto", "sqrt" or "log2"'
raise ValueError(
'Invalid value for max_features. Allowed string '
f'values are {allowed_string_values}.')
Expand Down Expand Up @@ -277,7 +276,8 @@ def _daal_fit_classifier(self, X, y, sample_weight=None):
self.oob_score_ = dfc_trainingResult.outOfBagErrorAccuracy[0][0]
self.oob_decision_function_ = dfc_trainingResult.outOfBagErrorDecisionFunction
if self.oob_decision_function_.shape[-1] == 1:
self.oob_decision_function_ = self.oob_decision_function_.squeeze(axis=-1)
self.oob_decision_function_ = self.oob_decision_function_.squeeze(
axis=-1)

return self

Expand Down Expand Up @@ -462,7 +462,8 @@ def _daal_fit_regressor(self, X, y, sample_weight=None):

if self.oob_score:
self.oob_score_ = dfr_trainingResult.outOfBagErrorR2[0][0]
self.oob_prediction_ = dfr_trainingResult.outOfBagErrorPrediction.squeeze(axis=1)
self.oob_prediction_ = dfr_trainingResult.outOfBagErrorPrediction.squeeze(
axis=1)
if self.oob_prediction_.shape[-1] == 1:
self.oob_prediction_ = self.oob_prediction_.squeeze(axis=-1)

Expand Down Expand Up @@ -592,27 +593,28 @@ class RandomForestClassifier(RandomForestClassifier_original):
}

if sklearn_check_version('1.0'):
def __init__(self,
n_estimators=100,
criterion="gini",
max_depth=None,
min_samples_split=2,
min_samples_leaf=1,
min_weight_fraction_leaf=0.,
max_features='sqrt' if sklearn_check_version('1.1') else 'auto',
max_leaf_nodes=None,
min_impurity_decrease=0.,
bootstrap=True,
oob_score=False,
n_jobs=None,
random_state=None,
verbose=0,
warm_start=False,
class_weight=None,
ccp_alpha=0.0,
max_samples=None,
maxBins=256,
minBinSize=1):
def __init__(
self,
n_estimators=100,
criterion="gini",
max_depth=None,
min_samples_split=2,
min_samples_leaf=1,
min_weight_fraction_leaf=0.,
max_features='sqrt' if sklearn_check_version('1.1') else 'auto',
max_leaf_nodes=None,
min_impurity_decrease=0.,
bootstrap=True,
oob_score=False,
n_jobs=None,
random_state=None,
verbose=0,
warm_start=False,
class_weight=None,
ccp_alpha=0.0,
max_samples=None,
maxBins=256,
minBinSize=1):
super(RandomForestClassifier, self).__init__(
n_estimators=n_estimators,
criterion=criterion,
Expand Down Expand Up @@ -818,8 +820,7 @@ def predict_proba(self, X):
if sklearn_check_version('1.0'):
@deprecated(
"Attribute `n_features_` was deprecated in version 1.0 and will be "
"removed in 1.2. Use `n_features_in_` instead."
)
"removed in 1.2. Use `n_features_in_` instead.")
@property
def n_features_(self):
return self.n_features_in_
Expand Down Expand Up @@ -858,7 +859,9 @@ def _estimators_(self):
for i in range(self.n_estimators):
est_i = clone(est)
est_i.set_params(
random_state=random_state_checked.randint(np.iinfo(np.int32).max))
random_state=random_state_checked.randint(
np.iinfo(
np.int32).max))
if sklearn_check_version('1.0'):
est_i.n_features_in_ = self.n_features_in_
else:
Expand Down Expand Up @@ -907,26 +910,28 @@ class RandomForestRegressor(RandomForestRegressor_original):
}

if sklearn_check_version('1.0'):
def __init__(self,
n_estimators=100, *,
criterion="squared_error",
max_depth=None,
min_samples_split=2,
min_samples_leaf=1,
min_weight_fraction_leaf=0.,
max_features=1.0 if sklearn_check_version('1.1') else 'auto',
max_leaf_nodes=None,
min_impurity_decrease=0.,
bootstrap=True,
oob_score=False,
n_jobs=None,
random_state=None,
verbose=0,
warm_start=False,
ccp_alpha=0.0,
max_samples=None,
maxBins=256,
minBinSize=1):
def __init__(
self,
n_estimators=100,
*,
criterion="squared_error",
max_depth=None,
min_samples_split=2,
min_samples_leaf=1,
min_weight_fraction_leaf=0.,
max_features=1.0 if sklearn_check_version('1.1') else 'auto',
max_leaf_nodes=None,
min_impurity_decrease=0.,
bootstrap=True,
oob_score=False,
n_jobs=None,
random_state=None,
verbose=0,
warm_start=False,
ccp_alpha=0.0,
max_samples=None,
maxBins=256,
minBinSize=1):
super(RandomForestRegressor, self).__init__(
n_estimators=n_estimators,
criterion=criterion,
Expand Down Expand Up @@ -1071,8 +1076,7 @@ def predict(self, X):
if sklearn_check_version('1.0'):
@deprecated(
"Attribute `n_features_` was deprecated in version 1.0 and will be "
"removed in 1.2. Use `n_features_in_` instead."
)
"removed in 1.2. Use `n_features_in_` instead.")
@property
def n_features_(self):
return self.n_features_in_
Expand Down Expand Up @@ -1109,7 +1113,9 @@ def _estimators_(self):
for i in range(self.n_estimators):
est_i = clone(est)
est_i.set_params(
random_state=random_state_checked.randint(np.iinfo(np.int32).max))
random_state=random_state_checked.randint(
np.iinfo(
np.int32).max))
if sklearn_check_version('1.0'):
est_i.n_features_in_ = self.n_features_in_
else:
Expand Down
3 changes: 2 additions & 1 deletion onedal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
import onedal._onedal_py_host as _backend
_is_dpc_backend = False

__all__ = ['decomposition', 'neighbors', 'primitives', 'svm']

__all__ = ['decomposition', 'ensemble', 'neighbors', 'primitives', 'svm']

if _is_dpc_backend:
__all__.append('spmd')
Expand Down
18 changes: 18 additions & 0 deletions onedal/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@

#pragma once

// OVERFLOW_CHECK_BY_ADDING(type, op1, op2)
//
// Round-trip check for the sum (op1) + (op2): the sum is stored in a
// `volatile type` temporary, (op1) is subtracted back out, and the result is
// compared against (op2). If they differ, std::runtime_error is thrown with
// the message "Integer overflow by adding".
//
// Usage notes:
//   * Both op1 and op2 are expanded more than once, so pass expressions
//     without side effects.
//   * The macro expands to a bare `{ ... }` block, not `do { ... } while (0)`;
//     take care when using it as the unbraced body of an `if` that has an
//     `else`.
//   * `volatile` presumably keeps the compiler from folding the round trip
//     away -- TODO confirm intent.
//   * NOTE(review): the sum (op1) + (op2) is evaluated in the operands' own
//     (promoted) type before it is converted to `type`; for signed operands an
//     overflowing addition is undefined behavior in C++, so the check is only
//     dependable for unsigned arithmetic -- confirm what the call sites pass.
//   * NOTE(review): std::runtime_error lives in <stdexcept>; no include of it
//     is visible ahead of this macro in this header -- confirm it is in scope
//     wherever the macro is expanded.
#define OVERFLOW_CHECK_BY_ADDING(type, op1, op2) \
{ \
volatile type r = (op1) + (op2); \
r -= (op1); \
if (!(r == (op2))) \
throw std::runtime_error("Integer overflow by adding"); \
}

// OVERFLOW_CHECK_BY_MULTIPLICATION(type, op1, op2)
//
// Round-trip check for the product (op1) * (op2): the product is stored in a
// `volatile type` temporary, divided by (op1), and compared against (op2).
// If they differ, std::runtime_error is thrown with the message
// "Integer overflow by multiplication".
//
// The whole check is skipped when either operand equals 0; this also keeps
// the `r /= (op1)` step from dividing by zero.
//
// Usage notes:
//   * Both op1 and op2 are expanded more than once, so pass expressions
//     without side effects.
//   * The macro expands to a bare `{ ... }` block, not `do { ... } while (0)`;
//     take care when using it as the unbraced body of an `if` that has an
//     `else`.
//   * `volatile` presumably keeps the compiler from folding the round trip
//     away -- TODO confirm intent.
//   * NOTE(review): the product (op1) * (op2) is evaluated in the operands'
//     own (promoted) type before it is converted to `type`; for signed
//     operands an overflowing multiplication is undefined behavior in C++, so
//     the check is only dependable for unsigned arithmetic -- confirm what the
//     call sites pass.
//   * NOTE(review): std::runtime_error lives in <stdexcept>; no include of it
//     is visible ahead of this macro in this header -- confirm it is in scope
//     wherever the macro is expanded.
#define OVERFLOW_CHECK_BY_MULTIPLICATION(type, op1, op2) \
{ \
if (!(0 == (op1)) && !(0 == (op2))) { \
volatile type r = (op1) * (op2); \
r /= (op1); \
if (!(r == (op2))) \
throw std::runtime_error("Integer overflow by multiplication"); \
} \
}

#include "onedal/common/dispatch_utils.hpp"
#include "onedal/common/instantiate_utils.hpp"
#include "onedal/common/pybind11_helpers.hpp"
Expand Down
9 changes: 6 additions & 3 deletions onedal/dal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@ ONEDAL_PY_INIT_MODULE(spmd_policy);
ONEDAL_PY_INIT_MODULE(table);

/* primitives */
ONEDAL_PY_INIT_MODULE(get_tree);
ONEDAL_PY_INIT_MODULE(covariance);
ONEDAL_PY_INIT_MODULE(linear_kernel);
ONEDAL_PY_INIT_MODULE(rbf_kernel);
ONEDAL_PY_INIT_MODULE(polynomial_kernel);
ONEDAL_PY_INIT_MODULE(sigmoid_kernel);

/* algorithms */
ONEDAL_PY_INIT_MODULE(ensemble);
ONEDAL_PY_INIT_MODULE(decomposition);
#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100
ONEDAL_PY_INIT_MODULE(linear_model);
Expand All @@ -61,12 +63,13 @@ PYBIND11_MODULE(_onedal_py_host, m) {
init_rbf_kernel(m);
init_polynomial_kernel(m);
init_sigmoid_kernel(m);

init_get_tree(m);

init_decomposition(m);
#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100
init_ensemble(m);
#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100
init_linear_model(m);
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100
init_neighbors(m);
init_svm(m);
}
Expand Down
19 changes: 19 additions & 0 deletions onedal/ensemble/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ===============================================================================
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================

from .forest import RandomForestClassifier, RandomForestRegressor

# Public names of this package: the two estimator classes imported from
# `.forest` above. This list is what `from onedal.ensemble import *` exposes.
__all__ = ['RandomForestClassifier', 'RandomForestRegressor']
Loading

0 comments on commit f7fc14a

Please sign in to comment.