Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] remove convert_to_supported function #2217

Open
wants to merge 54 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 53 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
2bb22f7
make the switch
icfaust Dec 9, 2024
2a46dd0
further refinement
icfaust Dec 9, 2024
ce2dbc3
further refinement
icfaust Dec 9, 2024
066c86a
extract convert_to_supported
icfaust Dec 9, 2024
4b41760
move around to_table because of parameters
icfaust Dec 9, 2024
2af260e
kMeans fix
icfaust Dec 9, 2024
f0360be
alias namespace
icfaust Dec 9, 2024
70c32b2
change serialization for new setup
icfaust Dec 9, 2024
1e08b6c
add default argument
icfaust Dec 9, 2024
940cff9
add default argument
icfaust Dec 9, 2024
0885e6f
surefire fix
icfaust Dec 9, 2024
896f8ca
forgotten ;
icfaust Dec 9, 2024
95d4946
comment out
icfaust Dec 9, 2024
cee12f2
fixes
icfaust Dec 9, 2024
ed6e39f
fixes2
icfaust Dec 9, 2024
fb4c70f
try again
icfaust Dec 9, 2024
d0f8655
transition for sparse support
icfaust Dec 9, 2024
f04f66d
fixes attempt 3
icfaust Dec 9, 2024
1c8eda0
fixes attempt 4
icfaust Dec 9, 2024
4d909a5
add header
icfaust Dec 9, 2024
47f6f6e
change for pybind11 deprecation
icfaust Dec 9, 2024
acdfe23
misread suggestion
icfaust Dec 9, 2024
1559938
fix dec_ref
icfaust Dec 9, 2024
9e79bab
make constexpr
icfaust Dec 9, 2024
e3d9468
fix init
icfaust Dec 9, 2024
d30692a
hopefully will start working
icfaust Dec 9, 2024
438b0b1
make explicit
icfaust Dec 9, 2024
50a2ab8
try again
icfaust Dec 9, 2024
3e697af
try again
icfaust Dec 9, 2024
5c7c1ce
try again
icfaust Dec 9, 2024
f2e88b2
try again
icfaust Dec 9, 2024
65ac9ad
fix dbscan
icfaust Dec 9, 2024
22ea481
try to fix kmeans
icfaust Dec 9, 2024
e187d33
fix policy queue
icfaust Dec 9, 2024
81df289
fix incrementalRidge
icfaust Dec 9, 2024
3d4451a
add test
icfaust Dec 9, 2024
8f89577
fix mistake in #2180
icfaust Dec 9, 2024
610437a
fix neighbors
icfaust Dec 9, 2024
bfe7f35
move test to include warning check
icfaust Dec 9, 2024
eb5944d
move test to include warning check
icfaust Dec 9, 2024
37be22a
final changes
icfaust Dec 9, 2024
b484751
Merge branch 'uxlfoundation:main' into dev/remove_convert_to_supported
icfaust Dec 9, 2024
79b2eba
Update data_conversion.cpp
icfaust Dec 9, 2024
8cf9156
work on memory leak possibility
icfaust Dec 9, 2024
98680cf
remove from inconclusive values
icfaust Dec 9, 2024
48bddb7
Update data_conversion.cpp
icfaust Dec 9, 2024
a8d48fe
Update data_conversion.cpp
icfaust Dec 10, 2024
3679366
Update data_conversion.cpp
icfaust Dec 10, 2024
1dc7d41
fix spmd
icfaust Dec 10, 2024
b6c31ea
missed one
icfaust Dec 10, 2024
cd04cc2
fixes for IncPCA
icfaust Dec 10, 2024
0c08fa8
move DummySyclQueue
icfaust Dec 11, 2024
764cd7a
remove unused import
icfaust Dec 11, 2024
d1bd343
Merge branch 'uxlfoundation:main' into dev/remove_convert_to_supported
icfaust Dec 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions onedal/basic_statistics/basic_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,12 @@ void init_partial_compute_result(py::module_& m) {
if (t.size() != 6)
throw std::runtime_error("Invalid state!");
result_t res;
if (py::cast<int>(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0].ptr()));
if (py::cast<int>(t[1].attr("size")) != 0) res.set_partial_min(convert_to_table(t[1].ptr()));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_max(convert_to_table(t[2].ptr()));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[3].ptr()));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_sum_squares(convert_to_table(t[4].ptr()));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_sum_squares_centered(convert_to_table(t[5].ptr()));
if (py::cast<int>(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0]));
if (py::cast<int>(t[1].attr("size")) != 0) res.set_partial_min(convert_to_table(t[1]));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_max(convert_to_table(t[2]));
if (py::cast<int>(t[3].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[3]));
if (py::cast<int>(t[4].attr("size")) != 0) res.set_partial_sum_squares(convert_to_table(t[4]));
if (py::cast<int>(t[5].attr("size")) != 0) res.set_partial_sum_squares_centered(convert_to_table(t[5]));

return res;
}
Expand Down
7 changes: 3 additions & 4 deletions onedal/basic_statistics/basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import numpy as np

from ..common._base import BaseEstimator
from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _is_csr
from ..utils.validation import _check_array

Expand Down Expand Up @@ -81,11 +81,10 @@ def fit(self, data, sample_weight=None, queue=None):
if sample_weight is not None:
sample_weight = _check_array(sample_weight, ensure_2d=False)

data, sample_weight = _convert_to_supported(policy, data, sample_weight)
is_single_dim = data.ndim == 1
data_table, weights_table = to_table(data, sample_weight)
data_table, weights_table = to_table(data, sample_weight, queue=queue)

dtype = data.dtype
dtype = data_table.dtype
raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr)
for opt, raw_value in raw_result.items():
value = from_table(raw_value).ravel()
Expand Down
5 changes: 2 additions & 3 deletions onedal/basic_statistics/incremental_basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from daal4py.sklearn._utils import get_dtype

from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _check_array
from .basic_statistics import BaseBasicStatistics

Expand Down Expand Up @@ -106,7 +106,6 @@ def partial_fit(self, X, weights=None, queue=None):
"""
self._queue = queue
policy = self._get_policy(queue, X)
X, weights = _convert_to_supported(policy, X, weights)

X = _check_array(
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
Expand All @@ -123,7 +122,7 @@ def partial_fit(self, X, weights=None, queue=None):
dtype = get_dtype(X)
self._onedal_params = self._get_onedal_params(False, dtype=dtype)

X_table, weights_table = to_table(X, weights)
X_table, weights_table = to_table(X, weights, queue=queue)
self._partial_result = self._get_backend(
"basic_statistics",
None,
Expand Down
13 changes: 4 additions & 9 deletions onedal/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from ..common._base import BaseEstimator
from ..common._mixin import ClusterMixin
from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _check_array


Expand Down Expand Up @@ -60,15 +60,10 @@ def _fit(self, X, y, sample_weight, module, queue):
policy = self._get_policy(queue, X)
X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
sample_weight = make2d(sample_weight) if sample_weight is not None else None
X = make2d(X)
X_table, sample_weight_table = to_table(X, sample_weight, queue=queue)

types = [np.float32, np.float64]
if get_dtype(X) not in types:
X = X.astype(np.float64)
X = _convert_to_supported(policy, X)
dtype = get_dtype(X)
params = self._get_onedal_params(dtype)
result = module.compute(policy, params, to_table(X), to_table(sample_weight))
params = self._get_onedal_params(X_table.dtype)
result = module.compute(policy, params, X_table, sample_weight_table)

self.labels_ = from_table(result.responses).ravel()
if result.core_observation_indices is not None:
Expand Down
18 changes: 6 additions & 12 deletions onedal/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

from ..common._base import BaseEstimator as onedal_BaseEstimator
from ..common._mixin import ClusterMixin, TransformerMixin
from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr


Expand Down Expand Up @@ -205,8 +205,7 @@ def _init_centroids_onedal(
assert centers.shape[1] == X_table.column_count
# KMeans is implemented on both CPU and GPU for Dense and CSR data
# The original policy can be used here
centers = _convert_to_supported(policy, centers)
centers_table = to_table(centers)
centers_table = to_table(centers, queue=getattr(policy, "_queue", None))
else:
raise TypeError("Unsupported type of the `init` value")

Expand Down Expand Up @@ -240,8 +239,7 @@ def _init_centroids_sklearn(self, X, init, random_state, policy, dtype=np.float3
f"callable, got '{ init }' instead."
)

centers = _convert_to_supported(policy, centers)
return to_table(centers)
return to_table(centers, queue=getattr(policy, "_queue", None))

def _fit_backend(
self, X_table, centroids_table, module, policy, dtype=np.float32, is_csr=False
Expand All @@ -266,14 +264,11 @@ def _fit(self, X, module, queue=None):
X = _check_array(
X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False
)
X = _convert_to_supported(policy, X)
dtype = get_dtype(X)
X_table = to_table(X)
X_table = to_table(X, queue=queue)
dtype = X_table.dtype

self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype)

params = self._get_onedal_params(is_csr, dtype)

self.n_features_in_ = X_table.column_count

best_model, best_n_iter = None, None
Expand Down Expand Up @@ -381,8 +376,7 @@ def _predict(self, X, module, queue=None, result_options=None):
is_csr = _is_csr(X)

policy = self._get_policy(queue, X)
X = _convert_to_supported(policy, X)
X_table = to_table(X)
X_table = to_table(X, queue=queue)
params = self._get_onedal_params(is_csr, X_table.dtype, result_options)

result = module.infer(policy, params, self.model_, X_table)
Expand Down
15 changes: 6 additions & 9 deletions onedal/cluster/kmeans_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from daal4py.sklearn._utils import daal_check_version, get_dtype

from ..common._base import BaseEstimator as onedal_BaseEstimator
from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _check_array

if daal_check_version((2023, "P", 200)):
Expand Down Expand Up @@ -57,19 +57,16 @@ def _get_onedal_params(self, dtype=np.float32):
"cluster_count": self.cluster_count,
}

def _get_params_and_input(self, X, policy):
def _get_params_and_input(self, X, queue):
X = _check_array(
X,
dtype=[np.float64, np.float32],
accept_sparse="csr",
force_all_finite=False,
)

X = _convert_to_supported(policy, X)

dtype = get_dtype(X)
params = self._get_onedal_params(dtype)
return (params, to_table(X), dtype)
X = to_table(X, queue=queue)
params = self._get_onedal_params(X.dtype)
return (params, X, X.dtype)

def _compute_raw(self, X_table, module, policy, dtype=np.float32):
params = self._get_onedal_params(dtype)
Expand All @@ -83,7 +80,7 @@ def _compute(self, X, module, queue):
# oneDAL KMeans Init for sparse data does not have GPU support
if issparse(X):
policy = self._get_policy(None, None)
_, X_table, dtype = self._get_params_and_input(X, policy)
_, X_table, dtype = self._get_params_and_input(X, queue)

centroids = self._compute_raw(X_table, module, policy, dtype)

Expand Down
6 changes: 3 additions & 3 deletions onedal/covariance/covariance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ inline void init_partial_compute_result(pybind11::module_& m) {
if (t.size() != 3)
throw std::runtime_error("Invalid state!");
result_t res;
if (py::cast<int>(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0].ptr()));
if (py::cast<int>(t[1].attr("size")) != 0) res.set_partial_crossproduct(convert_to_table(t[1].ptr()));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[2].ptr()));
if (py::cast<int>(t[0].attr("size")) != 0) res.set_partial_n_rows(convert_to_table(t[0]));
if (py::cast<int>(t[1].attr("size")) != 0) res.set_partial_crossproduct(convert_to_table(t[1]));
if (py::cast<int>(t[2].attr("size")) != 0) res.set_partial_sum(convert_to_table(t[2]));
return res;
}
));
Expand Down
13 changes: 5 additions & 8 deletions onedal/covariance/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from ..common._base import BaseEstimator
from ..common.hyperparameters import get_hyperparameters
from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table


class BaseEmpiricalCovariance(BaseEstimator, metaclass=ABCMeta):
Expand Down Expand Up @@ -95,9 +95,8 @@ def fit(self, X, y=None, queue=None):
"""
policy = self._get_policy(queue, X)
X = _check_array(X, dtype=[np.float64, np.float32])
X = _convert_to_supported(policy, X)
dtype = get_dtype(X)
params = self._get_onedal_params(dtype)
X = to_table(X, queue=queue)
params = self._get_onedal_params(X.dtype)
hparams = get_hyperparameters("covariance", "compute")
if hparams is not None and not hparams.is_default:
result = self._get_backend(
Expand All @@ -107,12 +106,10 @@ def fit(self, X, y=None, queue=None):
policy,
params,
hparams.backend,
to_table(X),
X,
)
else:
result = self._get_backend(
"covariance", None, "compute", policy, params, to_table(X)
)
result = self._get_backend("covariance", None, "compute", policy, params, X)
if daal_check_version((2024, "P", 1)) or (not self.bias):
self.covariance_ = from_table(result.cov_matrix)
else:
Expand Down
9 changes: 4 additions & 5 deletions onedal/covariance/incremental_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from daal4py.sklearn._utils import daal_check_version, get_dtype

from ..datatypes import _convert_to_supported, from_table, to_table
from ..datatypes import from_table, to_table
from ..utils import _check_array
from .covariance import BaseEmpiricalCovariance

Expand Down Expand Up @@ -101,21 +101,20 @@ def partial_fit(self, X, y=None, queue=None):

policy = self._get_policy(queue, X)

X = _convert_to_supported(policy, X)
X_table = to_table(X, queue=queue)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see 2 variants of implementation in this PR:

  1. X = to_table(X, queue=queue)
  2. X_table = to_table(X, queue=queue)

Here 2) is used, but it seems X is not needed after the conversion anyway. Why not to use 1) everywhere possible?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, although it looks like generally the name is just matching the name that was previously used after to_table was called before these changes


if not hasattr(self, "_dtype"):
self._dtype = get_dtype(X)
self._dtype = X_table.dtype

params = self._get_onedal_params(self._dtype)
table_X = to_table(X)
self._partial_result = self._get_backend(
"covariance",
None,
"partial_compute",
policy,
params,
self._partial_result,
table_X,
X_table,
)
self._need_to_finalize = True

Expand Down
4 changes: 2 additions & 2 deletions onedal/datatypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
# limitations under the License.
# ==============================================================================

from ._data_conversion import _convert_to_supported, from_table, to_table
from ._data_conversion import from_table, to_table

__all__ = ["from_table", "to_table", "_convert_to_supported"]
__all__ = ["from_table", "to_table"]
41 changes: 4 additions & 37 deletions onedal/datatypes/_data_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ def _apply_and_pass(func, *args, **kwargs):
return tuple(map(lambda arg: func(arg, **kwargs), args))


def _convert_one_to_table(arg):
def _convert_one_to_table(arg, queue=None):
# All inputs for table conversion must be array-like or sparse, not scalars
return _backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg)
return _backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg, queue)


def to_table(*args):
def to_table(*args, queue=None):
"""Create oneDAL tables from scalars and/or arrays.

Note: this implementation can be used with scipy.sparse, numpy ndarrays,
Expand All @@ -51,7 +51,7 @@ def to_table(*args):
-------
tables: {oneDAL homogeneous tables}
"""
return _apply_and_pass(_convert_one_to_table, *args)
return _apply_and_pass(_convert_one_to_table, *args, queue=queue)


if _is_dpc_backend:
Expand Down Expand Up @@ -81,33 +81,6 @@ def _table_to_array(table, xp=None):

from ..common._policy import _HostInteropPolicy

def _convert_to_supported(policy, *data):
def func(x):
return x

# CPUs support FP64 by default
if isinstance(policy, _HostInteropPolicy):
return _apply_and_pass(func, *data)

# It can be either SPMD or DPCPP policy
device = policy._queue.sycl_device

def convert_or_pass(x):
if (x is not None) and (x.dtype == np.float64):
warnings.warn(
"Data will be converted into float32 from "
"float64 because device does not support it",
RuntimeWarning,
)
return x.astype(np.float32)
else:
return x

if not device.has_aspect_fp64:
func = convert_or_pass

return _apply_and_pass(func, *data)

def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
# Currently only `__sycl_usm_array_interface__` protocol used to
# convert into dpnp/dpctl tensors.
Expand All @@ -132,12 +105,6 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):

else:

def _convert_to_supported(policy, *data):
def func(x):
return x

return _apply_and_pass(func, *data)

def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
# Currently only `__sycl_usm_array_interface__` protocol used to
# convert into dpnp/dpctl tensors.
Expand Down
Loading
Loading