Refactor incremental spmd algos #2248

Draft · wants to merge 11 commits into base: main
14 changes: 10 additions & 4 deletions onedal/basic_statistics/incremental_basic_statistics.py
@@ -71,8 +71,9 @@ def __init__(self, result_options="all"):

def _reset(self):
self._need_to_finalize = False
self._partial_result = self._get_backend(
"basic_statistics", None, "partial_compute_result"
# Not supported with spmd policy so IncrementalBasicStatistics must be specified
self._partial_result = IncrementalBasicStatistics._get_backend(
IncrementalBasicStatistics, "basic_statistics", None, "partial_compute_result"
)

def __getstate__(self):
@@ -105,7 +106,10 @@ def partial_fit(self, X, weights=None, queue=None):
Returns the instance itself.
"""
self._queue = queue
policy = self._get_policy(queue, X)
# Not supported with spmd policy so IncrementalBasicStatistics must be specified
policy = IncrementalBasicStatistics._get_policy(
IncrementalBasicStatistics, queue, X
)

X = _check_array(
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
@@ -123,7 +127,9 @@ def partial_fit(self, X, weights=None, queue=None):
self._onedal_params = self._get_onedal_params(False, dtype=dtype)

X_table, weights_table = to_table(X, weights, queue=queue)
self._partial_result = self._get_backend(
# Not supported with spmd policy so IncrementalBasicStatistics must be specified
self._partial_result = IncrementalBasicStatistics._get_backend(
IncrementalBasicStatistics,
"basic_statistics",
None,
"partial_compute",
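Note on the recurring pattern in these hunks: the SPMD estimators inherit from both `BaseEstimatorSPMD` and the host estimator, so a plain `self._get_backend(...)` or `self._get_policy(...)` inside the host base class would resolve to the SPMD backend through the subclass MRO. Since partial computations are not supported with the SPMD policy (per the added comments), the host class now pins those lookups by calling the method unbound on its own class and passing the class in place of `self`. A minimal sketch with hypothetical `Host`/`EstimatorSPMD` classes (not the real onedal API) showing the difference in resolution:

```python
class Host:
    def _get_backend(self, module):
        return f"host backend: {module}"

    def partial_fit(self, module):
        via_mro = self._get_backend(module)        # resolves through the instance MRO
        pinned = Host._get_backend(Host, module)   # always the Host implementation
        return via_mro, pinned


class BaseEstimatorSPMD:
    def _get_backend(self, module):
        return f"spmd backend: {module}"


class EstimatorSPMD(BaseEstimatorSPMD, Host):
    pass


print(EstimatorSPMD().partial_fit("basic_statistics"))
# -> ('spmd backend: basic_statistics', 'host backend: basic_statistics')
```

The same pinning is applied below in the covariance, PCA, and linear-regression base classes.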
14 changes: 10 additions & 4 deletions onedal/covariance/incremental_covariance.py
@@ -58,8 +58,9 @@ def __init__(self, method="dense", bias=False, assume_centered=False):

def _reset(self):
self._need_to_finalize = False
self._partial_result = self._get_backend(
"covariance", None, "partial_compute_result"
# Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
self._partial_result = IncrementalEmpiricalCovariance._get_backend(
IncrementalEmpiricalCovariance, "covariance", None, "partial_compute_result"
)

def __getstate__(self):
@@ -99,15 +100,20 @@ def partial_fit(self, X, y=None, queue=None):

self._queue = queue

policy = self._get_policy(queue, X)
# Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
policy = IncrementalEmpiricalCovariance._get_policy(
IncrementalEmpiricalCovariance, queue, X
)

X_table = to_table(X, queue=queue)

if not hasattr(self, "_dtype"):
self._dtype = X_table.dtype

params = self._get_onedal_params(self._dtype)
self._partial_result = self._get_backend(
# Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
self._partial_result = IncrementalEmpiricalCovariance._get_backend(
IncrementalEmpiricalCovariance,
"covariance",
None,
"partial_compute",
12 changes: 9 additions & 3 deletions onedal/decomposition/incremental_pca.py
@@ -100,7 +100,10 @@ def __init__(

def _reset(self):
self._need_to_finalize = False
module = self._get_backend("decomposition", "dim_reduction")
# Not supported with spmd policy so IncrementalPCA must be specified
module = IncrementalPCA._get_backend(
IncrementalPCA, "decomposition", "dim_reduction"
)
if hasattr(self, "components_"):
del self.components_
self._partial_result = module.partial_train_result()
@@ -154,14 +157,17 @@ def partial_fit(self, X, queue):

self._queue = queue

policy = self._get_policy(queue, X)
# Not supported with spmd policy so IncrementalPCA must be specified
policy = IncrementalPCA._get_policy(IncrementalPCA, queue, X)
X_table = to_table(X, queue=queue)

if not hasattr(self, "_dtype"):
self._dtype = X_table.dtype
self._params = self._get_onedal_params(X_table)

self._partial_result = self._get_backend(
# Not supported with spmd policy so IncrementalPCA must be specified
self._partial_result = IncrementalPCA._get_backend(
IncrementalPCA,
"decomposition",
"dim_reduction",
"partial_train",
14 changes: 10 additions & 4 deletions onedal/decomposition/pca.py
@@ -119,7 +119,8 @@ def _compute_noise_variance(self, n_components, n_sf_min):
return 0.0

def _create_model(self):
m = self._get_backend("decomposition", "dim_reduction", "model")
# Not supported with spmd policy so BasePCA must be specified
m = BasePCA._get_backend(BasePCA, "decomposition", "dim_reduction", "model")
m.eigenvectors = to_table(self.components_)
m.means = to_table(self.mean_)
if self.whiten:
@@ -128,15 +129,20 @@ def _create_model(self):
return m

def predict(self, X, queue=None):
policy = self._get_policy(queue, X)
# Not supported with spmd policy so BasePCA must be specified
policy = BasePCA._get_policy(BasePCA, queue, X)
model = self._create_model()
X_table = to_table(X, queue=queue)
params = self._get_onedal_params(X_table, stage="predict")

result = self._get_backend(
"decomposition", "dim_reduction", "infer", policy, params, model, X_table
# Not supported with spmd policy so BasePCA must be specified
result = BasePCA._get_backend(
BasePCA, "decomposition", "dim_reduction", "infer", policy, params, model, X_table
)
return from_table(result.transformed_data)

def transform(self, X, queue=None):
return self.predict(X, queue=queue)


class PCA(BasePCA):
4 changes: 2 additions & 2 deletions onedal/decomposition/tests/test_incremental_pca.py
@@ -40,7 +40,7 @@ def test_on_gold_data(queue, is_deterministic, whiten, num_blocks, dtype):

result = incpca.finalize_fit()

transformed_data = incpca.predict(X, queue=queue)
transformed_data = incpca.transform(X, queue=queue)

expected_n_components_ = 2
expected_components_ = np.array([[0.83849224, 0.54491354], [-0.54491354, 0.83849224]])
@@ -128,7 +128,7 @@ def test_on_random_data(

incpca.finalize_fit()

transformed_data = incpca.predict(X, queue=queue)
transformed_data = incpca.transform(X, queue=queue)
tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6

n_components = incpca.n_components_
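These test updates switch from `predict` to the new `transform` alias added in `onedal/decomposition/pca.py`. A rough sketch of the flow the tests exercise, assuming `IncrementalPCA` is importable from `onedal.decomposition` as in the test module and that `n_components` is a constructor argument; `queue=None` keeps the computation on the host:

```python
import numpy as np

from onedal.decomposition import IncrementalPCA  # import path assumed from the tests

X = np.random.rand(1000, 10)

incpca = IncrementalPCA(n_components=2)
for batch in np.array_split(X, 4):
    incpca.partial_fit(batch, queue=None)   # accumulate partial results per batch
incpca.finalize_fit()                       # merge partial results into components_

# transform() now delegates to predict(), so both return the projected data.
transformed = incpca.transform(X, queue=None)
assert transformed.shape == (1000, 2)
```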
18 changes: 14 additions & 4 deletions onedal/linear_model/incremental_linear_model.py
@@ -48,8 +48,12 @@ def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"):

def _reset(self):
self._need_to_finalize = False
self._partial_result = self._get_backend(
"linear_model", "regression", "partial_train_result"
# Not supported with spmd policy so IncrementalLinearRegression must be specified
self._partial_result = IncrementalLinearRegression._get_backend(
IncrementalLinearRegression,
"linear_model",
"regression",
"partial_train_result",
)

def __getstate__(self):
@@ -84,10 +88,16 @@ def partial_fit(self, X, y, queue=None):
self : object
Returns the instance itself.
"""
module = self._get_backend("linear_model", "regression")
# Not supported with spmd policy so IncrementalLinearRegression must be specified
module = IncrementalLinearRegression._get_backend(
IncrementalLinearRegression, "linear_model", "regression"
)

self._queue = queue
policy = self._get_policy(queue, X)
# Not supported with spmd policy so IncrementalLinearRegression must be specified
policy = IncrementalLinearRegression._get_policy(
IncrementalLinearRegression, queue, X
)

X, y = _check_X_y(
X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
49 changes: 1 addition & 48 deletions onedal/spmd/basic_statistics/incremental_basic_statistics.py
@@ -14,58 +14,11 @@
# limitations under the License.
# ==============================================================================

from daal4py.sklearn._utils import get_dtype

from ...basic_statistics import (
IncrementalBasicStatistics as base_IncrementalBasicStatistics,
)
from ...datatypes import to_table
from .._base import BaseEstimatorSPMD


class IncrementalBasicStatistics(BaseEstimatorSPMD, base_IncrementalBasicStatistics):
def _reset(self):
self._need_to_finalize = False
self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend(
"basic_statistics", None, "partial_compute_result"
)

def partial_fit(self, X, weights=None, queue=None):
"""
Computes partial data for basic statistics
from data batch X and saves it to `_partial_result`.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data batch, where `n_samples` is the number of samples
in the batch, and `n_features` is the number of features.

queue : dpctl.SyclQueue
If not None, use this queue for computations.

Returns
-------
self : object
Returns the instance itself.
"""
self._queue = queue
policy = super(base_IncrementalBasicStatistics, self)._get_policy(queue, X)
X_table, weights_table = to_table(X, weights, queue=queue)

if not hasattr(self, "_onedal_params"):
self._onedal_params = self._get_onedal_params(False, dtype=X_table.dtype)

self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend(
"basic_statistics",
None,
"partial_compute",
policy,
self._onedal_params,
self._partial_result,
X_table,
weights_table,
)

self._need_to_finalize = True
return self
pass
59 changes: 1 addition & 58 deletions onedal/spmd/covariance/incremental_covariance.py
@@ -14,70 +14,13 @@
# limitations under the License.
# ==============================================================================

import numpy as np

from daal4py.sklearn._utils import get_dtype

from ...covariance import (
IncrementalEmpiricalCovariance as base_IncrementalEmpiricalCovariance,
)
from ...datatypes import to_table
from ...utils import _check_array
from .._base import BaseEstimatorSPMD


class IncrementalEmpiricalCovariance(
BaseEstimatorSPMD, base_IncrementalEmpiricalCovariance
):
def _reset(self):
self._need_to_finalize = False
self._partial_result = super(
base_IncrementalEmpiricalCovariance, self
)._get_backend("covariance", None, "partial_compute_result")

def partial_fit(self, X, y=None, queue=None):
"""
Computes partial data for the covariance matrix
from data batch X and saves it to `_partial_result`.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data batch, where `n_samples` is the number of samples
in the batch, and `n_features` is the number of features.

y : Ignored
Not used, present for API consistency by convention.

queue : dpctl.SyclQueue
If not None, use this queue for computations.

Returns
-------
self : object
Returns the instance itself.
"""
X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True)

self._queue = queue

policy = super(base_IncrementalEmpiricalCovariance, self)._get_policy(queue, X)

X_table = to_table(X, queue=queue)

if not hasattr(self, "_dtype"):
self._dtype = X_table.dtype

params = self._get_onedal_params(self._dtype)
self._partial_result = super(
base_IncrementalEmpiricalCovariance, self
)._get_backend(
"covariance",
None,
"partial_compute",
policy,
params,
self._partial_result,
X_table,
)
self._need_to_finalize = True
pass
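With the partial-compute and policy lookups pinned in the host base classes, the SPMD wrappers above no longer need to re-implement `_reset` and `partial_fit` and reduce to bare subclasses. A rough usage sketch of the SPMD flow this enables; the import path follows the file path in the diff, while the MPI/dpctl setup and the `finalize_fit`/`covariance_` details are assumptions, not confirmed by this PR:

```python
import dpctl
import numpy as np

from onedal.spmd.covariance import IncrementalEmpiricalCovariance  # path assumed

queue = dpctl.SyclQueue("gpu")          # one device queue per MPI rank
X_local = np.random.rand(1000, 10)      # each rank holds only its own data shard

est = IncrementalEmpiricalCovariance()
for batch in np.array_split(X_local, 4):
    # Partial computations run with the host backend/policy (per the base-class change).
    est.partial_fit(batch, queue=queue)

# finalize_fit is where the SPMD policy applies and partial results are combined across ranks.
est.finalize_fit(queue=queue)
print(est.covariance_)
```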