[enhancement] make BasicStatistics and IncrementalBasicStatistics array_api-compliant #2189

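This draft has no written description, so the sketch below is a hypothetical illustration (not taken from the PR) of what array API compliance means for these estimators: array inputs such as dpctl.tensor arrays are fitted directly under array API dispatch, and results are read through the trailing-underscore attributes used in the updated examples. The config_context(array_api_dispatch=True) usage is an assumption borrowed from scikit-learn's array API support; the final behavior depends on the merged implementation.

# Hypothetical usage sketch (not part of this PR): assumes scikit-learn's
# array_api_dispatch config option is honored by sklearnex's config_context.
import dpctl.tensor as dpt
from sklearnex import config_context
from sklearnex.basic_statistics import BasicStatistics

X = dpt.asarray([[0.0, 1.0], [1.0, 2.0], [2.0, 3.0]])

with config_context(array_api_dispatch=True):
    bs = BasicStatistics(result_options=["mean", "max"]).fit(X)

# Result attributes follow the trailing-underscore convention used in the
# updated examples; with array API dispatch they are expected to stay in
# the input's array namespace.
print(bs.mean_)
print(bs.max_)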
Status: Draft. Wants to merge 145 commits into base: main.

Changes from all commits (145 commits)
32fe269
add finiteness_checker pybind11 bindings
icfaust Oct 23, 2024
cdbf1b5
added finiteness checker
icfaust Oct 23, 2024
62674a2
Update finiteness_checker.cpp
icfaust Oct 23, 2024
c75c23b
Update finiteness_checker.cpp
icfaust Oct 23, 2024
6a20938
Update finiteness_checker.cpp
icfaust Oct 23, 2024
382d7a1
Update finiteness_checker.cpp
icfaust Oct 23, 2024
c8ffd9c
Update finiteness_checker.cpp
icfaust Oct 23, 2024
9aa13d5
Update finiteness_checker.cpp
icfaust Oct 23, 2024
84e15d5
Rename finiteness_checker.cpp to finiteness_checker.cpp
icfaust Oct 23, 2024
63073c6
Update finiteness_checker.cpp
icfaust Oct 24, 2024
d915da5
Merge branch 'intel:main' into dev/new_assert_all_fininte
icfaust Oct 28, 2024
3dddf2d
add next step
icfaust Oct 31, 2024
1e1213e
follow conventions
icfaust Oct 31, 2024
0531713
make xtable explicit
icfaust Oct 31, 2024
e831167
remove comment
icfaust Oct 31, 2024
d6eb1d0
Update validation.py
icfaust Oct 31, 2024
fb30d6e
Update __init__.py
icfaust Nov 1, 2024
63a18c2
Update validation.py
icfaust Nov 1, 2024
76c0856
Update __init__.py
icfaust Nov 1, 2024
7deb2bb
Update __init__.py
icfaust Nov 1, 2024
ed46b29
Update validation.py
icfaust Nov 1, 2024
67d6273
Update _data_conversion.py
icfaust Nov 1, 2024
054f0a1
Merge branch 'main' into dev/new_assert_all_fininte
icfaust Nov 1, 2024
8abead9
Update _data_conversion.py
icfaust Nov 1, 2024
47d0f8b
Update policy_common.cpp
icfaust Nov 1, 2024
e48c2bd
Update policy_common.cpp
icfaust Nov 1, 2024
c6751c4
Update _policy.py
icfaust Nov 1, 2024
f3e4a3a
Update policy_common.cpp
icfaust Nov 2, 2024
39cdb5f
Rename finiteness_checker.cpp to finiteness_checker.cpp
icfaust Nov 2, 2024
0f39613
Create finiteness_checker.py
icfaust Nov 2, 2024
b42cfe3
Update validation.py
icfaust Nov 2, 2024
0ed615e
Update __init__.py
icfaust Nov 2, 2024
f101aff
attempt at fixing circular imports again
icfaust Nov 2, 2024
24c0e94
fix isort
icfaust Nov 2, 2024
3f96166
remove __init__ changes
icfaust Nov 2, 2024
d985053
last move
icfaust Nov 2, 2024
90ec48b
Update policy_common.cpp
icfaust Nov 2, 2024
8c2c854
Update policy_common.cpp
icfaust Nov 2, 2024
6fa38d7
Update policy_common.cpp
icfaust Nov 2, 2024
9c1ca9c
Update policy_common.cpp
icfaust Nov 2, 2024
4b67dbd
Update validation.py
icfaust Nov 2, 2024
fa59a3c
add testing
icfaust Nov 2, 2024
3330b33
isort
icfaust Nov 2, 2024
4895940
attempt to fix module error
icfaust Nov 2, 2024
0c6dd5d
add fptype
icfaust Nov 2, 2024
e2182fa
fix typo
icfaust Nov 2, 2024
982ef2c
Update validation.py
icfaust Nov 2, 2024
2fb52a8
remove sua_ifcae from to_table
icfaust Nov 3, 2024
28dc267
isort and black
icfaust Nov 3, 2024
2f85fd4
Update test_memory_usage.py
icfaust Nov 3, 2024
8659248
format
icfaust Nov 3, 2024
3827d6f
Update _data_conversion.py
icfaust Nov 3, 2024
55fa7d2
Update _data_conversion.py
icfaust Nov 3, 2024
175cd78
Update test_validation.py
icfaust Nov 3, 2024
7016ad0
remove unnecessary code
icfaust Nov 3, 2024
1a01859
Merge branch 'main' into dev/new_assert_all_fininte
icfaust Nov 18, 2024
2fbcdd9
merge master
icfaust Nov 18, 2024
fb7375f
make reviewer changes
icfaust Nov 19, 2024
30816bf
make dtype check change
icfaust Nov 19, 2024
abb3b16
add sparse testing
icfaust Nov 19, 2024
97aef73
try again
icfaust Nov 19, 2024
6e29651
try again
icfaust Nov 19, 2024
59363a8
try again
icfaust Nov 19, 2024
12de703
temporary commit
icfaust Nov 20, 2024
07ec3d8
first attempt
icfaust Nov 20, 2024
32c565d
missing change?
icfaust Nov 20, 2024
a571a4e
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 20, 2024
5093ed7
modify DummyEstimator for testing
icfaust Nov 20, 2024
f04deba
generalize DummyEstimator
icfaust Nov 20, 2024
740a5e7
switch test
icfaust Nov 20, 2024
27050bd
further testing changes
icfaust Nov 20, 2024
53c8f7b
add initial validate_data test, will be refactored
icfaust Nov 20, 2024
90f59c4
fixes for CI
icfaust Nov 20, 2024
7f170e2
Update validation.py
icfaust Nov 20, 2024
81e2bbc
Update validation.py
icfaust Nov 20, 2024
116bdba
Update test_memory_usage.py
icfaust Nov 20, 2024
076ebc4
Update base.py
icfaust Nov 20, 2024
e1d0743
Update base.py
icfaust Nov 20, 2024
f59cdd3
improve tests
icfaust Nov 20, 2024
7f9ea25
fix logic
icfaust Nov 20, 2024
51247c0
fix logic
icfaust Nov 20, 2024
6e5c0ef
fix logic again
icfaust Nov 20, 2024
8d47744
rename file
icfaust Nov 20, 2024
1ae9af5
Revert "rename file"
icfaust Nov 20, 2024
bf9b46e
remove duplication
icfaust Nov 20, 2024
3101c3f
fix imports
icfaust Nov 20, 2024
6da176b
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 20, 2024
ee799f6
Rename test_finite.py to test_validation.py
icfaust Nov 20, 2024
db4a6c6
Revert "Rename test_finite.py to test_validation.py"
icfaust Nov 20, 2024
b5acbac
updates
icfaust Nov 21, 2024
ed57c15
Update validation.py
icfaust Nov 21, 2024
414f897
fixes for some test failures
icfaust Nov 21, 2024
83253b3
fix text
icfaust Nov 21, 2024
b22e23a
fixes for some failures
icfaust Nov 21, 2024
2f8ec16
make consistent
icfaust Nov 21, 2024
1fd9973
fix bad logic
icfaust Nov 21, 2024
c20c8cc
fix in string
icfaust Nov 21, 2024
1ce1b10
attempt tp see if dataframe conversion is causing the issue
icfaust Nov 21, 2024
5355039
fix iter problem
icfaust Nov 21, 2024
b5b8442
fix testing issues
icfaust Nov 21, 2024
d025c89
formatting
icfaust Nov 21, 2024
428bfb6
revert change
icfaust Nov 21, 2024
da23138
fixes for pandas
icfaust Nov 21, 2024
1d0c330
there is a slowdown with pandas that needs to be solved
icfaust Nov 21, 2024
f3f63a6
swap to transpose for speed
icfaust Nov 21, 2024
56c8054
more clarity
icfaust Nov 21, 2024
1580d77
add _check_sample_weight
icfaust Nov 22, 2024
ffc9f1f
add more testing'
icfaust Nov 22, 2024
d184ed0
rename
icfaust Nov 22, 2024
c68616f
remove unnecessary imports
icfaust Nov 22, 2024
e7ea94e
fix test slowness
icfaust Nov 22, 2024
dbe108d
focus get_dataframes_and_queues
icfaust Nov 22, 2024
7284b59
put config_context around
icfaust Nov 22, 2024
e1be91d
Update test_validation.py
icfaust Nov 24, 2024
8a0f9e9
Update base.py
icfaust Nov 24, 2024
5272207
Update test_validation.py
icfaust Nov 24, 2024
21a7896
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 24, 2024
56b5c4c
generalize regex
icfaust Nov 25, 2024
0d1b306
add fixes for sklearn 1.0 and input_name
icfaust Nov 25, 2024
8ff312e
fixes for test failures
icfaust Nov 25, 2024
87b7e3b
Update validation.py
icfaust Nov 25, 2024
29e8f8c
Update test_validation.py
icfaust Nov 25, 2024
527ce22
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 25, 2024
1175a98
don't have more time at the moment to do this.
icfaust Nov 26, 2024
50ba766
remove old code
icfaust Nov 28, 2024
05ef656
interim stop
icfaust Nov 28, 2024
68ffc45
attempt at fixing
icfaust Nov 28, 2024
cfeb2c5
remover abstractmethod
icfaust Nov 28, 2024
d3a69c6
fix issues
icfaust Nov 28, 2024
c74485d
fix sample weights
icfaust Nov 28, 2024
ee3c475
remove numpy
icfaust Nov 28, 2024
e135c47
try again
icfaust Nov 28, 2024
39257bb
reintroduce _compute_raw for kmeans
icfaust Nov 28, 2024
afed175
formatting
icfaust Nov 28, 2024
71cb39c
iterable fix
icfaust Nov 28, 2024
fcb543c
make stricter
icfaust Nov 28, 2024
11f3c76
attempt at fixing recursion issue
icfaust Nov 28, 2024
8c1981a
merge master
icfaust Nov 28, 2024
8581551
Update basic_statistics.py
icfaust Nov 29, 2024
5334b38
Update incremental_basic_statistics.py
icfaust Nov 29, 2024
5f353c6
remove todo
icfaust Nov 29, 2024
b3ece1e
Update basic_statistics.py
icfaust Dec 1, 2024
2ebf71b
Update basic_statistics.py
icfaust Dec 1, 2024
8e4cde0
Merge branch 'main' into dev/bs_zero
icfaust Dec 3, 2024
60aeaa6
warning removal from BS examples
icfaust Dec 4, 2024
4 changes: 2 additions & 2 deletions examples/sklearnex/basic_statistics_spmd.py
@@ -60,5 +60,5 @@ def generate_data(par, size, seed=777):
bss = BasicStatisticsSpmd(["mean", "standard_deviation"])
bss.fit(dpt_data, dpt_weights)

print(f"Computed mean on rank {rank}:\n", bss.mean)
print(f"Computed std on rank {rank}:\n", bss.standard_deviation)
print(f"Computed mean on rank {rank}:\n", bss.mean_)
print(f"Computed std on rank {rank}:\n", bss.standard_deviation_)
12 changes: 6 additions & 6 deletions examples/sklearnex/incremental_basic_statistics.py
@@ -30,16 +30,16 @@
X_3 = np.array([[1, 1], [1, 2], [2, 3]])
result = incbs.partial_fit(X_3)

print(f"Mean:\n{result.mean}")
print(f"Max:\n{result.max}")
print(f"Sum:\n{result.sum}")
print(f"Mean:\n{result.mean_}")
print(f"Max:\n{result.max_}")
print(f"Sum:\n{result.sum_}")

# We pass the whole dataset to the fit method; it is split automatically and
# partial_fit is called for each batch.
incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"], batch_size=3)
X = np.array([[0, 1], [0, 1], [1, 2], [1, 1], [1, 2], [2, 3]])
result = incbs.fit(X)

print(f"Mean:\n{result.mean}")
print(f"Max:\n{result.max}")
print(f"Sum:\n{result.sum}")
print(f"Mean:\n{result.mean_}")
print(f"Max:\n{result.max_}")
print(f"Sum:\n{result.sum_}")
12 changes: 6 additions & 6 deletions examples/sklearnex/incremental_basic_statistics_dpctl.py
@@ -36,16 +36,16 @@
X_3 = dpt.asarray([[1, 1], [1, 2], [2, 3]], sycl_queue=queue)
result = incbs.partial_fit(X_3)

print(f"Mean:\n{result.mean}")
print(f"Max:\n{result.max}")
print(f"Sum:\n{result.sum}")
print(f"Mean:\n{result.mean_}")
print(f"Max:\n{result.max_}")
print(f"Sum:\n{result.sum_}")

# We pass the whole dataset to the fit method; it is split automatically and
# partial_fit is called for each batch.
incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"], batch_size=3)
X = dpt.asarray([[0, 1], [0, 1], [1, 2], [1, 1], [1, 2], [2, 3]], sycl_queue=queue)
result = incbs.fit(X)

print(f"Mean:\n{result.mean}")
print(f"Max:\n{result.max}")
print(f"Sum:\n{result.sum}")
print(f"Mean:\n{result.mean_}")
print(f"Max:\n{result.max_}")
print(f"Sum:\n{result.sum_}")
73 changes: 30 additions & 43 deletions onedal/basic_statistics/basic_statistics.py
@@ -17,17 +17,17 @@
import warnings
from abc import ABCMeta, abstractmethod

import numpy as np

from ..common._base import BaseEstimator
from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _is_csr
from ..utils.validation import _check_array


class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta):
@abstractmethod
def __init__(self, result_options, algorithm):
class BasicStatistics(BaseEstimator, metaclass=ABCMeta):
"""
Basic Statistics oneDAL implementation.
"""

def __init__(self, result_options="all", algorithm="by_default"):
self.options = result_options
self.algorithm = algorithm

@@ -46,62 +46,49 @@ def get_all_result_options():
"second_order_raw_moment",
]

def _get_result_options(self, options):
if options == "all":
options = self.get_all_result_options()
if isinstance(options, list):
options = "|".join(options)
assert isinstance(options, str)
return options
@property
def options(self):
if self._options == ["all"]:
return self.get_all_result_options()
return self._options

@options.setter
def options(self, opts):
# options are always stored internally as an iterable
self._options = opts.split("|") if isinstance(opts, str) else opts

def _get_onedal_params(self, is_csr, dtype=np.float32):
options = self._get_result_options(self.options)
def _get_onedal_params(self, is_csr, dtype=None):
return {
"fptype": dtype,
"method": "sparse" if is_csr else self.algorithm,
"result_option": options,
"result_option": "|".join(self.options),
}


class BasicStatistics(BaseBasicStatistics):
"""
Basic Statistics oneDAL implementation.
"""

def __init__(self, result_options="all", algorithm="by_default"):
super().__init__(result_options, algorithm)

def fit(self, data, sample_weight=None, queue=None):
policy = self._get_policy(queue, data, sample_weight)

is_csr = _is_csr(data)

if data is not None and not is_csr:
data = _check_array(data, ensure_2d=False)
if sample_weight is not None:
sample_weight = _check_array(sample_weight, ensure_2d=False)

data, sample_weight = _convert_to_supported(policy, data, sample_weight)
is_single_dim = data.ndim == 1
data_table, weights_table = to_table(data, sample_weight)
data, sample_weight = to_table(
*_convert_to_supported(policy, data, sample_weight)
)

dtype = data.dtype
raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr)
for opt, raw_value in raw_result.items():
value = from_table(raw_value).ravel()
result = self._compute_raw(data, sample_weight, policy, data.dtype, is_csr)

for opt in self.options:
value = from_table(getattr(result, opt))[0] # two-dimensional table [1, n]
if is_single_dim:
setattr(self, opt, value[0])
else:
setattr(self, opt, value)

return self

def _compute_raw(
self, data_table, weights_table, policy, dtype=np.float32, is_csr=False
):
def _compute_raw(self, data_table, weights_table, policy, dtype=None, is_csr=False):
# This function is maintained for internal use by KMeans tolerance
# calculations, but is otherwise considered legacy code and is not
# to be used externally in any circumstance
module = self._get_backend("basic_statistics")
params = self._get_onedal_params(is_csr, dtype)
result = module.compute(policy, params, data_table, weights_table)
options = self._get_result_options(self.options).split("|")

return {opt: getattr(result, opt) for opt in options}
return module.compute(policy, params, data_table, weights_table)
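For readability, here is a small standalone sketch of the option-normalization pattern introduced above; the class name and the abbreviated option list are illustrative only, and the real estimator also wires these options into the oneDAL backend parameters.

# Standalone sketch of the new options property/setter from the diff above.
# _OptionsDemo is illustrative; the real class is BasicStatistics and its
# full option list comes from get_all_result_options().
class _OptionsDemo:
    _all_options = ["min", "max", "sum", "mean", "variance", "standard_deviation"]

    def __init__(self, result_options="all"):
        self.options = result_options

    @property
    def options(self):
        # "all" expands to the full list of result options on read
        if self._options == ["all"]:
            return self._all_options
        return self._options

    @options.setter
    def options(self, opts):
        # strings, including "|"-separated ones, are stored as lists
        self._options = opts.split("|") if isinstance(opts, str) else opts

    def result_option_param(self):
        # oneDAL expects a single "|"-separated string (see _get_onedal_params)
        return "|".join(self.options)


demo = _OptionsDemo("mean|max")
print(demo.options)                          # ['mean', 'max']
print(_OptionsDemo().result_option_param())  # all options joined with "|"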
41 changes: 11 additions & 30 deletions onedal/basic_statistics/incremental_basic_statistics.py
@@ -14,16 +14,11 @@
# limitations under the License.
# ==============================================================================

import numpy as np

from daal4py.sklearn._utils import get_dtype

from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import _check_array
from .basic_statistics import BaseBasicStatistics
from .basic_statistics import BasicStatistics


class IncrementalBasicStatistics(BaseBasicStatistics):
class IncrementalBasicStatistics(BasicStatistics):
"""
Incremental estimator for basic statistics based on the oneDAL implementation.
Allows computing basic statistics when data are split into batches.
@@ -65,8 +60,8 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
Second order moment of each feature over all samples.
"""

def __init__(self, result_options="all"):
super().__init__(result_options, algorithm="by_default")
def __init__(self, result_options="all", algorithm="by_default"):
super().__init__(result_options, algorithm)
self._reset()

def _reset(self):
@@ -85,7 +80,7 @@ def __getstate__(self):

return data

def partial_fit(self, X, weights=None, queue=None):
def partial_fit(self, X, sample_weight=None, queue=None):
"""
Computes partial data for basic statistics
from data batch X and saves it to `_partial_result`.
@@ -106,33 +101,20 @@
"""
self._queue = queue
policy = self._get_policy(queue, X)
X, weights = _convert_to_supported(policy, X, weights)

X = _check_array(
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
)
if weights is not None:
weights = _check_array(
weights,
dtype=[np.float64, np.float32],
ensure_2d=False,
force_all_finite=False,
)
X, sample_weight = to_table(*_convert_to_supported(policy, X, sample_weight))

if not hasattr(self, "_onedal_params"):
dtype = get_dtype(X)
self._onedal_params = self._get_onedal_params(False, dtype=dtype)
self._onedal_params = self._get_onedal_params(False, dtype=X.dtype)

X_table, weights_table = to_table(X, weights)
self._partial_result = self._get_backend(
"basic_statistics",
None,
"partial_compute",
policy,
self._onedal_params,
self._partial_result,
X_table,
weights_table,
X,
sample_weight,
)

self._need_to_finalize = True
@@ -167,9 +149,8 @@ def finalize_fit(self, queue=None):
self._onedal_params,
self._partial_result,
)
options = self._get_result_options(self.options).split("|")
for opt in options:
setattr(self, opt, from_table(getattr(result, opt)).ravel())
for opt in self.options:
setattr(self, opt, from_table(getattr(result, opt))[0])

self._need_to_finalize = False

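As context for the partial_fit/finalize_fit changes above, a hedged sketch of the batch flow at the onedal level; the import path, NumPy inputs, and the default queue are assumptions, and at this layer the result attributes use the plain names (mean, max, ...) rather than the sklearnex trailing-underscore ones.

# Hypothetical batch loop over the onedal-level incremental estimator.
import numpy as np

from onedal.basic_statistics import IncrementalBasicStatistics

incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])

batches = [
    np.array([[0.0, 1.0], [0.0, 1.0]]),
    np.array([[1.0, 2.0], [1.0, 1.0]]),
    np.array([[1.0, 2.0], [2.0, 3.0]]),
]
for X_batch in batches:
    # each call accumulates into _partial_result on the oneDAL side
    incbs.partial_fit(X_batch)

# finalize_fit collapses the accumulated partial results into the statistics
incbs.finalize_fit()
print(incbs.mean, incbs.max, incbs.sum)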
2 changes: 1 addition & 1 deletion onedal/cluster/kmeans.py
@@ -89,7 +89,7 @@ def _tolerance(self, X_table, rtol, is_csr, policy, dtype):
bs = self._get_basic_statistics_backend("variance")

res = bs._compute_raw(X_table, dummy, policy, dtype, is_csr)
mean_var = from_table(res["variance"]).mean()
mean_var = from_table(res.variance).mean()

return mean_var * rtol

8 changes: 1 addition & 7 deletions onedal/spmd/basic_statistics/basic_statistics.py
@@ -21,10 +21,4 @@


class BasicStatistics(BaseEstimatorSPMD, BasicStatistics_Batch):
@support_input_format()
def compute(self, data, weights=None, queue=None):
return super().compute(data, weights=weights, queue=queue)

@support_input_format()
def fit(self, data, sample_weight=None, queue=None):
return super().fit(data, sample_weight=sample_weight, queue=queue)
pass
12 changes: 5 additions & 7 deletions onedal/spmd/basic_statistics/incremental_basic_statistics.py
@@ -30,7 +30,7 @@ def _reset(self):
"basic_statistics", None, "partial_compute_result"
)

def partial_fit(self, X, weights=None, queue=None):
def partial_fit(self, X, sample_weight=None, queue=None):
"""
Computes partial data for basic statistics
from data batch X and saves it to `_partial_result`.
@@ -51,22 +51,20 @@ def partial_fit(self, X, sample_weight=None, queue=None):
"""
self._queue = queue
policy = super(base_IncrementalBasicStatistics, self)._get_policy(queue, X)
X, weights = _convert_to_supported(policy, X, weights)
X, sample_weight = to_table(*_convert_to_supported(policy, X, sample_weight))

if not hasattr(self, "_onedal_params"):
dtype = get_dtype(X)
self._onedal_params = self._get_onedal_params(False, dtype=dtype)
self._onedal_params = self._get_onedal_params(False, dtype=X.dtype)

X_table, weights_table = to_table(X, weights)
self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend(
"basic_statistics",
None,
"partial_compute",
policy,
self._onedal_params,
self._partial_result,
X_table,
weights_table,
X,
sample_weight,
)

self._need_to_finalize = True
45 changes: 14 additions & 31 deletions sklearnex/basic_statistics/basic_statistics.py
@@ -16,22 +16,17 @@

import warnings

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.utils.validation import _check_sample_weight

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import sklearn_check_version
from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics

from .._device_offload import dispatch
from .._utils import IntelEstimator, PatchingConditionsChain

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data
from ..utils._array_api import get_namespace
from ..utils.validation import _check_sample_weight, validate_data

if sklearn_check_version("1.2"):
from sklearn.utils._param_validation import StrOptions
@@ -130,30 +125,15 @@ def __init__(self, result_options="all"):

def _save_attributes(self):
assert hasattr(self, "_onedal_estimator")

if self.result_options == "all":
result_options = onedal_BasicStatistics.get_all_result_options()
else:
result_options = self.result_options

if isinstance(result_options, str):
setattr(
self,
result_options + "_",
getattr(self._onedal_estimator, result_options),
)
elif isinstance(result_options, list):
for option in result_options:
setattr(self, option + "_", getattr(self._onedal_estimator, option))
for option in self._onedal_estimator.options:
setattr(self, option + "_", getattr(self._onedal_estimator, option))

def __getattr__(self, attr):
if self.result_options == "all":
result_options = onedal_BasicStatistics.get_all_result_options()
else:
result_options = self.result_options
is_deprecated_attr = (
isinstance(result_options, str) and (attr == result_options)
) or (isinstance(result_options, list) and (attr in result_options))
attr in self._onedal_estimator.options
if "_onedal_estimator" in self.__dict__
else False
)
if is_deprecated_attr:
warnings.warn(
"Result attributes without a trailing underscore were deprecated in version 2025.1 and will be removed in 2026.0"
@@ -179,13 +159,16 @@ def _onedal_fit(self, X, sample_weight=None, queue=None):
if sklearn_check_version("1.2"):
self._validate_params()

xp, _ = get_namespace(X)
if sklearn_check_version("1.0"):
X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_2d=False)
X = validate_data(self, X, dtype=[xp.float64, xp.float32], ensure_2d=False)
else:
X = check_array(X, dtype=[np.float64, np.float32])
X = check_array(X, dtype=[xp.float64, xp.float32])

if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
sample_weight = _check_sample_weight(
sample_weight, X, dtype=[xp.float64, xp.float32]
)

onedal_params = {
"result_options": self.result_options,
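The _save_attributes/__getattr__ rework above keeps the 2025.1 deprecation path for result attributes without a trailing underscore; below is a short sketch of the expected user-visible behavior. The exact return value of the deprecated alias is not shown in this hunk, so that part is an assumption.

# Sketch of the attribute deprecation behavior implied by the diff above.
import warnings

import numpy as np

from sklearnex.basic_statistics import BasicStatistics

X = np.array([[0.0, 1.0], [1.0, 2.0], [2.0, 3.0]])
bs = BasicStatistics(result_options=["mean"]).fit(X)

print(bs.mean_)  # supported, trailing-underscore attribute

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = bs.mean  # deprecated alias, routed through __getattr__ (assumed to warn)
assert any("deprecated" in str(w.message) for w in caught)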