"""Base classes for all estimators.""" # Author: Gael Varoquaux # License: BSD 3 clause import copy import warnings from collections import defaultdict import platform import inspect import re import numpy as np from . import __version__ from ._config import get_config from .utils import _IS_32BIT from .utils._set_output import _SetOutputMixin from .utils._tags import ( _DEFAULT_TAGS, ) from .utils.validation import check_X_y from .utils.validation import check_array from .utils.validation import _check_y from .utils.validation import _num_features from .utils.validation import _check_feature_names_in from .utils.validation import _generate_get_feature_names_out from .utils.validation import check_is_fitted from .utils.validation import _get_feature_names from .utils._estimator_html_repr import estimator_html_repr from .utils._param_validation import validate_parameter_constraints def clone(estimator, *, safe=True): """Construct a new unfitted estimator with the same parameters. Clone does a deep copy of the model in an estimator without actually copying attached data. It returns a new estimator with the same parameters that has not been fitted on any data. Parameters ---------- estimator : {list, tuple, set} of estimator instance or a single \ estimator instance The estimator or group of estimators to be cloned. safe : bool, default=True If safe is False, clone will fall back to a deep copy on objects that are not estimators. Returns ------- estimator : object The deep copy of the input, an estimator if input is an estimator. Notes ----- If the estimator's `random_state` parameter is an integer (or if the estimator doesn't have a `random_state` parameter), an *exact clone* is returned: the clone and the original estimator will give the exact same results. Otherwise, *statistical clone* is returned: the clone might return different results from the original estimator. More details can be found in :ref:`randomness`. """ estimator_type = type(estimator) # XXX: not handling dictionaries if estimator_type in (list, tuple, set, frozenset): return estimator_type([clone(e, safe=safe) for e in estimator]) elif not hasattr(estimator, "get_params") or isinstance(estimator, type): if not safe: return copy.deepcopy(estimator) else: if isinstance(estimator, type): raise TypeError( "Cannot clone object. " + "You should provide an instance of " + "scikit-learn estimator instead of a class." ) else: raise TypeError( "Cannot clone object '%s' (type %s): " "it does not seem to be a scikit-learn " "estimator as it does not implement a " "'get_params' method." % (repr(estimator), type(estimator)) ) klass = estimator.__class__ new_object_params = estimator.get_params(deep=False) for name, param in new_object_params.items(): new_object_params[name] = clone(param, safe=False) new_object = klass(**new_object_params) params_set = new_object.get_params(deep=False) # quick sanity check of the parameters of the clone for name in new_object_params: param1 = new_object_params[name] param2 = params_set[name] if param1 is not param2: raise RuntimeError( "Cannot clone object %s, as the constructor " "either does not set or modifies parameter %s" % (estimator, name) ) # _sklearn_output_config is used by `set_output` to configure the output # container of an estimator. if hasattr(estimator, "_sklearn_output_config"): new_object._sklearn_output_config = copy.deepcopy( estimator._sklearn_output_config ) return new_object class BaseEstimator: """Base class for all estimators in scikit-learn. Notes ----- All estimators should specify all the parameters that can be set at the class level in their ``__init__`` as explicit keyword arguments (no ``*args`` or ``**kwargs``). """ @classmethod def _get_param_names(cls): """Get parameter names for the estimator""" # fetch the constructor or the original constructor before # deprecation wrapping if any init = getattr(cls.__init__, "deprecated_original", cls.__init__) if init is object.__init__: # No explicit constructor to introspect return [] # introspect the constructor arguments to find the model parameters # to represent init_signature = inspect.signature(init) # Consider the constructor parameters excluding 'self' parameters = [ p for p in init_signature.parameters.values() if p.name != "self" and p.kind != p.VAR_KEYWORD ] for p in parameters: if p.kind == p.VAR_POSITIONAL: raise RuntimeError( "scikit-learn estimators should always " "specify their parameters in the signature" " of their __init__ (no varargs)." " %s with constructor %s doesn't " " follow this convention." % (cls, init_signature) ) # Extract and sort argument names excluding 'self' return sorted([p.name for p in parameters]) def get_params(self, deep=True): """ Get parameters for this estimator. Parameters ---------- deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators. Returns ------- params : dict Parameter names mapped to their values. """ out = dict() for key in self._get_param_names(): value = getattr(self, key) if deep and hasattr(value, "get_params") and not isinstance(value, type): deep_items = value.get_params().items() out.update((key + "__" + k, val) for k, val in deep_items) out[key] = value return out def set_params(self, **params): """Set the parameters of this estimator. The method works on simple estimators as well as on nested objects (such as :class:`~sklearn.pipeline.Pipeline`). The latter have parameters of the form ``__`` so that it's possible to update each component of a nested object. Parameters ---------- **params : dict Estimator parameters. Returns ------- self : estimator instance Estimator instance. """ if not params: # Simple optimization to gain speed (inspect is slow) return self valid_params = self.get_params(deep=True) nested_params = defaultdict(dict) # grouped by prefix for key, value in params.items(): key, delim, sub_key = key.partition("__") if key not in valid_params: local_valid_params = self._get_param_names() raise ValueError( f"Invalid parameter {key!r} for estimator {self}. " f"Valid parameters are: {local_valid_params!r}." ) if delim: nested_params[key][sub_key] = value else: setattr(self, key, value) valid_params[key] = value for key, sub_params in nested_params.items(): # TODO(1.4): remove specific handling of "base_estimator". # The "base_estimator" key is special. It was deprecated and # renamed to "estimator" for several estimators. This means we # need to translate it here and set sub-parameters on "estimator", # but only if the user did not explicitly set a value for # "base_estimator". if ( key == "base_estimator" and valid_params[key] == "deprecated" and self.__module__.startswith("sklearn.") ): warnings.warn( f"Parameter 'base_estimator' of {self.__class__.__name__} is" " deprecated in favor of 'estimator'. See" f" {self.__class__.__name__}'s docstring for more details.", FutureWarning, stacklevel=2, ) key = "estimator" valid_params[key].set_params(**sub_params) return self def __repr__(self, N_CHAR_MAX=700): # N_CHAR_MAX is the (approximate) maximum number of non-blank # characters to render. We pass it as an optional parameter to ease # the tests. from .utils._pprint import _EstimatorPrettyPrinter N_MAX_ELEMENTS_TO_SHOW = 30 # number of elements to show in sequences # use ellipsis for sequences with a lot of elements pp = _EstimatorPrettyPrinter( compact=True, indent=1, indent_at_name=True, n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW, ) repr_ = pp.pformat(self) # Use bruteforce ellipsis when there are a lot of non-blank characters n_nonblank = len("".join(repr_.split())) if n_nonblank > N_CHAR_MAX: lim = N_CHAR_MAX // 2 # apprx number of chars to keep on both ends regex = r"^(\s*\S){%d}" % lim # The regex '^(\s*\S){%d}' % n # matches from the start of the string until the nth non-blank # character: # - ^ matches the start of string # - (pattern){n} matches n repetitions of pattern # - \s*\S matches a non-blank char following zero or more blanks left_lim = re.match(regex, repr_).end() right_lim = re.match(regex, repr_[::-1]).end() if "\n" in repr_[left_lim:-right_lim]: # The left side and right side aren't on the same line. # To avoid weird cuts, e.g.: # categoric...ore', # we need to start the right side with an appropriate newline # character so that it renders properly as: # categoric... # handle_unknown='ignore', # so we add [^\n]*\n which matches until the next \n regex += r"[^\n]*\n" right_lim = re.match(regex, repr_[::-1]).end() ellipsis = "..." if left_lim + len(ellipsis) < len(repr_) - right_lim: # Only add ellipsis if it results in a shorter repr repr_ = repr_[:left_lim] + "..." + repr_[-right_lim:] return repr_ def __getstate__(self): if getattr(self, "__slots__", None): raise TypeError( "You cannot use `__slots__` in objects inheriting from " "`sklearn.base.BaseEstimator`." ) try: state = super().__getstate__() if state is None: # For Python 3.11+, empty instance (no `__slots__`, # and `__dict__`) will return a state equal to `None`. state = self.__dict__.copy() except AttributeError: # Python < 3.11 state = self.__dict__.copy() if type(self).__module__.startswith("sklearn."): return dict(state.items(), _sklearn_version=__version__) else: return state def __setstate__(self, state): if type(self).__module__.startswith("sklearn."): pickle_version = state.pop("_sklearn_version", "pre-0.18") if pickle_version != __version__: warnings.warn( "Trying to unpickle estimator {0} from version {1} when " "using version {2}. This might lead to breaking code or " "invalid results. Use at your own risk. " "For more info please refer to:\n" "https://scikit-learn.org/stable/model_persistence.html" "#security-maintainability-limitations".format( self.__class__.__name__, pickle_version, __version__ ), UserWarning, ) try: super().__setstate__(state) except AttributeError: self.__dict__.update(state) def _more_tags(self): return _DEFAULT_TAGS def _get_tags(self): collected_tags = {} for base_class in reversed(inspect.getmro(self.__class__)): if hasattr(base_class, "_more_tags"): # need the if because mixins might not have _more_tags # but might do redundant work in estimators # (i.e. calling more tags on BaseEstimator multiple times) more_tags = base_class._more_tags(self) collected_tags.update(more_tags) return collected_tags def _check_n_features(self, X, reset): """Set the `n_features_in_` attribute, or check against it. Parameters ---------- X : {ndarray, sparse matrix} of shape (n_samples, n_features) The input samples. reset : bool If True, the `n_features_in_` attribute is set to `X.shape[1]`. If False and the attribute exists, then check that it is equal to `X.shape[1]`. If False and the attribute does *not* exist, then the check is skipped. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`. """ try: n_features = _num_features(X) except TypeError as e: if not reset and hasattr(self, "n_features_in_"): raise ValueError( "X does not contain any features, but " f"{self.__class__.__name__} is expecting " f"{self.n_features_in_} features" ) from e # If the number of features is not defined and reset=True, # then we skip this check return if reset: self.n_features_in_ = n_features return if not hasattr(self, "n_features_in_"): # Skip this check if the expected number of expected input features # was not recorded by calling fit first. This is typically the case # for stateless transformers. return if n_features != self.n_features_in_: raise ValueError( f"X has {n_features} features, but {self.__class__.__name__} " f"is expecting {self.n_features_in_} features as input." ) def _check_feature_names(self, X, *, reset): """Set or check the `feature_names_in_` attribute. .. versionadded:: 1.0 Parameters ---------- X : {ndarray, dataframe} of shape (n_samples, n_features) The input samples. reset : bool Whether to reset the `feature_names_in_` attribute. If False, the input will be checked for consistency with feature names of data provided when reset was last True. .. note:: It is recommended to call `reset=True` in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`. """ if reset: feature_names_in = _get_feature_names(X) if feature_names_in is not None: self.feature_names_in_ = feature_names_in elif hasattr(self, "feature_names_in_"): # Delete the attribute when the estimator is fitted on a new dataset # that has no feature names. delattr(self, "feature_names_in_") return fitted_feature_names = getattr(self, "feature_names_in_", None) X_feature_names = _get_feature_names(X) if fitted_feature_names is None and X_feature_names is None: # no feature names seen in fit and in X return if X_feature_names is not None and fitted_feature_names is None: warnings.warn( f"X has feature names, but {self.__class__.__name__} was fitted without" " feature names" ) return if X_feature_names is None and fitted_feature_names is not None: warnings.warn( "X does not have valid feature names, but" f" {self.__class__.__name__} was fitted with feature names" ) return # validate the feature names against the `feature_names_in_` attribute if len(fitted_feature_names) != len(X_feature_names) or np.any( fitted_feature_names != X_feature_names ): message = ( "The feature names should match those that were passed during fit.\n" ) fitted_feature_names_set = set(fitted_feature_names) X_feature_names_set = set(X_feature_names) unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set) missing_names = sorted(fitted_feature_names_set - X_feature_names_set) def add_names(names): output = "" max_n_names = 5 for i, name in enumerate(names): if i >= max_n_names: output += "- ...\n" break output += f"- {name}\n" return output if unexpected_names: message += "Feature names unseen at fit time:\n" message += add_names(unexpected_names) if missing_names: message += "Feature names seen at fit time, yet now missing:\n" message += add_names(missing_names) if not missing_names and not unexpected_names: message += ( "Feature names must be in the same order as they were in fit.\n" ) raise ValueError(message) def _validate_data( self, X="no_validation", y="no_validation", reset=True, validate_separately=False, **check_params, ): """Validate input data and set or check the `n_features_in_` attribute. Parameters ---------- X : {array-like, sparse matrix, dataframe} of shape \ (n_samples, n_features), default='no validation' The input samples. If `'no_validation'`, no validation is performed on `X`. This is useful for meta-estimator which can delegate input validation to their underlying estimator(s). In that case `y` must be passed and the only accepted `check_params` are `multi_output` and `y_numeric`. y : array-like of shape (n_samples,), default='no_validation' The targets. - If `None`, `check_array` is called on `X`. If the estimator's requires_y tag is True, then an error will be raised. - If `'no_validation'`, `check_array` is called on `X` and the estimator's requires_y tag is ignored. This is a default placeholder and is never meant to be explicitly set. In that case `X` must be passed. - Otherwise, only `y` with `_check_y` or both `X` and `y` are checked with either `check_array` or `check_X_y` depending on `validate_separately`. reset : bool, default=True Whether to reset the `n_features_in_` attribute. If False, the input will be checked for consistency with data provided when reset was last True. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`. validate_separately : False or tuple of dicts, default=False Only used if y is not None. If False, call validate_X_y(). Else, it must be a tuple of kwargs to be used for calling check_array() on X and y respectively. `estimator=self` is automatically added to these dicts to generate more informative error message in case of invalid input data. **check_params : kwargs Parameters passed to :func:`sklearn.utils.check_array` or :func:`sklearn.utils.check_X_y`. Ignored if validate_separately is not False. `estimator=self` is automatically added to these params to generate more informative error message in case of invalid input data. Returns ------- out : {ndarray, sparse matrix} or tuple of these The validated input. A tuple is returned if both `X` and `y` are validated. """ self._check_feature_names(X, reset=reset) if y is None and self._get_tags()["requires_y"]: raise ValueError( f"This {self.__class__.__name__} estimator " "requires y to be passed, but the target y is None." ) no_val_X = isinstance(X, str) and X == "no_validation" no_val_y = y is None or isinstance(y, str) and y == "no_validation" default_check_params = {"estimator": self} check_params = {**default_check_params, **check_params} if no_val_X and no_val_y: raise ValueError("Validation should be done on X, y or both.") elif not no_val_X and no_val_y: X = check_array(X, input_name="X", **check_params) out = X elif no_val_X and not no_val_y: y = _check_y(y, **check_params) out = y else: if validate_separately: # We need this because some estimators validate X and y # separately, and in general, separately calling check_array() # on X and y isn't equivalent to just calling check_X_y() # :( check_X_params, check_y_params = validate_separately if "estimator" not in check_X_params: check_X_params = {**default_check_params, **check_X_params} X = check_array(X, input_name="X", **check_X_params) if "estimator" not in check_y_params: check_y_params = {**default_check_params, **check_y_params} y = check_array(y, input_name="y", **check_y_params) else: X, y = check_X_y(X, y, **check_params) out = X, y if not no_val_X and check_params.get("ensure_2d", True): self._check_n_features(X, reset=reset) return out def _validate_params(self): """Validate types and values of constructor parameters The expected type and values must be defined in the `_parameter_constraints` class attribute, which is a dictionary `param_name: list of constraints`. See the docstring of `validate_parameter_constraints` for a description of the accepted constraints. """ validate_parameter_constraints( self._parameter_constraints, self.get_params(deep=False), caller_name=self.__class__.__name__, ) @property def _repr_html_(self): """HTML representation of estimator. This is redundant with the logic of `_repr_mimebundle_`. The latter should be favorted in the long term, `_repr_html_` is only implemented for consumers who do not interpret `_repr_mimbundle_`. """ if get_config()["display"] != "diagram": raise AttributeError( "_repr_html_ is only defined when the " "'display' configuration option is set to " "'diagram'" ) return self._repr_html_inner def _repr_html_inner(self): """This function is returned by the @property `_repr_html_` to make `hasattr(estimator, "_repr_html_") return `True` or `False` depending on `get_config()["display"]`. """ return estimator_html_repr(self) def _repr_mimebundle_(self, **kwargs): """Mime bundle used by jupyter kernels to display estimator""" output = {"text/plain": repr(self)} if get_config()["display"] == "diagram": output["text/html"] = estimator_html_repr(self) return output class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" _estimator_type = "classifier" def score(self, X, y, sample_weight=None): """ Return the mean accuracy on the given test data and labels. In multi-label classification, this is the subset accuracy which is a harsh metric since you require for each sample that each label set be correctly predicted. Parameters ---------- X : array-like of shape (n_samples, n_features) Test samples. y : array-like of shape (n_samples,) or (n_samples, n_outputs) True labels for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- score : float Mean accuracy of ``self.predict(X)`` w.r.t. `y`. """ from .metrics import accuracy_score return accuracy_score(y, self.predict(X), sample_weight=sample_weight) def _more_tags(self): return {"requires_y": True} class RegressorMixin: """Mixin class for all regression estimators in scikit-learn.""" _estimator_type = "regressor" def score(self, X, y, sample_weight=None): """Return the coefficient of determination of the prediction. The coefficient of determination :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`, where :math:`u` is the residual sum of squares ``((y_true - y_pred)** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true - y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of `y`, disregarding the input features, would get a :math:`R^2` score of 0.0. Parameters ---------- X : array-like of shape (n_samples, n_features) Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator. y : array-like of shape (n_samples,) or (n_samples, n_outputs) True values for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- score : float :math:`R^2` of ``self.predict(X)`` w.r.t. `y`. Notes ----- The :math:`R^2` score used when calling ``score`` on a regressor uses ``multioutput='uniform_average'`` from version 0.23 to keep consistent with default value of :func:`~sklearn.metrics.r2_score`. This influences the ``score`` method of all the multioutput regressors (except for :class:`~sklearn.multioutput.MultiOutputRegressor`). """ from .metrics import r2_score y_pred = self.predict(X) return r2_score(y, y_pred, sample_weight=sample_weight) def _more_tags(self): return {"requires_y": True} class ClusterMixin: """Mixin class for all cluster estimators in scikit-learn.""" _estimator_type = "clusterer" def fit_predict(self, X, y=None): """ Perform clustering on `X` and returns cluster labels. Parameters ---------- X : array-like of shape (n_samples, n_features) Input data. y : Ignored Not used, present for API consistency by convention. Returns ------- labels : ndarray of shape (n_samples,), dtype=np.int64 Cluster labels. """ # non-optimized default implementation; override when a better # method is possible for a given clustering algorithm self.fit(X) return self.labels_ def _more_tags(self): return {"preserves_dtype": []} class BiclusterMixin: """Mixin class for all bicluster estimators in scikit-learn.""" @property def biclusters_(self): """Convenient way to get row and column indicators together. Returns the ``rows_`` and ``columns_`` members. """ return self.rows_, self.columns_ def get_indices(self, i): """Row and column indices of the `i`'th bicluster. Only works if ``rows_`` and ``columns_`` attributes exist. Parameters ---------- i : int The index of the cluster. Returns ------- row_ind : ndarray, dtype=np.intp Indices of rows in the dataset that belong to the bicluster. col_ind : ndarray, dtype=np.intp Indices of columns in the dataset that belong to the bicluster. """ rows = self.rows_[i] columns = self.columns_[i] return np.nonzero(rows)[0], np.nonzero(columns)[0] def get_shape(self, i): """Shape of the `i`'th bicluster. Parameters ---------- i : int The index of the cluster. Returns ------- n_rows : int Number of rows in the bicluster. n_cols : int Number of columns in the bicluster. """ indices = self.get_indices(i) return tuple(len(i) for i in indices) def get_submatrix(self, i, data): """Return the submatrix corresponding to bicluster `i`. Parameters ---------- i : int The index of the cluster. data : array-like of shape (n_samples, n_features) The data. Returns ------- submatrix : ndarray of shape (n_rows, n_cols) The submatrix corresponding to bicluster `i`. Notes ----- Works with sparse matrices. Only works if ``rows_`` and ``columns_`` attributes exist. """ from .utils.validation import check_array data = check_array(data, accept_sparse="csr") row_ind, col_ind = self.get_indices(i) return data[row_ind[:, np.newaxis], col_ind] class TransformerMixin(_SetOutputMixin): """Mixin class for all transformers in scikit-learn. If :term:`get_feature_names_out` is defined, then `BaseEstimator` will automatically wrap `transform` and `fit_transform` to follow the `set_output` API. See the :ref:`developer_api_set_output` for details. :class:`base.OneToOneFeatureMixin` and :class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for defining :term:`get_feature_names_out`. """ def fit_transform(self, X, y=None, **fit_params): """ Fit to data, then transform it. Fits transformer to `X` and `y` with optional parameters `fit_params` and returns a transformed version of `X`. Parameters ---------- X : array-like of shape (n_samples, n_features) Input samples. y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ default=None Target values (None for unsupervised transformations). **fit_params : dict Additional fit parameters. Returns ------- X_new : ndarray array of shape (n_samples, n_features_new) Transformed array. """ # non-optimized default implementation; override when a better # method is possible for a given clustering algorithm if y is None: # fit method of arity 1 (unsupervised transformation) return self.fit(X, **fit_params).transform(X) else: # fit method of arity 2 (supervised transformation) return self.fit(X, y, **fit_params).transform(X) class OneToOneFeatureMixin: """Provides `get_feature_names_out` for simple transformers. This mixin assumes there's a 1-to-1 correspondence between input features and output features, such as :class:`~preprocessing.StandardScaler`. """ def get_feature_names_out(self, input_features=None): """Get output feature names for transformation. Parameters ---------- input_features : array-like of str or None, default=None Input features. - If `input_features` is `None`, then `feature_names_in_` is used as feature names in. If `feature_names_in_` is not defined, then the following input feature names are generated: `["x0", "x1", ..., "x(n_features_in_ - 1)"]`. - If `input_features` is an array-like, then `input_features` must match `feature_names_in_` if `feature_names_in_` is defined. Returns ------- feature_names_out : ndarray of str objects Same as input features. """ return _check_feature_names_in(self, input_features) class ClassNamePrefixFeaturesOutMixin: """Mixin class for transformers that generate their own names by prefixing. This mixin is useful when the transformer needs to generate its own feature names out, such as :class:`~decomposition.PCA`. For example, if :class:`~decomposition.PCA` outputs 3 features, then the generated feature names out are: `["pca0", "pca1", "pca2"]`. This mixin assumes that a `_n_features_out` attribute is defined when the transformer is fitted. `_n_features_out` is the number of output features that the transformer will return in `transform` of `fit_transform`. """ def get_feature_names_out(self, input_features=None): """Get output feature names for transformation. The feature names out will prefixed by the lowercased class name. For example, if the transformer outputs 3 features, then the feature names out are: `["class_name0", "class_name1", "class_name2"]`. Parameters ---------- input_features : array-like of str or None, default=None Only used to validate feature names with the names seen in :meth:`fit`. Returns ------- feature_names_out : ndarray of str objects Transformed feature names. """ check_is_fitted(self, "_n_features_out") return _generate_get_feature_names_out( self, self._n_features_out, input_features=input_features ) class DensityMixin: """Mixin class for all density estimators in scikit-learn.""" _estimator_type = "DensityEstimator" def score(self, X, y=None): """Return the score of the model on the data `X`. Parameters ---------- X : array-like of shape (n_samples, n_features) Test samples. y : Ignored Not used, present for API consistency by convention. Returns ------- score : float """ pass class OutlierMixin: """Mixin class for all outlier detection estimators in scikit-learn.""" _estimator_type = "outlier_detector" def fit_predict(self, X, y=None): """Perform fit on X and returns labels for X. Returns -1 for outliers and 1 for inliers. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) The input samples. y : Ignored Not used, present for API consistency by convention. Returns ------- y : ndarray of shape (n_samples,) 1 for inliers, -1 for outliers. """ # override for transductive outlier detectors like LocalOulierFactor return self.fit(X).predict(X) class MetaEstimatorMixin: _required_parameters = ["estimator"] """Mixin class for all meta estimators in scikit-learn.""" class MultiOutputMixin: """Mixin to mark estimators that support multioutput.""" def _more_tags(self): return {"multioutput": True} class _UnstableArchMixin: """Mark estimators that are non-determinstic on 32bit or PowerPC""" def _more_tags(self): return { "non_deterministic": ( _IS_32BIT or platform.machine().startswith(("ppc", "powerpc")) ) } def is_classifier(estimator): """Return True if the given estimator is (probably) a classifier. Parameters ---------- estimator : object Estimator object to test. Returns ------- out : bool True if estimator is a classifier and False otherwise. """ return getattr(estimator, "_estimator_type", None) == "classifier" def is_regressor(estimator): """Return True if the given estimator is (probably) a regressor. Parameters ---------- estimator : estimator instance Estimator object to test. Returns ------- out : bool True if estimator is a regressor and False otherwise. """ return getattr(estimator, "_estimator_type", None) == "regressor" def is_outlier_detector(estimator): """Return True if the given estimator is (probably) an outlier detector. Parameters ---------- estimator : estimator instance Estimator object to test. Returns ------- out : bool True if estimator is an outlier detector and False otherwise. """ return getattr(estimator, "_estimator_type", None) == "outlier_detector"