Inzynierka_Gwiazdy/machine_learning/Lib/site-packages/sklearn/tests/test_base.py

# Author: Gael Varoquaux
# License: BSD 3 clause

import re
import numpy as np
import scipy.sparse as sp
import pytest
import warnings

import sklearn
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_no_warnings
from sklearn.utils._testing import ignore_warnings

from sklearn.base import BaseEstimator, clone, is_classifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.utils._set_output import _get_output_config
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets

from sklearn.base import TransformerMixin
from sklearn.utils._mocking import MockDataFrame
from sklearn import config_context
import pickle


#############################################################################
# A few test classes
class MyEstimator(BaseEstimator):
    """Minimal estimator that stores its two constructor arguments as-is."""

    def __init__(self, l1=0, empty=None):
        # Parameters are kept untouched under attributes of the same name.
        self.l1, self.empty = l1, empty


class K(BaseEstimator):
    """Small estimator with parameters ``c`` and ``d``, used for nesting."""

    def __init__(self, c=None, d=None):
        # Parameters are kept untouched under attributes of the same name.
        self.c, self.d = c, d


class T(BaseEstimator):
    """Small estimator with parameters ``a`` and ``b``, used as a container."""

    def __init__(self, a=None, b=None):
        # Parameters are kept untouched under attributes of the same name.
        self.a, self.b = a, b


class NaNTag(BaseEstimator):
    """Estimator whose extra tags set ``allow_nan`` to ``True``."""

    def _more_tags(self):
        return dict(allow_nan=True)


class NoNaNTag(BaseEstimator):
    """Estimator whose extra tags set ``allow_nan`` to ``False``."""

    def _more_tags(self):
        return dict(allow_nan=False)


class OverrideTag(NaNTag):
    """Subclass of ``NaNTag`` that redeclares ``allow_nan`` as ``False``."""

    def _more_tags(self):
        return dict(allow_nan=False)


class DiamondOverwriteTag(NaNTag, NoNaNTag):
    """Inherits from both tag classes and contributes no extra tags itself."""

    def _more_tags(self):
        return {}


class InheritDiamondOverwriteTag(DiamondOverwriteTag):
    """Plain subclass of ``DiamondOverwriteTag`` that adds nothing of its own."""


class ModifyInitParams(BaseEstimator):
    """Estimator that stores a copy of its argument instead of the argument.

    Deprecated behavior: the stored parameter equals the value passed in but
    is a different object, so ``est.a is a`` does not hold.
    """

    # The array default is part of the fixture and is kept on purpose.
    def __init__(self, a=np.array([0])):
        duplicated = a.copy()
        self.a = duplicated


class Buggy(BaseEstimator):
    """A buggy estimator that does not set its parameters right."""

    def __init__(self, a=None):
        # The argument is ignored on purpose; a constant is stored instead.
        hard_coded = 1
        self.a = hard_coded


class NoEstimator:
    """Plain object offering ``fit``/``predict`` without any estimator base."""

    def __init__(self):
        """Take no arguments and store nothing."""

    def fit(self, X=None, y=None):
        """Ignore the inputs and hand back the instance itself."""
        return self

    def predict(self, X=None):
        """Ignore the input and return ``None``."""
        return


class VargEstimator(BaseEstimator):
    """scikit-learn estimators shouldn't have vargs."""

    def __init__(self, *vargs):
        """Accept arbitrary positional arguments and discard them."""


#############################################################################
# The tests


def test_clone():
    # clone must return a new object. With a scalar ``alpha`` the parameters
    # of the original and of the clone are also compared for equality; with
    # an array-valued ``alpha`` only the identity check is performed.

    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert original is not duplicate
    assert original.get_params() == duplicate.get_params()

    original = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    duplicate = clone(original)
    assert original is not duplicate


def test_clone_2():
    # An attribute attached to the estimator after construction must not
    # show up on the clone.

    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    original.own_attribute = "test"
    duplicate = clone(original)
    assert not hasattr(duplicate, "own_attribute")


def test_clone_buggy():
    # clone is expected to reject each of these objects with the listed
    # exception type.
    buggy = Buggy()
    buggy.a = 2

    rejected = [
        (buggy, RuntimeError),
        (NoEstimator(), TypeError),
        (VargEstimator(), RuntimeError),
        (ModifyInitParams(), RuntimeError),
    ]
    for candidate, expected_error in rejected:
        with pytest.raises(expected_error):
            clone(candidate)


def test_clone_empty_array():
    # Regression test: an empty ndarray parameter, and a sparse matrix
    # parameter, must both survive cloning.
    original = MyEstimator(empty=np.array([]))
    duplicate = clone(original)
    assert_array_equal(original.empty, duplicate.empty)

    sparse_param = sp.csr_matrix(np.array([[0]]))
    original = MyEstimator(empty=sparse_param)
    duplicate = clone(original)
    assert_array_equal(original.empty.data, duplicate.empty.data)


def test_clone_nan():
    # Regression test: an ``np.nan`` parameter must be the very same object
    # on the clone.
    original = MyEstimator(empty=np.nan)
    duplicate = clone(original)

    assert duplicate.empty is original.empty


def test_clone_sparse_matrices():
    # Every attribute of scipy.sparse whose name ends in "_matrix" is used
    # to build a 5x5 identity, which must keep its class and its values
    # through clone.
    matrix_names = [name for name in dir(sp) if name.endswith("_matrix")]

    for matrix_name in matrix_names:
        matrix_cls = getattr(sp, matrix_name)
        original = MyEstimator(empty=matrix_cls(np.eye(5)))
        duplicate = clone(original)
        assert original.empty.__class__ is duplicate.empty.__class__
        assert_array_equal(original.empty.toarray(), duplicate.empty.toarray())


def test_clone_estimator_types():
    # A parameter that is a class (not an instance) must be carried over to
    # the clone as the same object.
    original = MyEstimator(empty=MyEstimator)
    duplicate = clone(original)

    assert duplicate.empty is original.empty


def test_clone_class_rather_than_instance():
    # Passing the class itself to clone must raise a TypeError whose message
    # asks for an instance.
    expected_message = "You should provide an instance of scikit-learn estimator"
    with pytest.raises(TypeError, match=expected_message):
        clone(MyEstimator)


def test_repr():
    # Smoke test: repr must work on a simple estimator.
    repr(MyEstimator())

    # Nested estimators are rendered inline.
    nested = T(K(), K())
    assert "T(a=K(), b=K())" == repr(nested)

    # A very long parameter value yields a repr of exactly 485 characters.
    long_valued = T(a=["long_params"] * 1000)
    assert len(repr(long_valued)) == 485


def test_str():
    # Smoke test: str must work on a simple estimator.
    str(MyEstimator())


def test_get_params():
    # ``a`` is a K instance while ``b`` is the K class itself.
    outer = T(K(), K)

    deep_params = outer.get_params(deep=True)
    shallow_params = outer.get_params(deep=False)
    assert "a__d" in deep_params
    assert "a__d" not in shallow_params

    # The double-underscore syntax reaches into the nested estimator.
    outer.set_params(a__d=2)
    assert outer.a.d == 2

    # K has no parameter named ``a``.
    with pytest.raises(ValueError):
        outer.set_params(a__a=2)


def test_is_classifier():
    # An SVC must be recognised as a classifier, bare or wrapped in a grid
    # search, a pipeline, or a pipeline around a grid search.
    svc = SVC()
    wrapped = [
        svc,
        GridSearchCV(svc, {"C": [0.1, 1]}),
        Pipeline([("svc", svc)]),
        Pipeline([("svc_cv", GridSearchCV(svc, {"C": [0.1, 1]}))]),
    ]
    for candidate in wrapped:
        assert is_classifier(candidate)


def test_set_params():
    # Nested parameter setting on a one-step pipeline.
    pipeline = Pipeline([("svc", SVC())])

    invalid_settings = (
        # the "svc" step exists, the parameter does not
        {"svc__stupid_param": True},
        # the pipeline has no step called "svm"
        {"svm__stupid_param": True},
    )
    for setting in invalid_settings:
        with pytest.raises(ValueError):
            pipeline.set_params(**setting)

    # NOTE: we don't currently catch the case where the things in the
    # pipeline are not estimators, so that case is not tested here.


def test_set_params_passes_all_parameters():
    # The nested estimator must receive all of its parameters in a single
    # set_params call. Regression test for #9944

    expected_kwargs = {"max_depth": 5, "min_samples_leaf": 2}

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs comes from the enclosing test function
            assert kwargs == expected_kwargs
            return self

    wrappers = (
        Pipeline([("estimator", TestDecisionTree())]),
        GridSearchCV(TestDecisionTree(), {}),
    )
    for wrapper in wrappers:
        wrapper.set_params(estimator__max_depth=5, estimator__min_samples_leaf=2)


def test_set_params_updates_valid_params():
    # When the estimator is swapped in the same call, ``estimator__C`` must
    # land on the new SVC, not on the DecisionTreeClassifier it replaces.
    search = GridSearchCV(DecisionTreeClassifier(), {})
    search.set_params(estimator=SVC(), estimator__C=42.0)
    assert search.estimator.C == 42.0


@pytest.mark.parametrize(
    "tree,dataset",
    [
        (
            DecisionTreeClassifier(max_depth=2, random_state=0),
            datasets.make_classification(random_state=0),
        ),
        (
            DecisionTreeRegressor(max_depth=2, random_state=0),
            datasets.make_regression(random_state=0),
        ),
    ],
)
def test_score_sample_weight(tree, dataset):
    # The score computed with sample weights must differ from the score
    # computed without them.
    X, y = dataset
    tree.fit(X, y)

    # Random integer weights drawn from a fixed seed.
    weights = np.random.RandomState(0).randint(1, 10, size=len(y))

    plain_score = tree.score(X, y)
    weighted_score = tree.score(X, y, sample_weight=weights)
    failure_note = "Unweighted and weighted scores are unexpectedly equal"
    assert plain_score != weighted_score, failure_note


def test_clone_pandas_dataframe():
    class DummyEstimator(TransformerMixin, BaseEstimator):
        """Placeholder transformer that only stores its parameters.

        Parameters
        ----------
        df : data-frame-like object
            Stored unchanged on the instance.

        scalar_param : object, default=1
            Stored unchanged on the instance.

        Notes
        -----
        ``fit`` and ``transform`` do nothing and return ``None``.
        """

        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            return None

        def transform(self, X):
            return None

    # Build an estimator around a mock data frame and clone it.
    frame = MockDataFrame(np.arange(10))
    original = DummyEstimator(frame, scalar_param=1)
    duplicate = clone(original)

    # Both parameters must compare equal between original and clone.
    assert (original.df == duplicate.df).values.all()
    assert original.scalar_param == duplicate.scalar_param


def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    fitted = DecisionTreeClassifier().fit(X, y)
    payload = pickle.dumps(fitted)
    assert b"version" in payload
    restored = assert_no_warnings(pickle.loads, payload)

    # The restored tree must score exactly like the original one.
    assert fitted.score(X, y) == restored.score(X, y)


class TreeBadVersion(DecisionTreeClassifier):
    """Tree whose pickled state carries the version string ``"something"``."""

    def __getstate__(self):
        # Copy the instance dict, then stamp the version entry onto the copy.
        state = dict(self.__dict__)
        state["_sklearn_version"] = "something"
        return state


# Template for the warning text that the pickle-version tests in this module
# expect. It is filled in through ``str.format`` with the estimator name and
# the two version strings (``estimator``, ``old_version``,
# ``current_version``).
pickle_error_message = (
    "Trying to unpickle estimator {estimator} from "
    "version {old_version} when using version "
    "{current_version}. This might "
    "lead to breaking code or invalid results. "
    "Use at your own risk."
)


def test_pickle_version_warning_is_issued_upon_different_version():
    """Unpickling a tree stamped with a foreign version must warn.

    ``TreeBadVersion`` writes ``"something"`` as its version entry, so
    loading the pickle is expected to emit a ``UserWarning`` whose text
    contains the formatted ``pickle_error_message``.
    """
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(
        estimator="TreeBadVersion",
        old_version="something",
        current_version=sklearn.__version__,
    )
    # ``match`` is interpreted as a regular expression. Escape the text so
    # the dots (and any ``+`` in a local version identifier) are matched
    # literally rather than being read as regex operators.
    with pytest.warns(UserWarning, match=re.escape(message)):
        pickle.loads(tree_pickle_other)


class TreeNoVersion(DecisionTreeClassifier):
    """Tree whose pickled state is the instance ``__dict__`` itself."""

    def __getstate__(self):
        # ``vars`` returns the instance dict itself, not a copy.
        return vars(self)


def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    """Unpickling a tree whose pickle has no version entry must warn.

    The expected ``UserWarning`` text is the formatted
    ``pickle_error_message`` with ``"pre-0.18"`` as the old version.
    """
    iris = datasets.load_iris()
    # TreeNoVersion overrides __getstate__ to return the bare instance dict,
    # so no version entry is written, like pre-0.18 pickles
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert b"version" not in tree_pickle_noversion
    message = pickle_error_message.format(
        estimator="TreeNoVersion",
        old_version="pre-0.18",
        current_version=sklearn.__version__,
    )
    # check we got the warning about using pre-0.18 pickle; ``match`` is a
    # regular expression, so the text is escaped to be matched literally
    with pytest.warns(UserWarning, match=re.escape(message)):
        pickle.loads(tree_pickle_noversion)


def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    fitted = TreeNoVersion().fit(iris.data, iris.target)
    payload = pickle.dumps(fitted)

    # Temporarily relabel the class as living outside of sklearn; loading
    # the version-less pickle must then stay silent.
    original_module = TreeNoVersion.__module__
    try:
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, payload)
    finally:
        TreeNoVersion.__module__ = original_module


class DontPickleAttributeMixin:
    """Mixin with custom pickling hooks.

    ``__getstate__`` blanks out ``_attribute_not_pickled`` in the exported
    state and ``__setstate__`` marks the restored instance with
    ``_restored = True``.
    """

    def __getstate__(self):
        """Return a copy of the instance dict with the private value nulled.

        The instance itself is left untouched; only the returned copy has
        ``_attribute_not_pickled`` set to ``None``.
        """
        data = self.__dict__.copy()
        data["_attribute_not_pickled"] = None
        return data

    def __setstate__(self, state):
        """Load ``state`` into the instance and flag it as restored.

        ``state`` is only read: the ``_restored`` marker is applied on the
        instance, so the caller's mapping is not modified. The marker
        overrides any ``_restored`` entry present in ``state``.
        """
        self.__dict__.update(state, _restored=True)


class MultiInheritanceEstimator(DontPickleAttributeMixin, BaseEstimator):
    """Estimator that takes its pickling hooks from the mixin listed first."""

    def __init__(self, attribute_pickled=5):
        self.attribute_pickled = attribute_pickled
        # Private slot, empty at construction.
        setattr(self, "_attribute_not_pickled", None)


def test_pickling_when_getstate_is_overwritten_by_mixin():
    # After a pickle round-trip the public parameter is kept, the private
    # attribute is reset to None and the restore marker is set.
    original = MultiInheritanceEstimator()
    original._attribute_not_pickled = "this attribute should not be pickled"

    restored = pickle.loads(pickle.dumps(original))

    assert restored.attribute_pickled == 5
    assert restored._attribute_not_pickled is None
    assert restored._restored


def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
    """Exercise the mixin's state hooks with the class relabelled.

    The estimator class has its ``__module__`` temporarily set to
    ``"notsklearn"`` while ``__getstate__`` and ``__setstate__`` are called
    directly; the original module name is restored afterwards.
    """
    estimator = MultiInheritanceEstimator()
    text = "this attribute should not be pickled"
    estimator._attribute_not_pickled = text
    estimator_cls = type(estimator)
    old_mod = estimator_cls.__module__

    # Everything the ``finally`` clause needs is bound before the ``try``,
    # so a failure during setup surfaces as itself instead of being masked
    # by a NameError raised while restoring the module name.
    try:
        estimator_cls.__module__ = "notsklearn"

        serialized = estimator.__getstate__()
        assert serialized == {"_attribute_not_pickled": None, "attribute_pickled": 5}

        serialized["attribute_pickled"] = 4
        estimator.__setstate__(serialized)
        assert estimator.attribute_pickled == 4
        assert estimator._restored
    finally:
        estimator_cls.__module__ = old_mod


class SingleInheritanceEstimator(BaseEstimator):
    """Estimator that defines its own ``__getstate__`` directly."""

    def __init__(self, attribute_pickled=5):
        self.attribute_pickled = attribute_pickled
        # Private slot, empty at construction.
        setattr(self, "_attribute_not_pickled", None)

    def __getstate__(self):
        # Export a copy of the instance dict with the private value nulled.
        return {**self.__dict__, "_attribute_not_pickled": None}


@ignore_warnings(category=(UserWarning))
def test_pickling_works_when_getstate_is_overwritten_in_the_child_class():
    # After a pickle round-trip the public parameter is kept and the private
    # attribute is reset to None.
    original = SingleInheritanceEstimator()
    original._attribute_not_pickled = "this attribute should not be pickled"

    restored = pickle.loads(pickle.dumps(original))

    assert restored.attribute_pickled == 5
    assert restored._attribute_not_pickled is None


def test_tag_inheritance():
    # Check how the ``allow_nan`` tag is resolved through inheritance.

    # Each base class reports the value it declares.
    assert NaNTag()._get_tags()["allow_nan"]
    assert not NoNaNTag()._get_tags()["allow_nan"]

    # A subclass redeclaring the tag overrides its parent's value.
    assert not OverrideTag()._get_tags()["allow_nan"]

    # With two parents that disagree and no value of its own, the class
    # ends up with the value of the parent listed first (NaNTag) ...
    assert DiamondOverwriteTag()._get_tags()["allow_nan"]

    # ... and a further plain subclass reports the same value.
    assert InheritDiamondOverwriteTag()._get_tags()["allow_nan"]


def test_raises_on_get_params_non_attribute():
    # get_params must raise AttributeError when __init__ accepts a
    # parameter without storing it on the instance.
    class MyEstimator(BaseEstimator):
        def __init__(self, param=5):
            """Accept ``param`` but deliberately do not store it."""

        def fit(self, X, y=None):
            return self

    expected_message = "'MyEstimator' object has no attribute 'param'"
    estimator = MyEstimator()

    with pytest.raises(AttributeError, match=expected_message):
        estimator.get_params()


def test_repr_mimebundle_():
    # The display configuration decides which entries the mime bundle has.
    tree = DecisionTreeClassifier()

    # Default configuration: both the plain-text and the HTML entry.
    default_bundle = tree._repr_mimebundle_()
    assert "text/plain" in default_bundle
    assert "text/html" in default_bundle

    # display="text": only the plain-text entry.
    with config_context(display="text"):
        text_bundle = tree._repr_mimebundle_()
        assert "text/plain" in text_bundle
        assert "text/html" not in text_bundle


def test_repr_html_wraps():
    # The display configuration decides whether _repr_html_ is available.
    tree = DecisionTreeClassifier()

    # Default configuration: the HTML output carries a style element.
    html = tree._repr_html_()
    assert "<style>" in html

    # display="text": accessing the HTML repr raises AttributeError.
    expected_message = "_repr_html_ is only defined when"
    with config_context(display="text"), pytest.raises(
        AttributeError, match=expected_message
    ):
        tree._repr_html_()


def test_n_features_in_validation():
    """Check that `_check_n_features` validates data when reset=False"""
    estimator = MyEstimator()

    # With reset=True the feature count of the training data is recorded.
    estimator._check_n_features([[1, 2, 3], [4, 5, 6]], reset=True)
    assert estimator.n_features_in_ == 3

    # With reset=False an input without features is rejected.
    expected_message = (
        "X does not contain any features, but MyEstimator is expecting 3 features"
    )
    with pytest.raises(ValueError, match=expected_message):
        estimator._check_n_features("invalid X", reset=False)


def test_n_features_in_no_validation():
    """Check that `_check_n_features` does not validate data when
    n_features_in_ is not defined."""
    estimator = MyEstimator()
    featureless_input = "invalid X"

    # With reset=True and such an input, nothing is recorded.
    estimator._check_n_features(featureless_input, reset=True)
    assert not hasattr(estimator, "n_features_in_")

    # With nothing recorded, reset=False must not raise.
    estimator._check_n_features(featureless_input, reset=False)


def test_feature_names_in():
    """Check that feature_names_in_ is recorded by `_validate_data`.

    The test is skipped when pandas is not installed. It walks through
    fitting and transforming with data frames and ndarrays in turn, checking
    when ``feature_names_in_`` is set, when a mismatch raises, and when a
    warning is (or is not) emitted.
    """
    pd = pytest.importorskip("pandas")
    iris = datasets.load_iris()
    X_np = iris.data
    df = pd.DataFrame(X_np, columns=iris.feature_names)

    # Transformer that only runs input validation and returns X unchanged.
    class NoOpTransformer(TransformerMixin, BaseEstimator):
        def fit(self, X, y=None):
            self._validate_data(X)
            return self

        def transform(self, X):
            self._validate_data(X, reset=False)
            return X

    # fit on dataframe saves the feature names
    trans = NoOpTransformer().fit(df)
    assert_array_equal(trans.feature_names_in_, df.columns)

    # fit again but on ndarray does not keep the previous feature names (see #21383)
    trans.fit(X_np)
    assert not hasattr(trans, "feature_names_in_")

    # transforming a dataframe whose columns are in reversed order raises
    trans.fit(df)
    msg = "The feature names should match those that were passed"
    df_bad = pd.DataFrame(X_np, columns=iris.feature_names[::-1])
    with pytest.raises(ValueError, match=msg):
        trans.transform(df_bad)

    # warns when fitted on dataframe and transforming a ndarray
    msg = (
        "X does not have valid feature names, but NoOpTransformer was "
        "fitted with feature names"
    )
    with pytest.warns(UserWarning, match=msg):
        trans.transform(X_np)

    # warns when fitted on a ndarray and transforming dataframe
    msg = "X has feature names, but NoOpTransformer was fitted without feature names"
    trans = NoOpTransformer().fit(X_np)
    with pytest.warns(UserWarning, match=msg):
        trans.transform(df)

    # fit on dataframe with all integer feature names works without warning
    df_int_names = pd.DataFrame(X_np)
    trans = NoOpTransformer()
    with warnings.catch_warnings():
        # any UserWarning raised inside this block becomes an error
        warnings.simplefilter("error", UserWarning)
        trans.fit(df_int_names)

    # fit on dataframe with no feature names or all integer feature names
    # -> do not warn on transform
    Xs = [X_np, df_int_names]
    for X in Xs:
        with warnings.catch_warnings():
            # any UserWarning raised inside this block becomes an error
            warnings.simplefilter("error", UserWarning)
            trans.transform(X)

    # fit on dataframe with feature names that are mixed raises an error:
    df_mixed = pd.DataFrame(X_np, columns=["a", "b", 1, 2])
    trans = NoOpTransformer()
    # the expected text contains regex metacharacters, hence re.escape
    msg = re.escape(
        "Feature names are only supported if all input features have string names, "
        "but your input has ['int', 'str'] as feature name / column name types. "
        "If you want feature names to be stored and validated, you must convert "
        "them all to strings, by using X.columns = X.columns.astype(str) for "
        "example. Otherwise you can remove feature / column names from your input "
        "data, or convert them all to a non-string data type."
    )
    with pytest.raises(TypeError, match=msg):
        trans.fit(df_mixed)

    # transform on feature names that are mixed also raises:
    with pytest.raises(TypeError, match=msg):
        trans.transform(df_mixed)


def test_clone_keeps_output_config():
    """Check that clone keeps the set_output config."""

    scaler = StandardScaler().set_output(transform="pandas")
    original_config = _get_output_config("transform", scaler)

    cloned_config = _get_output_config("transform", clone(scaler))
    assert cloned_config == original_config


class _Empty:
    """Base class with no attributes or methods of its own."""


class EmptyEstimator(_Empty, BaseEstimator):
    """Estimator combining ``_Empty`` and ``BaseEstimator``, adding nothing."""


@pytest.mark.parametrize("estimator", [BaseEstimator(), EmptyEstimator()])
def test_estimator_empty_instance_dict(estimator):
    """Check ``__getstate__`` and pickling for an instance without attributes.

    The returned state is expected to hold nothing but the
    ``_sklearn_version`` entry.

    Python 3.11+ changed behaviour by returning ``None`` instead of raising an
    ``AttributeError``. Non-regression test for gh-25188.
    """
    state = estimator.__getstate__()
    expected = {"_sklearn_version": sklearn.__version__}
    assert state == expected

    # this should not raise; round-trip the parametrized instance so that
    # every listed estimator is covered, not just ``BaseEstimator``
    pickle.loads(pickle.dumps(estimator))


def test_estimator_getstate_using_slots_error_message():
    """Using a `BaseEstimator` with `__slots__` is not supported."""

    class WithSlots:
        __slots__ = ("x",)

    class Estimator(BaseEstimator, WithSlots):
        """Estimator that picks up ``__slots__`` from its second base."""

    expected_message = (
        "You cannot use `__slots__` in objects inheriting from "
        "`sklearn.base.BaseEstimator`"
    )

    # Both the direct call and pickling must fail with the same TypeError.
    triggers = (lambda est: est.__getstate__(), pickle.dumps)
    for trigger in triggers:
        with pytest.raises(TypeError, match=expected_message):
            trigger(Estimator())
Machine learning 2023-09-20 19:46:58 +02:00			`# Author: Gael Varoquaux`
			`# License: BSD 3 clause`

			`import re`
			`import numpy as np`
			`import scipy.sparse as sp`
			`import pytest`
			`import warnings`

			`import sklearn`
			`from sklearn.utils._testing import assert_array_equal`
			`from sklearn.utils._testing import assert_no_warnings`
			`from sklearn.utils._testing import ignore_warnings`

			`from sklearn.base import BaseEstimator, clone, is_classifier`
			`from sklearn.svm import SVC`
			`from sklearn.preprocessing import StandardScaler`
			`from sklearn.utils._set_output import _get_output_config`
			`from sklearn.pipeline import Pipeline`
			`from sklearn.model_selection import GridSearchCV`

			`from sklearn.tree import DecisionTreeClassifier`
			`from sklearn.tree import DecisionTreeRegressor`
			`from sklearn import datasets`

			`from sklearn.base import TransformerMixin`
			`from sklearn.utils._mocking import MockDataFrame`
			`from sklearn import config_context`
			`import pickle`


			`#############################################################################`
			`# A few test classes`
			`class MyEstimator(BaseEstimator):`
			`def __init__(self, l1=0, empty=None):`
			`self.l1 = l1`
			`self.empty = empty`


			`class K(BaseEstimator):`
			`def __init__(self, c=None, d=None):`
			`self.c = c`
			`self.d = d`


			`class T(BaseEstimator):`
			`def __init__(self, a=None, b=None):`
			`self.a = a`
			`self.b = b`


			`class NaNTag(BaseEstimator):`
			`def _more_tags(self):`
			`return {"allow_nan": True}`


			`class NoNaNTag(BaseEstimator):`
			`def _more_tags(self):`
			`return {"allow_nan": False}`


			`class OverrideTag(NaNTag):`
			`def _more_tags(self):`
			`return {"allow_nan": False}`


			`class DiamondOverwriteTag(NaNTag, NoNaNTag):`
			`def _more_tags(self):`
			`return dict()`


			`class InheritDiamondOverwriteTag(DiamondOverwriteTag):`
			`pass`


			`class ModifyInitParams(BaseEstimator):`
			`"""Deprecated behavior.`
			`Equal parameters but with a type cast.`
			`Doesn't fulfill a is a`
			`"""`

			`def __init__(self, a=np.array([0])):`
			`self.a = a.copy()`


			`class Buggy(BaseEstimator):`
			`"A buggy estimator that does not set its parameters right."`

			`def __init__(self, a=None):`
			`self.a = 1`


			`class NoEstimator:`
			`def __init__(self):`
			`pass`

			`def fit(self, X=None, y=None):`
			`return self`

			`def predict(self, X=None):`
			`return None`


			`class VargEstimator(BaseEstimator):`
			`"""scikit-learn estimators shouldn't have vargs."""`

			`def __init__(self, *vargs):`
			`pass`


			`#############################################################################`
			`# The tests`


			`def test_clone():`
			`# Tests that clone creates a correct deep copy.`
			`# We create an estimator, make a copy of its original state`
			`# (which, in this case, is the current state of the estimator),`
			`# and check that the obtained copy is a correct deep copy.`

			`from sklearn.feature_selection import SelectFpr, f_classif`

			`selector = SelectFpr(f_classif, alpha=0.1)`
			`new_selector = clone(selector)`
			`assert selector is not new_selector`
			`assert selector.get_params() == new_selector.get_params()`

			`selector = SelectFpr(f_classif, alpha=np.zeros((10, 2)))`
			`new_selector = clone(selector)`
			`assert selector is not new_selector`


			`def test_clone_2():`
			`# Tests that clone doesn't copy everything.`
			`# We first create an estimator, give it an own attribute, and`
			`# make a copy of its original state. Then we check that the copy doesn't`
			`# have the specific attribute we manually added to the initial estimator.`

			`from sklearn.feature_selection import SelectFpr, f_classif`

			`selector = SelectFpr(f_classif, alpha=0.1)`
			`selector.own_attribute = "test"`
			`new_selector = clone(selector)`
			`assert not hasattr(new_selector, "own_attribute")`


			`def test_clone_buggy():`
			`# Check that clone raises an error on buggy estimators.`
			`buggy = Buggy()`
			`buggy.a = 2`
			`with pytest.raises(RuntimeError):`
			`clone(buggy)`

			`no_estimator = NoEstimator()`
			`with pytest.raises(TypeError):`
			`clone(no_estimator)`

			`varg_est = VargEstimator()`
			`with pytest.raises(RuntimeError):`
			`clone(varg_est)`

			`est = ModifyInitParams()`
			`with pytest.raises(RuntimeError):`
			`clone(est)`


			`def test_clone_empty_array():`
			`# Regression test for cloning estimators with empty arrays`
			`clf = MyEstimator(empty=np.array([]))`
			`clf2 = clone(clf)`
			`assert_array_equal(clf.empty, clf2.empty)`

			`clf = MyEstimator(empty=sp.csr_matrix(np.array([[0]])))`
			`clf2 = clone(clf)`
			`assert_array_equal(clf.empty.data, clf2.empty.data)`


			`def test_clone_nan():`
			`# Regression test for cloning estimators with default parameter as np.nan`
			`clf = MyEstimator(empty=np.nan)`
			`clf2 = clone(clf)`

			`assert clf.empty is clf2.empty`


			`def test_clone_sparse_matrices():`
			`sparse_matrix_classes = [`
			`getattr(sp, name) for name in dir(sp) if name.endswith("_matrix")`
			`]`

			`for cls in sparse_matrix_classes:`
			`sparse_matrix = cls(np.eye(5))`
			`clf = MyEstimator(empty=sparse_matrix)`
			`clf_cloned = clone(clf)`
			`assert clf.empty.__class__ is clf_cloned.empty.__class__`
			`assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray())`


			`def test_clone_estimator_types():`
			`# Check that clone works for parameters that are types rather than`
			`# instances`
			`clf = MyEstimator(empty=MyEstimator)`
			`clf2 = clone(clf)`

			`assert clf.empty is clf2.empty`


			`def test_clone_class_rather_than_instance():`
			`# Check that clone raises expected error message when`
			`# cloning class rather than instance`
			`msg = "You should provide an instance of scikit-learn estimator"`
			`with pytest.raises(TypeError, match=msg):`
			`clone(MyEstimator)`


			`def test_repr():`
			`# Smoke test the repr of the base estimator.`
			`my_estimator = MyEstimator()`
			`repr(my_estimator)`
			`test = T(K(), K())`
			`assert repr(test) == "T(a=K(), b=K())"`

			`some_est = T(a=["long_params"] * 1000)`
			`assert len(repr(some_est)) == 485`


			`def test_str():`
			`# Smoke test the str of the base estimator`
			`my_estimator = MyEstimator()`
			`str(my_estimator)`


			`def test_get_params():`
			`test = T(K(), K)`

			`assert "a__d" in test.get_params(deep=True)`
			`assert "a__d" not in test.get_params(deep=False)`

			`test.set_params(a__d=2)`
			`assert test.a.d == 2`

			`with pytest.raises(ValueError):`
			`test.set_params(a__a=2)`


			`def test_is_classifier():`
			`svc = SVC()`
			`assert is_classifier(svc)`
			`assert is_classifier(GridSearchCV(svc, {"C": [0.1, 1]}))`
			`assert is_classifier(Pipeline([("svc", svc)]))`
			`assert is_classifier(Pipeline([("svc_cv", GridSearchCV(svc, {"C": [0.1, 1]}))]))`


			`def test_set_params():`
			`# test nested estimator parameter setting`
			`clf = Pipeline([("svc", SVC())])`

			`# non-existing parameter in svc`
			`with pytest.raises(ValueError):`
			`clf.set_params(svc__stupid_param=True)`

			`# non-existing parameter of pipeline`
			`with pytest.raises(ValueError):`
			`clf.set_params(svm__stupid_param=True)`

			`# we don't currently catch if the things in pipeline are estimators`
			`# bad_pipeline = Pipeline([("bad", NoEstimator())])`
			`# assert_raises(AttributeError, bad_pipeline.set_params,`
			`# bad__stupid_param=True)`


			`def test_set_params_passes_all_parameters():`
			`# Make sure all parameters are passed together to set_params`
			`# of nested estimator. Regression test for #9944`

			`class TestDecisionTree(DecisionTreeClassifier):`
			`def set_params(self, **kwargs):`
			`super().set_params(**kwargs)`
			`# expected_kwargs is in test scope`
			`assert kwargs == expected_kwargs`
			`return self`

			`expected_kwargs = {"max_depth": 5, "min_samples_leaf": 2}`
			`for est in [`
			`Pipeline([("estimator", TestDecisionTree())]),`
			`GridSearchCV(TestDecisionTree(), {}),`
			`]:`
			`est.set_params(estimator__max_depth=5, estimator__min_samples_leaf=2)`


			`def test_set_params_updates_valid_params():`
			`# Check that set_params tries to set SVC().C, not`
			`# DecisionTreeClassifier().C`
			`gscv = GridSearchCV(DecisionTreeClassifier(), {})`
			`gscv.set_params(estimator=SVC(), estimator__C=42.0)`
			`assert gscv.estimator.C == 42.0`


			`@pytest.mark.parametrize(`
			`"tree,dataset",`
			`[`
			`(`
			`DecisionTreeClassifier(max_depth=2, random_state=0),`
			`datasets.make_classification(random_state=0),`
			`),`
			`(`
			`DecisionTreeRegressor(max_depth=2, random_state=0),`
			`datasets.make_regression(random_state=0),`
			`),`
			`],`
			`)`
			`def test_score_sample_weight(tree, dataset):`
			`rng = np.random.RandomState(0)`
			`# check that the score with and without sample weights are different`
			`X, y = dataset`

			`tree.fit(X, y)`
			`# generate random sample weights`
			`sample_weight = rng.randint(1, 10, size=len(y))`
			`score_unweighted = tree.score(X, y)`
			`score_weighted = tree.score(X, y, sample_weight=sample_weight)`
			`msg = "Unweighted and weighted scores are unexpectedly equal"`
			`assert score_unweighted != score_weighted, msg`


			`def test_clone_pandas_dataframe():`
			`class DummyEstimator(TransformerMixin, BaseEstimator):`
			`"""This is a dummy class for generating numerical features`

			`This feature extractor extracts numerical features from pandas data`
			`frame.`

			`Parameters`
			`----------`

			`df: pandas data frame`
			`The pandas data frame parameter.`

			`Notes`
			`-----`
			`"""`

			`def __init__(self, df=None, scalar_param=1):`
			`self.df = df`
			`self.scalar_param = scalar_param`

			`def fit(self, X, y=None):`
			`pass`

			`def transform(self, X):`
			`pass`

			`# build and clone estimator`
			`d = np.arange(10)`
			`df = MockDataFrame(d)`
			`e = DummyEstimator(df, scalar_param=1)`
			`cloned_e = clone(e)`

			`# the test`
			`assert (e.df == cloned_e.df).values.all()`
			`assert e.scalar_param == cloned_e.scalar_param`


			def test_pickle_version_warning_is_not_raised_with_matching_version():
			    """Check that a same-version pickle round-trip emits no warning.

			    The restored tree must also score identically to the original one.
			    """
			    iris = datasets.load_iris()
			    X, y = iris.data, iris.target

			    fitted = DecisionTreeClassifier().fit(X, y)
			    payload = pickle.dumps(fitted)
			    assert b"version" in payload
			    restored = assert_no_warnings(pickle.loads, payload)

			    # The restored classifier must still be usable for prediction.
			    assert fitted.score(X, y) == restored.score(X, y)


			class TreeBadVersion(DecisionTreeClassifier):
			    """Decision tree whose pickled state reports the version ``"something"``."""

			    def __getstate__(self):
			        # Copy the instance attributes and force the recorded version.
			        return {**self.__dict__, "_sklearn_version": "something"}


			# Template of the version-mismatch warning text; the pickle tests fill in
			# ``estimator``, ``old_version`` and ``current_version`` before matching it.
			pickle_error_message = (
			    "Trying to unpickle estimator {estimator} from version {old_version} "
			    "when using version {current_version}. This might lead to breaking code "
			    "or invalid results. Use at your own risk."
			)


			def test_pickle_version_warning_is_issued_upon_different_version():
			    """Check that unpickling a state with a foreign version warns.

			    ``TreeBadVersion`` writes ``"something"`` as its version, so loading the
			    pickle must emit a ``UserWarning`` whose text names the estimator, the
			    pickled version and the running scikit-learn version.
			    """
			    iris = datasets.load_iris()
			    tree = TreeBadVersion().fit(iris.data, iris.target)
			    tree_pickle_other = pickle.dumps(tree)
			    message = pickle_error_message.format(
			        estimator="TreeBadVersion",
			        old_version="something",
			        current_version=sklearn.__version__,
			    )
			    # ``match`` is interpreted as a regular expression. The message embeds
			    # the version string, which may contain regex metacharacters ("." and,
			    # for local builds, "+"), so escape it to match the text literally.
			    with pytest.warns(UserWarning, match=re.escape(message)):
			        pickle.loads(tree_pickle_other)


			class TreeNoVersion(DecisionTreeClassifier):
			    """Decision tree whose pickled state is the raw instance ``__dict__``."""

			    def __getstate__(self):
			        # Return the attribute dict itself, with nothing added to it.
			        return vars(self)


			def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
			    """Check that unpickling a state without version information warns.

			    The expected warning reports the pickled version as ``"pre-0.18"``.
			    """
			    iris = datasets.load_iris()
			    # TreeNoVersion's __getstate__ returns the bare __dict__, so the pickle
			    # carries no version entry, like a pre-0.18 pickle.
			    tree = TreeNoVersion().fit(iris.data, iris.target)

			    tree_pickle_noversion = pickle.dumps(tree)
			    assert b"version" not in tree_pickle_noversion
			    message = pickle_error_message.format(
			        estimator="TreeNoVersion",
			        old_version="pre-0.18",
			        current_version=sklearn.__version__,
			    )
			    # Check we got the warning about using a pre-0.18 pickle. ``match`` is a
			    # regular expression and the message embeds version strings, so escape
			    # it to compare the text literally.
			    with pytest.warns(UserWarning, match=re.escape(message)):
			        pickle.loads(tree_pickle_noversion)


			def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
			    """Check that no warning is raised when the class module is ``"notsklearn"``.

			    The version-less pickle is loaded while ``TreeNoVersion.__module__`` is
			    temporarily replaced; the original value is restored afterwards.
			    """
			    iris = datasets.load_iris()
			    fitted = TreeNoVersion().fit(iris.data, iris.target)
			    payload = pickle.dumps(fitted)
			    try:
			        original_module = TreeNoVersion.__module__
			        TreeNoVersion.__module__ = "notsklearn"
			        assert_no_warnings(pickle.loads, payload)
			    finally:
			        # Always undo the patch so later tests see the real module name.
			        TreeNoVersion.__module__ = original_module


			class DontPickleAttributeMixin:
			    """Mixin with custom pickling hooks.

			    ``__getstate__`` blanks ``_attribute_not_pickled`` in the exported state
			    and ``__setstate__`` flags the restored instance with ``_restored``.
			    """

			    def __getstate__(self):
			        # Work on a copy so the live instance keeps its attribute value.
			        return {**self.__dict__, "_attribute_not_pickled": None}

			    def __setstate__(self, state):
			        # The incoming dict is updated in place before being applied.
			        state.update(_restored=True)
			        vars(self).update(state)


			class MultiInheritanceEstimator(DontPickleAttributeMixin, BaseEstimator):
			    """Estimator that takes its pickling hooks from ``DontPickleAttributeMixin``.

			    Parameters
			    ----------
			    attribute_pickled : int, default=5
			        Value stored on the instance under the same name.
			    """

			    def __init__(self, attribute_pickled=5):
			        self.attribute_pickled = attribute_pickled
			        # Private attribute that the mixin resets to None when pickling.
			        self._attribute_not_pickled = None


			def test_pickling_when_getstate_is_overwritten_by_mixin():
			    """Check a pickle round-trip when a mixin supplies the state hooks.

			    The private attribute must come back as ``None`` and the instance must
			    carry the ``_restored`` flag set by the mixin's ``__setstate__``.
			    """
			    source = MultiInheritanceEstimator()
			    source._attribute_not_pickled = "this attribute should not be pickled"

			    restored = pickle.loads(pickle.dumps(source))

			    assert restored.attribute_pickled == 5
			    assert restored._attribute_not_pickled is None
			    assert restored._restored


			def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
			    """Check the mixin's state hooks when the class module is ``"notsklearn"``.

			    ``__getstate__`` and ``__setstate__`` are called directly while the
			    estimator class's ``__module__`` is temporarily replaced.
			    """
			    # Build the estimator and remember the real module *before* entering the
			    # ``try`` block, so the ``finally`` clause never touches unbound names
			    # (which would hide the original error behind a NameError).
			    estimator = MultiInheritanceEstimator()
			    estimator._attribute_not_pickled = "this attribute should not be pickled"
			    estimator_cls = type(estimator)
			    old_mod = estimator_cls.__module__
			    try:
			        estimator_cls.__module__ = "notsklearn"

			        serialized = estimator.__getstate__()
			        assert serialized == {"_attribute_not_pickled": None, "attribute_pickled": 5}

			        serialized["attribute_pickled"] = 4
			        estimator.__setstate__(serialized)
			        assert estimator.attribute_pickled == 4
			        assert estimator._restored
			    finally:
			        # Restore the module name so other tests are not affected.
			        estimator_cls.__module__ = old_mod


			class SingleInheritanceEstimator(BaseEstimator):
			    """Estimator that overrides ``__getstate__`` directly in the class.

			    Parameters
			    ----------
			    attribute_pickled : int, default=5
			        Value stored on the instance under the same name.
			    """

			    def __init__(self, attribute_pickled=5):
			        self.attribute_pickled = attribute_pickled
			        self._attribute_not_pickled = None

			    def __getstate__(self):
			        # Export a copy of the attributes with the private one blanked out.
			        return {**self.__dict__, "_attribute_not_pickled": None}


			@ignore_warnings(category=UserWarning)
			def test_pickling_works_when_getstate_is_overwritten_in_the_child_class():
			    """Check a pickle round-trip when the child class overrides ``__getstate__``.

			    ``UserWarning`` is ignored for the duration of the test.
			    """
			    source = SingleInheritanceEstimator()
			    source._attribute_not_pickled = "this attribute should not be pickled"

			    restored = pickle.loads(pickle.dumps(source))

			    assert restored.attribute_pickled == 5
			    assert restored._attribute_not_pickled is None


			def test_tag_inheritance():
			    """Check the ``allow_nan`` tag reported by each tag test class.

			    ``NaNTag`` and ``NoNaNTag`` report their own value, ``OverrideTag``
			    reports the value it redefines, and the two diamond classes (which
			    list ``NaNTag`` first and add no tags of their own) report ``True``.
			    """
			    expectations = [
			        (NaNTag, True),
			        (NoNaNTag, False),
			        (OverrideTag, False),
			        (DiamondOverwriteTag, True),
			        (InheritDiamondOverwriteTag, True),
			    ]
			    for estimator_cls, allows_nan in expectations:
			        tag_value = estimator_cls()._get_tags()["allow_nan"]
			        assert bool(tag_value) is allows_nan


			def test_raises_on_get_params_non_attribute():
			    """Check that ``get_params`` raises when an init parameter is not stored."""

			    class MyEstimator(BaseEstimator):
			        def __init__(self, param=5):
			            # Deliberately does not assign ``self.param``.
			            pass

			        def fit(self, X, y=None):
			            return self

			    expected_error = "'MyEstimator' object has no attribute 'param'"
			    estimator = MyEstimator()

			    with pytest.raises(AttributeError, match=expected_error):
			        estimator.get_params()


			def test_repr_mimebundle_():
			    """Check that the ``display`` config controls the mime bundle content.

			    By default both ``text/plain`` and ``text/html`` are present; with
			    ``display="text"`` only ``text/plain`` is.
			    """
			    tree = DecisionTreeClassifier()

			    default_bundle = tree._repr_mimebundle_()
			    assert "text/plain" in default_bundle
			    assert "text/html" in default_bundle

			    with config_context(display="text"):
			        text_bundle = tree._repr_mimebundle_()
			        assert "text/plain" in text_bundle
			        assert "text/html" not in text_bundle


			def test_repr_html_wraps():
			    """Check that the ``display`` config controls ``_repr_html_``.

			    By default the HTML output contains a ``<style>`` tag; with
			    ``display="text"`` calling ``_repr_html_`` raises ``AttributeError``.
			    """
			    tree = DecisionTreeClassifier()

			    html = tree._repr_html_()
			    assert "<style>" in html

			    with config_context(display="text"):
			        expected_error = "_repr_html_ is only defined when"
			        with pytest.raises(AttributeError, match=expected_error):
			            tree._repr_html_()


			def test_n_features_in_validation():
			    """Check that `_check_n_features` validates data when reset=False."""
			    estimator = MyEstimator()
			    training_rows = [[1, 2, 3], [4, 5, 6]]

			    # With reset=True the number of features is recorded.
			    estimator._check_n_features(training_rows, reset=True)
			    assert estimator.n_features_in_ == 3

			    # With reset=False an input without features is rejected.
			    expected_error = (
			        "X does not contain any features, but MyEstimator is expecting 3 features"
			    )
			    with pytest.raises(ValueError, match=expected_error):
			        estimator._check_n_features("invalid X", reset=False)


			def test_n_features_in_no_validation():
			    """Check that `_check_n_features` does not validate data when
			    n_features_in_ is not defined."""
			    estimator = MyEstimator()
			    bad_input = "invalid X"

			    # reset=True on an input without features must not record anything.
			    estimator._check_n_features(bad_input, reset=True)
			    assert not hasattr(estimator, "n_features_in_")

			    # Nothing was recorded, so this call must not raise.
			    estimator._check_n_features(bad_input, reset=False)


			def test_feature_names_in():
			    """Check that `feature_names_in_` is recorded and checked by `_validate_data`."""
			    pd = pytest.importorskip("pandas")
			    iris = datasets.load_iris()
			    X_np = iris.data
			    df = pd.DataFrame(X_np, columns=iris.feature_names)

			    class NoOpTransformer(TransformerMixin, BaseEstimator):
			        """Transformer that only validates its input and returns it unchanged."""

			        def fit(self, X, y=None):
			            self._validate_data(X)
			            return self

			        def transform(self, X):
			            self._validate_data(X, reset=False)
			            return X

			    # Fitting on a dataframe stores its column names.
			    named = NoOpTransformer().fit(df)
			    assert_array_equal(named.feature_names_in_, df.columns)

			    # Refitting on an ndarray drops the previously stored names (see #21383).
			    named.fit(X_np)
			    assert not hasattr(named, "feature_names_in_")

			    # Columns in a different order at transform time are rejected.
			    named.fit(df)
			    reversed_df = pd.DataFrame(X_np, columns=iris.feature_names[::-1])
			    with pytest.raises(
			        ValueError, match="The feature names should match those that were passed"
			    ):
			        named.transform(reversed_df)

			    # Fitted on a dataframe, transforming an ndarray: warns.
			    missing_names_warning = (
			        "X does not have valid feature names, but NoOpTransformer was "
			        "fitted with feature names"
			    )
			    with pytest.warns(UserWarning, match=missing_names_warning):
			        named.transform(X_np)

			    # Fitted on an ndarray, transforming a dataframe: warns.
			    unexpected_names_warning = (
			        "X has feature names, but NoOpTransformer was fitted without feature names"
			    )
			    unnamed = NoOpTransformer().fit(X_np)
			    with pytest.warns(UserWarning, match=unexpected_names_warning):
			        unnamed.transform(df)

			    # Fitting on a dataframe with all-integer column names must not warn.
			    df_int_names = pd.DataFrame(X_np)
			    int_named = NoOpTransformer()
			    with warnings.catch_warnings():
			        warnings.simplefilter("error", UserWarning)
			        int_named.fit(df_int_names)

			    # After that fit, transforming either an ndarray or the integer-named
			    # dataframe must not warn.
			    for X in (X_np, df_int_names):
			        with warnings.catch_warnings():
			            warnings.simplefilter("error", UserWarning)
			            int_named.transform(X)

			    # Mixed string/integer column names raise an error on fit.
			    df_mixed = pd.DataFrame(X_np, columns=["a", "b", 1, 2])
			    mixed = NoOpTransformer()
			    mixed_names_error = re.escape(
			        "Feature names are only supported if all input features have string names, "
			        "but your input has ['int', 'str'] as feature name / column name types. "
			        "If you want feature names to be stored and validated, you must convert "
			        "them all to strings, by using X.columns = X.columns.astype(str) for "
			        "example. Otherwise you can remove feature / column names from your input "
			        "data, or convert them all to a non-string data type."
			    )
			    with pytest.raises(TypeError, match=mixed_names_error):
			        mixed.fit(df_mixed)

			    # Transforming with mixed column names raises the same error.
			    with pytest.raises(TypeError, match=mixed_names_error):
			        mixed.transform(df_mixed)


			def test_clone_keeps_output_config():
			    """Check that clone keeps the set_output config."""
			    scaler = StandardScaler().set_output(transform="pandas")
			    scaler_copy = clone(scaler)

			    original_config = _get_output_config("transform", scaler)
			    copied_config = _get_output_config("transform", scaler_copy)
			    assert original_config == copied_config


			`class _Empty:`
			`pass`


			class EmptyEstimator(_Empty, BaseEstimator):
			    """Estimator with no parameters that lists ``_Empty`` before ``BaseEstimator``."""


			@pytest.mark.parametrize("estimator", [BaseEstimator(), EmptyEstimator()])
			def test_estimator_empty_instance_dict(estimator):
			    """Check that ``__getstate__`` of an attribute-less estimator only holds
			    the scikit-learn version.

			    Python 3.11+ changed behaviour by returning ``None`` instead of raising an
			    ``AttributeError``. Non-regression test for gh-25188.

			    Parameters
			    ----------
			    estimator : estimator instance
			        Estimator without any instance attribute.
			    """
			    state = estimator.__getstate__()
			    expected = {"_sklearn_version": sklearn.__version__}
			    assert state == expected

			    # This should not raise. Round-trip the parametrized instance (rather
			    # than always a fresh ``BaseEstimator()``) so every case is exercised.
			    pickle.loads(pickle.dumps(estimator))


			def test_estimator_getstate_using_slots_error_message():
			    """Check the error raised for a `BaseEstimator` subclass using `__slots__`.

			    Both a direct `__getstate__` call and `pickle.dumps` must raise a
			    `TypeError` carrying the expected message.
			    """

			    class WithSlots:
			        __slots__ = ("x",)

			    class Estimator(BaseEstimator, WithSlots):
			        pass

			    expected_error = (
			        "You cannot use `__slots__` in objects inheriting from "
			        "`sklearn.base.BaseEstimator`"
			    )

			    # Direct call to the state hook.
			    with pytest.raises(TypeError, match=expected_error):
			        Estimator().__getstate__()

			    # Same failure when going through pickle.
			    with pytest.raises(TypeError, match=expected_error):
			        pickle.dumps(Estimator())