"""
General tests for all estimators in sklearn.
"""
# Authors: Andreas Mueller <amueller@ais.uni-bonn.de>
# Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD 3 clause
import os
import warnings
import sys
import re
import pkgutil
from inspect import isgenerator
from functools import partial
import pytest
from sklearn.utils import all_estimators
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.exceptions import FitFailedWarning
from sklearn.utils.estimator_checks import check_estimator
import sklearn
from sklearn.base import BiclusterMixin
from sklearn.decomposition import PCA
from sklearn.linear_model._base import LinearClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.utils import IS_PYPY
from sklearn.utils._testing import SkipTest
from sklearn.utils.estimator_checks import (
    _construct_instance,
    _set_checking_parameters,
    _get_check_estimator_ids,
    check_class_weight_balanced_linear_classifier,
    parametrize_with_checks,
    check_n_features_in_after_fitting,
)


def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        msg = ("Base estimators such as {0} should not be included"
               " in all_estimators").format(name)
        assert not name.lower().startswith('base'), msg


def _sample_func(x, y=1):
    pass


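# _get_check_estimator_ids builds the pytest ids asserted below: for a partial
# only its keyword arguments appear (positional arguments are dropped), and
# for an estimator only non-default parameters appear, with whitespace
# stripped from the repr.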
@pytest.mark.parametrize("val, expected", [
    (partial(_sample_func, y=1), "_sample_func(y=1)"),
    (_sample_func, "_sample_func"),
    (partial(_sample_func, 'world'), "_sample_func"),
    (LogisticRegression(C=2.0), "LogisticRegression(C=2.0)"),
    (LogisticRegression(random_state=1, solver='newton-cg',
                        class_weight='balanced', warm_start=True),
     "LogisticRegression(class_weight='balanced',random_state=1,"
     "solver='newton-cg',warm_start=True)")
])
def test_get_check_estimator_ids(val, expected):
    assert _get_check_estimator_ids(val) == expected


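# _tested_estimators yields a default-constructed instance of every public
# estimator, skipping biclustering estimators and classes that cannot be
# instantiated without required arguments (SkipTest from _construct_instance).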
def _tested_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        try:
            estimator = _construct_instance(Estimator)
        except SkipTest:
            continue

        yield estimator


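# parametrize_with_checks expands into one test case per (estimator, check)
# pair, so each common check reports as its own test.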
@parametrize_with_checks(list(_tested_estimators()))
def test_estimators(estimator, check, request):
    # Common tests for estimator instances
    with ignore_warnings(category=(FutureWarning,
                                   ConvergenceWarning,
                                   UserWarning)):
        _set_checking_parameters(estimator)
        check(estimator)


def test_check_estimator_generate_only():
    all_instance_gen_checks = check_estimator(LogisticRegression(),
                                              generate_only=True)
    assert isgenerator(all_instance_gen_checks)


@ignore_warnings(category=(DeprecationWarning, FutureWarning))
# ignore deprecated open(.., 'U') in numpy distutils
def test_configure():
    # Smoke test the 'configure' step of setup; this exercises all the
    # 'configure' functions in the setup.py files of scikit-learn.
    # The test requires Cython, which is not necessarily available when
    # running the tests of an installed version of scikit-learn or when
    # scikit-learn is installed in editable mode with pip build isolation
    # enabled.
    pytest.importorskip("Cython")
    cwd = os.getcwd()
    setup_path = os.path.abspath(os.path.join(sklearn.__path__[0], '..'))
    setup_filename = os.path.join(setup_path, 'setup.py')
    if not os.path.exists(setup_filename):
        pytest.skip('setup.py not available')
    # XXX unreached code as of v0.22
    try:
        os.chdir(setup_path)
        old_argv = sys.argv
        sys.argv = ['setup.py', 'config']
        with warnings.catch_warnings():
            # The configuration spits out warnings when not finding
            # Blas/Atlas development headers
            warnings.simplefilter('ignore', UserWarning)
            with open('setup.py') as f:
                exec(f.read(), dict(__name__='__main__'))
    finally:
        sys.argv = old_argv
        os.chdir(cwd)


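# Yield (name, class) pairs for linear classifiers that expose a class_weight
# parameter and have no required constructor arguments.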
def _tested_linear_classifiers():
    classifiers = all_estimators(type_filter='classifier')

    with warnings.catch_warnings(record=True):
        for name, clazz in classifiers:
            required_parameters = getattr(clazz, "_required_parameters", [])
            if len(required_parameters):
                # FIXME
                continue

            if ('class_weight' in clazz().get_params().keys() and
                    issubclass(clazz, LinearClassifierMixin)):
                yield name, clazz


@pytest.mark.parametrize("name, Classifier",
                         _tested_linear_classifiers())
def test_class_weight_balanced_linear_classifiers(name, Classifier):
    check_class_weight_balanced_linear_classifier(name, Classifier)


@ignore_warnings
def test_import_all_consistency():
    # Smoke test to check that any name in a __all__ list is actually defined
    # in the namespace of the module or package.
    pkgs = pkgutil.walk_packages(path=sklearn.__path__, prefix='sklearn.',
                                 onerror=lambda _: None)
    submods = [modname for _, modname, _ in pkgs]
    for modname in submods + ['sklearn']:
        if ".tests." in modname:
            continue
        if IS_PYPY and ('_svmlight_format_io' in modname or
                        'feature_extraction._hashing_fast' in modname):
            continue
        package = __import__(modname, fromlist="dummy")
        for name in getattr(package, '__all__', ()):
            assert hasattr(package, name), \
                "Module '{0}' has no attribute '{1}'".format(modname, name)


def test_root_import_all_completeness():
    EXCEPTIONS = ('utils', 'tests', 'base', 'setup', 'conftest')
    for _, modname, _ in pkgutil.walk_packages(path=sklearn.__path__,
                                               onerror=lambda _: None):
        if '.' in modname or modname.startswith('_') or modname in EXCEPTIONS:
            continue
        assert modname in sklearn.__all__


def test_all_tests_are_importable():
    # Ensure that for each contentful subpackage, there is a test directory
    # within it that is also a subpackage (i.e. a directory with __init__.py)
    HAS_TESTS_EXCEPTIONS = re.compile(r'''(?x)
                                      \.externals(\.|$)|
                                      \.tests(\.|$)|
                                      \._
                                      ''')
    lookup = {name: ispkg
              for _, name, ispkg
              in pkgutil.walk_packages(sklearn.__path__, prefix='sklearn.')}
    missing_tests = [name for name, ispkg in lookup.items()
                     if ispkg
                     and not HAS_TESTS_EXCEPTIONS.search(name)
                     and name + '.tests' not in lookup]
    assert missing_tests == [], ('{0} do not have `tests` subpackages. '
                                 'Perhaps they require '
                                 '__init__.py or an add_subpackage directive '
                                 'in the parent '
                                 'setup.py'.format(missing_tests))


def test_class_support_removed():
    # Make sure passing classes to check_estimator or parametrize_with_checks
    # raises an error
    msg = "Passing a class was deprecated.* isn't supported anymore"
    with pytest.raises(TypeError, match=msg):
        check_estimator(LogisticRegression)

    with pytest.raises(TypeError, match=msg):
        parametrize_with_checks([LogisticRegression])


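# Build small SearchCV instances for the common checks: first searches over
# bare estimators, then searches over PCA pipelines, where the parameter-grid
# keys are prefixed with the pipeline step name (e.g. "ridge__alpha").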
def _generate_search_cv_instances():
    for SearchCV, (Estimator, param_grid) in zip(
        [GridSearchCV, RandomizedSearchCV],
        [
            (Ridge, {"alpha": [0.1, 1.0]}),
            (LogisticRegression, {"C": [0.1, 1.0]}),
        ],
    ):
        yield SearchCV(Estimator(), param_grid)

    for SearchCV, (Estimator, param_grid) in zip(
        [GridSearchCV, RandomizedSearchCV],
        [
            (Ridge, {"ridge__alpha": [0.1, 1.0]}),
            (LogisticRegression, {"logisticregression__C": [0.1, 1.0]}),
        ],
    ):
        yield SearchCV(
            make_pipeline(PCA(), Estimator()), param_grid
        ).set_params(error_score="raise")


@parametrize_with_checks(list(_generate_search_cv_instances()))
def test_search_cv(estimator, check, request):
    # Common tests for SearchCV instances
    # We have a separate test because those meta-estimators can accept a
    # wide range of base estimators (classifiers, regressors, pipelines)
    with ignore_warnings(
        category=(
            FutureWarning,
            ConvergenceWarning,
            UserWarning,
            FitFailedWarning,
        )
    ):
        check(estimator)


# TODO: As more modules are fixed, remove them from this list so that they
# get tested. Once every module is done, the checks can move into
# sklearn.utils.estimator_checks.check_n_features_in.
#
# check_estimators_partial_fit_n_features can then either be removed or
# updated with two more assertions:
# 1. `n_features_in_` is set during the first call to `partial_fit`.
# 2. A stricter check on the error message.
#
# check_classifiers_train would need to be updated with the error message.
N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
    'calibration',
    'compose',
    'covariance',
    'cross_decomposition',
    'discriminant_analysis',
    'ensemble',
    'feature_extraction',
    'feature_selection',
    'gaussian_process',
    'isotonic',
    'linear_model',
    'manifold',
    'mixture',
    'model_selection',
    'multiclass',
    'multioutput',
    'naive_bayes',
    'neighbors',
    'pipeline',
    'random_projection',
    'semi_supervised',
    'svm',
}

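# Only estimators whose top-level module is not in the ignore set above are
# checked for n_features_in_ being set after fit.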
N_FEATURES_IN_AFTER_FIT_ESTIMATORS = [
    est for est in _tested_estimators()
    if est.__module__.split('.')[1] not in
    N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE
]


@pytest.mark.parametrize("estimator", N_FEATURES_IN_AFTER_FIT_ESTIMATORS,
                         ids=_get_check_estimator_ids)
def test_check_n_features_in_after_fitting(estimator):
    _set_checking_parameters(estimator)
    check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)