135 lines
5.0 KiB
Python
135 lines
5.0 KiB
Python
|
import pytest
|
||
|
import scipy
|
||
|
import numpy as np
|
||
|
from numpy.testing import assert_array_equal
|
||
|
|
||
|
from sklearn.preprocessing import StandardScaler
|
||
|
from sklearn.pipeline import make_pipeline
|
||
|
from sklearn.feature_selection import SequentialFeatureSelector
|
||
|
from sklearn.datasets import make_regression
|
||
|
from sklearn.linear_model import LinearRegression
|
||
|
from sklearn.experimental import enable_hist_gradient_boosting # noqa
|
||
|
from sklearn.ensemble import HistGradientBoostingRegressor
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('n_features_to_select', (0, 5, 0., -1, 1.1))
def test_bad_n_features_to_select(n_features_to_select):
    # Invalid values (zero, all features, non-positive, fraction > 1) must
    # raise a ValueError at fit time.
    X, y = make_regression(n_features=5)
    selector = SequentialFeatureSelector(
        LinearRegression(), n_features_to_select=n_features_to_select)
    with pytest.raises(ValueError, match="must be either None"):
        selector.fit(X, y)
|
||
|
|
||
|
|
||
|
def test_bad_direction():
    # Any direction other than 'forward'/'backward' must raise at fit time.
    X, y = make_regression(n_features=5)
    selector = SequentialFeatureSelector(LinearRegression(), direction='bad')
    with pytest.raises(ValueError, match="must be either 'forward' or"):
        selector.fit(X, y)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('direction', ('forward', 'backward'))
@pytest.mark.parametrize('n_features_to_select', (1, 5, 9, None))
def test_n_features_to_select(direction, n_features_to_select):
    # The fitted selector must report exactly n_features_to_select features
    # through get_support, n_features_to_select_ and transform.
    X, y = make_regression(n_features=10)
    selector = SequentialFeatureSelector(
        LinearRegression(), n_features_to_select=n_features_to_select,
        direction=direction, cv=2)
    selector.fit(X, y)

    # None defaults to half of the available features (n_features // 2).
    expected = 5 if n_features_to_select is None else n_features_to_select
    assert selector.get_support(indices=True).shape[0] == expected
    assert selector.n_features_to_select_ == expected
    assert selector.transform(X).shape[1] == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('direction', ('forward', 'backward'))
@pytest.mark.parametrize('n_features_to_select, expected', (
    (.1, 1),
    (1., 10),
    (.5, 5),
    (None, 5),  # just to make sure .5 is equivalent to passing None
))
def test_n_features_to_select_float(direction, n_features_to_select, expected):
    # A float n_features_to_select is interpreted as a fraction of the
    # total number of features.
    X, y = make_regression(n_features=10)
    selector = SequentialFeatureSelector(
        LinearRegression(), n_features_to_select=n_features_to_select,
        direction=direction, cv=2)
    selector.fit(X, y)
    assert selector.n_features_to_select_ == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('seed', range(10))
@pytest.mark.parametrize('direction', ('forward', 'backward'))
@pytest.mark.parametrize('n_features_to_select, expected_selected_features', [
    (2, [0, 2]),  # f1 is dropped since it has no predictive power
    (1, [2]),  # f2 is more predictive than f0 so it's kept
])
def test_sanity(seed, direction, n_features_to_select,
                expected_selected_features):
    # Basic sanity check: 3 features, only f0 and f2 are correlated with the
    # target, f2 having a stronger correlation than f0. We expect f1 to be
    # dropped, and f2 to always be selected.
    rng = np.random.RandomState(seed)
    X = rng.randn(100, 3)
    y = 3 * X[:, 0] - 10 * X[:, 2]

    selector = SequentialFeatureSelector(
        LinearRegression(), n_features_to_select=n_features_to_select,
        direction=direction, cv=2)
    selector.fit(X, y)
    assert_array_equal(selector.get_support(indices=True),
                       expected_selected_features)
|
||
|
|
||
|
|
||
|
def test_sparse_support():
    # Make sure sparse data is supported.

    # Seed the generated dataset so any failure is reproducible across runs;
    # the other tests in this module seed their data the same way.
    X, y = make_regression(n_features=10, random_state=0)
    X = scipy.sparse.csr_matrix(X)
    sfs = SequentialFeatureSelector(LinearRegression(), cv=2)
    # fit + transform must both accept a CSR matrix without raising.
    sfs.fit(X, y)
    sfs.transform(X)
|
||
|
|
||
|
|
||
|
def test_nan_support():
    # NaNs are accepted when the wrapped estimator supports them natively,
    # and rejected when it does not.
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 10
    X, y = make_regression(n_samples, n_features, random_state=0)
    nan_mask = rng.randint(0, 2, size=(n_samples, n_features), dtype=bool)
    X[nan_mask] = np.nan

    selector = SequentialFeatureSelector(HistGradientBoostingRegressor(),
                                         cv=2)
    selector.fit(X, y)
    selector.transform(X)

    # LinearRegression does not support nans
    with pytest.raises(ValueError, match='Input contains NaN'):
        SequentialFeatureSelector(LinearRegression(), cv=2).fit(X, y)
|
||
|
|
||
|
|
||
|
def test_pipeline_support():
    # Make sure that pipelines can be passed into SFS and that SFS can be
    # passed into a pipeline
    X, y = make_regression(50, 3, random_state=0)

    # pipeline in SFS
    inner_pipe = make_pipeline(StandardScaler(), LinearRegression())
    selector = SequentialFeatureSelector(inner_pipe, cv=2)
    selector.fit(X, y)
    selector.transform(X)

    # SFS in pipeline
    outer_pipe = make_pipeline(
        StandardScaler(),
        SequentialFeatureSelector(LinearRegression(), cv=2))
    outer_pipe.fit(X, y)
    outer_pipe.transform(X)
|