3RNN/Lib/site-packages/sklearn/compose/tests/test_target.py
2024-05-26 19:49:15 +02:00

396 lines
13 KiB
Python

import numpy as np
import pytest
from sklearn import datasets
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.compose import TransformedTargetRegressor
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.utils._testing import assert_allclose, assert_no_warnings
friedman = datasets.make_friedman1(random_state=0)
def test_transform_target_regressor_error():
X, y = friedman
# provide a transformer and functions at the same time
regr = TransformedTargetRegressor(
regressor=LinearRegression(),
transformer=StandardScaler(),
func=np.exp,
inverse_func=np.log,
)
with pytest.raises(
ValueError,
match="'transformer' and functions 'func'/'inverse_func' cannot both be set.",
):
regr.fit(X, y)
# fit with sample_weight with a regressor which does not support it
sample_weight = np.ones((y.shape[0],))
regr = TransformedTargetRegressor(
regressor=OrthogonalMatchingPursuit(), transformer=StandardScaler()
)
with pytest.raises(
TypeError,
match=r"fit\(\) got an unexpected " "keyword argument 'sample_weight'",
):
regr.fit(X, y, sample_weight=sample_weight)
# one of (func, inverse_func) is given but the other one is not
regr = TransformedTargetRegressor(func=np.exp)
with pytest.raises(
ValueError,
match="When 'func' is provided, 'inverse_func' must also be provided",
):
regr.fit(X, y)
regr = TransformedTargetRegressor(inverse_func=np.log)
with pytest.raises(
ValueError,
match="When 'inverse_func' is provided, 'func' must also be provided",
):
regr.fit(X, y)
def test_transform_target_regressor_invertible():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=LinearRegression(),
func=np.sqrt,
inverse_func=np.log,
check_inverse=True,
)
with pytest.warns(
UserWarning,
match=(
"The provided functions or"
" transformer are not strictly inverse of each other."
),
):
regr.fit(X, y)
regr = TransformedTargetRegressor(
regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log
)
regr.set_params(check_inverse=False)
assert_no_warnings(regr.fit, X, y)
def _check_standard_scaled(y, y_pred):
y_mean = np.mean(y, axis=0)
y_std = np.std(y, axis=0)
assert_allclose((y - y_mean) / y_std, y_pred)
def _check_shifted_by_one(y, y_pred):
assert_allclose(y + 1, y_pred)
def test_transform_target_regressor_functions():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=LinearRegression(), func=np.log, inverse_func=np.exp
)
y_pred = regr.fit(X, y).predict(X)
# check the transformer output
y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
assert_allclose(np.log(y), y_tran)
assert_allclose(
y, regr.transformer_.inverse_transform(y_tran.reshape(-1, 1)).squeeze()
)
assert y.shape == y_pred.shape
assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
# check the regressor output
lr = LinearRegression().fit(X, regr.func(y))
assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
def test_transform_target_regressor_functions_multioutput():
X = friedman[0]
y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
regr = TransformedTargetRegressor(
regressor=LinearRegression(), func=np.log, inverse_func=np.exp
)
y_pred = regr.fit(X, y).predict(X)
# check the transformer output
y_tran = regr.transformer_.transform(y)
assert_allclose(np.log(y), y_tran)
assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
assert y.shape == y_pred.shape
assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
# check the regressor output
lr = LinearRegression().fit(X, regr.func(y))
assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
@pytest.mark.parametrize(
"X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_1d_transformer(X, y):
# All transformer in scikit-learn expect 2D data. FunctionTransformer with
# validate=False lift this constraint without checking that the input is a
# 2D vector. We check the consistency of the data shape using a 1D and 2D y
# array.
transformer = FunctionTransformer(
func=lambda x: x + 1, inverse_func=lambda x: x - 1
)
regr = TransformedTargetRegressor(
regressor=LinearRegression(), transformer=transformer
)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
y_tran = regr.transformer_.transform(y)
_check_shifted_by_one(y, y_tran)
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
assert_allclose(regr.regressor_.coef_, lr.coef_)
@pytest.mark.parametrize(
"X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_2d_transformer(X, y):
# Check consistency with transformer accepting only 2D array and a 1D/2D y
# array.
transformer = StandardScaler()
regr = TransformedTargetRegressor(
regressor=LinearRegression(), transformer=transformer
)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
if y.ndim == 1: # create a 2D array and squeeze results
y_tran = regr.transformer_.transform(y.reshape(-1, 1))
else:
y_tran = regr.transformer_.transform(y)
_check_standard_scaled(y, y_tran.squeeze())
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
if y.ndim == 1: # create a 2D array and squeeze results
lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
y_lr_pred = lr.predict(X).reshape(-1, 1)
y_pred2 = transformer2.inverse_transform(y_lr_pred).squeeze()
else:
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
y_pred2 = transformer2.inverse_transform(y_lr_pred)
assert_allclose(y_pred, y_pred2)
assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_2d_transformer_multioutput():
# Check consistency with transformer accepting only 2D array and a 2D y
# array.
X = friedman[0]
y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
transformer = StandardScaler()
regr = TransformedTargetRegressor(
regressor=LinearRegression(), transformer=transformer
)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
# consistency forward transform
y_tran = regr.transformer_.transform(y)
_check_standard_scaled(y, y_tran)
assert y.shape == y_pred.shape
# consistency inverse transform
assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
# consistency of the regressor
lr = LinearRegression()
transformer2 = clone(transformer)
lr.fit(X, transformer2.fit_transform(y))
y_lr_pred = lr.predict(X)
assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_3d_target():
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/18866
# Check with a 3D target with a transformer that reshapes the target
X = friedman[0]
y = np.tile(friedman[1].reshape(-1, 1, 1), [1, 3, 2])
def flatten_data(data):
return data.reshape(data.shape[0], -1)
def unflatten_data(data):
return data.reshape(data.shape[0], -1, 2)
transformer = FunctionTransformer(func=flatten_data, inverse_func=unflatten_data)
regr = TransformedTargetRegressor(
regressor=LinearRegression(), transformer=transformer
)
y_pred = regr.fit(X, y).predict(X)
assert y.shape == y_pred.shape
def test_transform_target_regressor_multi_to_single():
X = friedman[0]
y = np.transpose([friedman[1], (friedman[1] ** 2 + 1)])
def func(y):
out = np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
return out[:, np.newaxis]
def inverse_func(y):
return y
tt = TransformedTargetRegressor(
func=func, inverse_func=inverse_func, check_inverse=False
)
tt.fit(X, y)
y_pred_2d_func = tt.predict(X)
assert y_pred_2d_func.shape == (100, 1)
# force that the function only return a 1D array
def func(y):
return np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
tt = TransformedTargetRegressor(
func=func, inverse_func=inverse_func, check_inverse=False
)
tt.fit(X, y)
y_pred_1d_func = tt.predict(X)
assert y_pred_1d_func.shape == (100, 1)
assert_allclose(y_pred_1d_func, y_pred_2d_func)
class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
def fit(self, X, y=None):
assert isinstance(X, np.ndarray)
return self
def transform(self, X):
assert isinstance(X, np.ndarray)
return X
def inverse_transform(self, X):
assert isinstance(X, np.ndarray)
return X
class DummyCheckerListRegressor(DummyRegressor):
def fit(self, X, y, sample_weight=None):
assert isinstance(X, list)
return super().fit(X, y, sample_weight)
def predict(self, X):
assert isinstance(X, list)
return super().predict(X)
def test_transform_target_regressor_ensure_y_array():
# check that the target ``y`` passed to the transformer will always be a
# numpy array. Similarly, if ``X`` is passed as a list, we check that the
# predictor receive as it is.
X, y = friedman
tt = TransformedTargetRegressor(
transformer=DummyCheckerArrayTransformer(),
regressor=DummyCheckerListRegressor(),
check_inverse=False,
)
tt.fit(X.tolist(), y.tolist())
tt.predict(X.tolist())
with pytest.raises(AssertionError):
tt.fit(X, y.tolist())
with pytest.raises(AssertionError):
tt.predict(X)
class DummyTransformer(TransformerMixin, BaseEstimator):
"""Dummy transformer which count how many time fit was called."""
def __init__(self, fit_counter=0):
self.fit_counter = fit_counter
def fit(self, X, y=None):
self.fit_counter += 1
return self
def transform(self, X):
return X
def inverse_transform(self, X):
return X
@pytest.mark.parametrize("check_inverse", [False, True])
def test_transform_target_regressor_count_fit(check_inverse):
# regression test for gh-issue #11618
# check that we only call a single time fit for the transformer
X, y = friedman
ttr = TransformedTargetRegressor(
transformer=DummyTransformer(), check_inverse=check_inverse
)
ttr.fit(X, y)
assert ttr.transformer_.fit_counter == 1
class DummyRegressorWithExtraFitParams(DummyRegressor):
def fit(self, X, y, sample_weight=None, check_input=True):
# on the test below we force this to false, we make sure this is
# actually passed to the regressor
assert not check_input
return super().fit(X, y, sample_weight)
def test_transform_target_regressor_pass_fit_parameters():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
)
regr.fit(X, y, check_input=False)
assert regr.transformer_.fit_counter == 1
def test_transform_target_regressor_route_pipeline():
X, y = friedman
regr = TransformedTargetRegressor(
regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
)
estimators = [("normalize", StandardScaler()), ("est", regr)]
pip = Pipeline(estimators)
pip.fit(X, y, **{"est__check_input": False})
assert regr.transformer_.fit_counter == 1
class DummyRegressorWithExtraPredictParams(DummyRegressor):
def predict(self, X, check_input=True):
# In the test below we make sure that the check input parameter is
# passed as false
self.predict_called = True
assert not check_input
return super().predict(X)
def test_transform_target_regressor_pass_extra_predict_parameters():
# Checks that predict kwargs are passed to regressor.
X, y = friedman
regr = TransformedTargetRegressor(
regressor=DummyRegressorWithExtraPredictParams(), transformer=DummyTransformer()
)
regr.fit(X, y)
regr.predict(X, check_input=False)
assert regr.regressor_.predict_called