# Authors:
#
#          Giorgio Patrini
#
# License: BSD 3 clause

import warnings
import itertools

import re
import numpy as np
import numpy.linalg as la
from scipy import sparse, stats

import pytest

from sklearn.utils import gen_batches

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_less
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_allclose_dense_sparse
from sklearn.utils._testing import skip_if_32bit
from sklearn.utils._testing import _convert_container

from sklearn.utils.sparsefuncs import mean_variance_axis
from sklearn.preprocessing import Binarizer
from sklearn.preprocessing import KernelCenterer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import quantile_transform
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import maxabs_scale
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import robust_scale
from sklearn.preprocessing import add_dummy_feature
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import power_transform
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.preprocessing._data import BOUNDS_THRESHOLD
from sklearn.metrics.pairwise import linear_kernel

from sklearn.exceptions import NotFittedError

from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVR
from sklearn.utils import shuffle

from sklearn import datasets


# Iris dataset, loaded once at import time and reused by several tests below.
iris = datasets.load_iris()

# Make some data to be used many times
# NOTE: `rng` is a module-level RandomState that some tests below also draw
# from, so the values those tests see depend on the order of the draws.
rng = np.random.RandomState(0)
n_features = 30
n_samples = 1000
# Per-feature shift and spread applied to the standard-normal draws below.
offsets = rng.uniform(-1, 1, size=n_features)
scales = rng.uniform(1, 10, size=n_features)
X_2d = rng.randn(n_samples, n_features) * scales + offsets
# Single-row (1, n_features) and single-column (n_samples, 1) slices of X_2d.
X_1row = X_2d[0, :].reshape(1, n_features)
X_1col = X_2d[:, 0].reshape(n_samples, 1)
# The same two slices as nested Python lists.
X_list_1row = X_1row.tolist()
X_list_1col = X_1col.tolist()


def toarray(a):
    # Densify objects exposing a `toarray` method (e.g. scipy sparse
    # matrices); anything else is returned untouched.
    return a.toarray() if hasattr(a, "toarray") else a


def _check_dim_1axis(a):
    # Length of `a` along its first axis, after conversion to an ndarray.
    as_array = np.asarray(a)
    return as_array.shape[0]


def assert_correct_incr(i, batch_start, batch_stop, n, chunk_size, n_samples_seen):
    # Check the running sample count after the i-th (0-based) batch.
    # A batch that stops at `n` is the last one and may be shorter than
    # `chunk_size`; every other batch is assumed to be full.
    is_last_batch = batch_stop == n
    if is_last_batch:
        expected_seen = i * chunk_size + (batch_stop - batch_start)
    else:
        expected_seen = (i + 1) * chunk_size
    assert expected_seen == n_samples_seen


def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    # (n_samples, n_features) pairs to exercise.
    shapes = [(2, 3), (3, 2)]

    for n_rows, n_cols in shapes:
        X = rng.randn(n_rows, n_cols)
        y = rng.randn(n_rows)
        # Column-vector weights are 2D and therefore must be rejected.
        bad_sample_weight = rng.randn(n_rows, 1) ** 2

        scaler = StandardScaler()
        with pytest.raises(ValueError):
            scaler.fit(X, y, sample_weight=bad_sample_weight)


@pytest.mark.parametrize(
    ["Xw", "X", "sample_weight"],
    [
        ([[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [1, 2, 3], [4, 5, 6]], [2.0, 1.0]),
        (
            [[1, 0, 1], [0, 0, 1]],
            [[1, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]],
            np.array([1, 3]),
        ),
        (
            [[1, np.nan, 1], [np.nan, np.nan, 1]],
            [
                [1, np.nan, 1],
                [np.nan, np.nan, 1],
                [np.nan, np.nan, 1],
                [np.nan, np.nan, 1],
            ],
            np.array([1, 3]),
        ),
    ],
)
@pytest.mark.parametrize("array_constructor", ["array", "sparse_csr", "sparse_csc"])
def test_standard_scaler_sample_weight(Xw, X, sample_weight, array_constructor):
    # Fitting on `Xw` with `sample_weight` must yield the same statistics as
    # an unweighted fit on `X`, in which the samples are repeated instead.
    # Centering is switched off for the sparse containers.
    center = not array_constructor.startswith("sparse")
    X = _convert_container(X, array_constructor)
    Xw = _convert_container(Xw, array_constructor)

    # weighted StandardScaler
    weighted_scaler = StandardScaler(with_mean=center)
    weighted_scaler.fit(Xw, np.ones(Xw.shape[0]), sample_weight=sample_weight)

    # unweighted, but with repeated samples
    repeated_scaler = StandardScaler(with_mean=center)
    repeated_scaler.fit(X, np.ones(X.shape[0]))

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]

    for stat in ("mean_", "var_"):
        assert_almost_equal(
            getattr(repeated_scaler, stat), getattr(weighted_scaler, stat)
        )
    assert_almost_equal(
        repeated_scaler.transform(X_test), weighted_scaler.transform(X_test)
    )


def test_standard_scaler_1d():
    # Test scaling of dataset along single axis, for a single row and a
    # single column given either as arrays or as nested lists.
    for X in [X_1row, X_1col, X_list_1row, X_list_1col]:
        scaler = StandardScaler()
        X_scaled = scaler.fit(X).transform(X, copy=True)

        if isinstance(X, list):
            X = np.array(X)  # cast only after scaling done

        if _check_dim_1axis(X) == 1:
            # Single sample: each feature is its own mean, the scale falls
            # back to 1 and the scaled row is all zeros.
            n_cols = X.shape[1]
            assert_almost_equal(scaler.mean_, X.ravel())
            assert_almost_equal(scaler.scale_, np.ones(n_cols))
            assert_array_almost_equal(X_scaled.mean(axis=0), np.zeros(n_cols))
            assert_array_almost_equal(X_scaled.std(axis=0), np.zeros(n_cols))
        else:
            # Single feature: the statistics match the global mean / std and
            # the scaled column is standardized.
            assert_almost_equal(scaler.mean_, X.mean())
            assert_almost_equal(scaler.scale_, X.std())
            assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
            assert_array_almost_equal(X_scaled.std(axis=0), 1.0)
        assert scaler.n_samples_seen_ == X.shape[0]

        # check inverse transform
        X_scaled_back = scaler.inverse_transform(X_scaled)
        assert_array_almost_equal(X_scaled_back, X)

    # Constant feature
    X = np.ones((5, 1))
    scaler = StandardScaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert_almost_equal(scaler.mean_, 1.0)
    assert_almost_equal(scaler.scale_, 1.0)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 0.0)
    assert scaler.n_samples_seen_ == X.shape[0]


@pytest.mark.parametrize(
    "sparse_constructor", [None, sparse.csc_matrix, sparse.csr_matrix]
)
@pytest.mark.parametrize("add_sample_weight", [False, True])
def test_standard_scaler_dtype(add_sample_weight, sparse_constructor):
    # Ensure scaling does not affect dtype
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 3
    sample_weight = np.ones(n_samples) if add_sample_weight else None
    # Centering is only requested for dense input.
    use_sparse = sparse_constructor is not None

    for dtype in (np.float16, np.float32, np.float64):
        X = rng.randn(n_samples, n_features).astype(dtype)
        if use_sparse:
            X = sparse_constructor(X)

        scaler = StandardScaler(with_mean=not use_sparse)
        X_scaled = scaler.fit(X, sample_weight=sample_weight).transform(X)

        # The data keeps its dtype while the fitted statistics are float64.
        assert X.dtype == X_scaled.dtype
        assert scaler.mean_.dtype == np.float64
        assert scaler.scale_.dtype == np.float64


@pytest.mark.parametrize(
    "scaler",
    [
        StandardScaler(with_mean=False),
        RobustScaler(with_centering=False),
    ],
)
@pytest.mark.parametrize(
    "sparse_constructor", [np.asarray, sparse.csc_matrix, sparse.csr_matrix]
)
@pytest.mark.parametrize("add_sample_weight", [False, True])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("constant", [0, 1.0, 100.0])
def test_standard_scaler_constant_features(
    scaler, add_sample_weight, sparse_constructor, dtype, constant
):
    # Check that a single constant feature is left unscaled (scale_ == 1),
    # for dense and sparse input, with and without sample weights.

    if isinstance(scaler, RobustScaler) and add_sample_weight:
        pytest.skip(f"{scaler.__class__.__name__} does not yet support sample_weight")

    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 1
    if add_sample_weight:
        fit_params = dict(sample_weight=rng.uniform(size=n_samples) * 2)
    else:
        fit_params = {}
    X_array = np.full(shape=(n_samples, n_features), fill_value=constant, dtype=dtype)
    X = sparse_constructor(X_array)
    X_scaled = scaler.fit(X, **fit_params).transform(X)

    if isinstance(scaler, StandardScaler):
        # The variance info should be close to zero for constant features.
        assert_allclose(scaler.var_, np.zeros(X.shape[1]), atol=1e-7)

    # Constant features should not be scaled (scale of 1.):
    assert_allclose(scaler.scale_, np.ones(X.shape[1]))

    if hasattr(X_scaled, "toarray"):
        assert_allclose(X_scaled.toarray(), X_array)
    else:
        assert_allclose(X_scaled, X)

    if isinstance(scaler, StandardScaler) and not add_sample_weight:
        # Also check consistency with the standard scale function: its
        # output must match the estimator's output (densified if sparse).
        X_scaled_2 = scale(X, with_mean=scaler.with_mean)
        assert_allclose(toarray(X_scaled_2), toarray(X_scaled))


@pytest.mark.parametrize("n_samples", [10, 100, 10_000])
@pytest.mark.parametrize("average", [1e-10, 1, 1e10])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "array_constructor", [np.asarray, sparse.csc_matrix, sparse.csr_matrix]
)
def test_standard_scaler_near_constant_features(
    n_samples, array_constructor, average, dtype
):
    # Check that when the variance is too small (var << mean**2) the feature
    # is considered constant and not scaled.

    min_exponent, max_exponent = -30, 19
    exponents = range(min_exponent, max_exponent + 1)
    scales = np.array([10**i for i in exponents], dtype=dtype)
    true_var = scales**2

    # Make a dataset of known var = scales**2 and mean = average: the first
    # half of the rows sits at average + scales, the rest at average - scales.
    half = n_samples // 2
    X = np.empty((n_samples, scales.shape[0]), dtype=dtype)
    X[:half, :] = average + scales
    X[half:, :] = average - scales

    scaler = StandardScaler(with_mean=False).fit(array_constructor(X))

    # StandardScaler uses float64 accumulators even if the data has a float32
    # dtype.
    eps = np.finfo(np.float64).eps

    # if var < bound = N.eps.var + N².eps².mean², the feature is considered
    # constant and the scale_ attribute is set to 1.
    bounds = n_samples * eps * true_var + n_samples**2 * eps**2 * average**2
    below_bound = true_var <= bounds

    # Check that the smallest exponent is small enough to have some scales
    # below the bound and therefore detected as constant:
    assert np.any(below_bound)

    # Check that such features are actually treated as constant by the scaler:
    assert np.all(scaler.var_[below_bound] <= bounds[below_bound])
    assert_allclose(scaler.scale_[below_bound], 1.0)

    # Depending on the dtype of X, some features might not actually be
    # representable as non constant for small scales (even if above the
    # precision bound of the float64 variance estimate). Such features should
    # be correctly detected as constants with 0 variance by StandardScaler.
    distinct_in_X = X[0, :] - X[-1, :] != 0
    collapsed_in_X = np.logical_not(distinct_in_X)
    assert_allclose(scaler.var_[collapsed_in_X], 0)
    assert_allclose(scaler.scale_[collapsed_in_X], 1)

    # The other features are scaled and scale_ is equal to sqrt(var_) assuming
    # that scales are large enough for average + scale and average - scale to
    # be distinct in X (depending on X's dtype).
    regular = np.logical_and(true_var > bounds, distinct_in_X)
    assert_allclose(scaler.scale_[regular], np.sqrt(scaler.var_)[regular])


def test_scale_1d():
    # 1-d inputs, given both as a plain list and as an ndarray
    values = [1.0, 3.0, 5.0, 0.0]

    for X in (values, np.array(values)):
        standardized = scale(X)
        assert_array_almost_equal(standardized.mean(), 0.0)
        assert_array_almost_equal(standardized.std(), 1.0)
        # Without centering and scaling the input comes back unchanged.
        untouched = scale(X, with_mean=False, with_std=False)
        assert_array_equal(untouched, X)


@skip_if_32bit
def test_standard_scaler_numerical_stability():
    # Test numerical stability of scaling

    def scale_without_user_warning(values):
        # Any UserWarning emitted while scaling is turned into an error.
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            return scale(values)

    # np.log(1e-5) is taken because of its floating point representation
    # was empirically found to cause numerical problems with np.mean & np.std.
    problematic_value = np.log(1e-5)

    # This does not raise a warning as the number of samples is too low
    # to trigger the problem in recent numpy
    x = np.full(8, problematic_value, dtype=np.float64)
    scale_without_user_warning(x)
    assert_array_almost_equal(scale(x), np.zeros(8))

    # with 2 more samples, the std computation run into numerical issues:
    x = np.full(10, problematic_value, dtype=np.float64)
    warning_message = "standard deviation of the data is probably very close to 0"
    with pytest.warns(UserWarning, match=warning_message):
        x_scaled = scale(x)
    assert_array_almost_equal(x_scaled, np.zeros(10))

    # Tiny constant values must scale cleanly, without any warning.
    x = np.full(10, 1e-100, dtype=np.float64)
    x_small_scaled = scale_without_user_warning(x)
    assert_array_almost_equal(x_small_scaled, np.zeros(10))

    # Large values can cause (often recoverable) numerical stability issues:
    x_big = np.full(10, 1e100, dtype=np.float64)
    warning_message = "Dataset may contain too large values"
    with pytest.warns(UserWarning, match=warning_message):
        x_big_scaled = scale(x_big)
    assert_array_almost_equal(x_big_scaled, np.zeros(10))
    assert_array_almost_equal(x_big_scaled, x_small_scaled)

    with pytest.warns(UserWarning, match=warning_message):
        x_big_centered = scale(x_big, with_std=False)
    assert_array_almost_equal(x_big_centered, np.zeros(10))
    assert_array_almost_equal(x_big_centered, x_small_scaled)


def test_scaler_2d_arrays():
    # Test scaling of 2d array along first axis
    rng = np.random.RandomState(0)
    n_features = 5
    n_samples = 4
    zeros_per_feature = n_features * [0.0]
    zeros_per_sample = n_samples * [0.0]
    # The first feature is constant in both datasets below, so its std
    # stays at 0 after scaling while the other features get unit std.
    expected_std = [0.0, 1.0, 1.0, 1.0, 1.0]

    data = rng.randn(n_samples, n_features)
    data[:, 0] = 0.0  # first feature is always of zero

    scaler = StandardScaler()
    scaled = scaler.fit(data).transform(data, copy=True)
    assert not np.isnan(scaled).any()
    assert scaler.n_samples_seen_ == n_samples

    assert_array_almost_equal(scaled.mean(axis=0), zeros_per_feature)
    assert_array_almost_equal(scaled.std(axis=0), expected_std)
    # copy=True: the result must be a new object
    assert scaled is not data

    # check inverse transform
    restored = scaler.inverse_transform(scaled)
    assert restored is not data
    assert restored is not scaled
    assert_array_almost_equal(restored, data)

    # Row-wise scaling through the function interface
    for with_std in (False, True):
        row_scaled = scale(data, axis=1, with_std=with_std)
        assert not np.isnan(row_scaled).any()
        assert_array_almost_equal(row_scaled.mean(axis=1), zeros_per_sample)
        if with_std:
            assert_array_almost_equal(row_scaled.std(axis=1), n_samples * [1.0])
    # Check that the data hasn't been modified
    assert row_scaled is not data

    scaled = scaler.fit(data).transform(data, copy=False)
    assert not np.isnan(scaled).any()
    assert_array_almost_equal(scaled.mean(axis=0), zeros_per_feature)
    assert_array_almost_equal(scaled.std(axis=0), expected_std)
    # copy=False: the very same object must be returned
    assert scaled is data

    data = rng.randn(n_samples, n_features)
    data[:, 0] = 1.0  # first feature is a constant, non zero feature
    scaler = StandardScaler()
    scaled = scaler.fit(data).transform(data, copy=True)
    assert not np.isnan(scaled).any()
    assert_array_almost_equal(scaled.mean(axis=0), zeros_per_feature)
    assert_array_almost_equal(scaled.std(axis=0), expected_std)
    # copy=True again: the result must be a new object
    assert scaled is not data


def test_scaler_float16_overflow():
    # Test if the scaler will not overflow on float16 numpy arrays
    rng = np.random.RandomState(0)
    # float16 has a maximum of about 65500. With values drawn from [5, 10),
    # 200000 samples add up to at least 5 * 200000 = 1000000, which is
    # enough to overflow the data type.
    X = rng.uniform(5, 10, [200000, 1]).astype(np.float16)

    # Any floating point overflow while fitting / transforming raises.
    with np.errstate(over="raise"):
        fitted = StandardScaler().fit(X)
        X_scaled = fitted.transform(X)

    # Calculate the float64 equivalent to verify result
    reference = StandardScaler().fit_transform(X.astype(np.float64))

    # Overflow calculations may cause -inf, inf, or nan. Since there is no nan
    # input, all of the outputs should be finite. This may be redundant since a
    # FloatingPointError exception will be thrown on overflow above.
    assert np.isfinite(X_scaled).all()

    # The normal distribution is very unlikely to go above 4. At 4.0-8.0 the
    # float16 precision is 2^-8 which is around 0.004. Thus only 2 decimals are
    # checked to account for precision differences.
    assert_array_almost_equal(X_scaled, reference, decimal=2)


def test_handle_zeros_in_scale():
    # With copy=True the input must stay untouched while zero and
    # near-zero entries are replaced by 1 in the returned array.
    raw_values = [0, 1e-16, 1, 2, 3]
    scales_in = np.array(raw_values)
    scales_out = _handle_zeros_in_scale(scales_in, copy=True)

    assert_allclose(scales_in, np.array(raw_values))
    assert_allclose(scales_out, np.array([1, 1, 1, 2, 3]))


def test_minmax_scaler_partial_fit():
    # Test if partial_fit run over many batches of size 1 and 50
    # gives the same results as fit
    X = X_2d
    n = X.shape[0]
    fitted_attributes = ("data_min_", "data_max_", "data_range_", "scale_", "min_")

    def assert_same_fit(scaler_batch, scaler_incr):
        # Both scalers must agree on every fitted statistic and on the
        # number of samples seen.
        for attr in fitted_attributes:
            assert_array_almost_equal(
                getattr(scaler_batch, attr), getattr(scaler_incr, attr)
            )
        assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Test the fitted statistics at the end of the process. Batches are
        # generated from the size of X itself, not from a module global.
        scaler_batch = MinMaxScaler().fit(X)

        scaler_incr = MinMaxScaler()
        for batch in gen_batches(n, chunk_size):
            scaler_incr = scaler_incr.partial_fit(X[batch])

        assert_same_fit(scaler_batch, scaler_incr)

        # Test the fitted statistics after 1 step
        batch0 = slice(0, chunk_size)
        scaler_batch = MinMaxScaler().fit(X[batch0])
        scaler_incr = MinMaxScaler().partial_fit(X[batch0])

        assert_same_fit(scaler_batch, scaler_incr)

        # Test the running sample count until the end of partial fits
        scaler_incr = MinMaxScaler()  # Clean estimator
        for i, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(
                i,
                batch_start=batch.start,
                batch_stop=batch.stop,
                n=n,
                chunk_size=chunk_size,
                n_samples_seen=scaler_incr.n_samples_seen_,
            )


def test_standard_scaler_partial_fit():
    # Test if partial_fit run over many batches of size 1 and 50
    # gives the same results as fit
    X = X_2d
    # Sizes are taken from X itself rather than from the module globals.
    n, n_cols = X.shape

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Test mean at the end of the process
        scaler_batch = StandardScaler(with_std=False).fit(X)

        scaler_incr = StandardScaler(with_std=False)
        for batch in gen_batches(n, chunk_size):
            scaler_incr = scaler_incr.partial_fit(X[batch])
        assert_array_almost_equal(scaler_batch.mean_, scaler_incr.mean_)
        assert scaler_batch.var_ == scaler_incr.var_  # Nones
        assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_

        # Test std after 1 step
        batch0 = slice(0, chunk_size)
        scaler_incr = StandardScaler().partial_fit(X[batch0])
        if chunk_size == 1:
            # A single sample has zero variance, so the scale falls back to 1.
            assert_array_almost_equal(
                np.zeros(n_cols, dtype=np.float64), scaler_incr.var_
            )
            assert_array_almost_equal(
                np.ones(n_cols, dtype=np.float64), scaler_incr.scale_
            )
        else:
            assert_array_almost_equal(np.var(X[batch0], axis=0), scaler_incr.var_)
            assert_array_almost_equal(
                np.std(X[batch0], axis=0), scaler_incr.scale_
            )  # no constants

        # Test std until the end of partial fits, and the running sample
        # count after every batch
        scaler_batch = StandardScaler().fit(X)
        scaler_incr = StandardScaler()  # Clean estimator
        for i, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(
                i,
                batch_start=batch.start,
                batch_stop=batch.stop,
                n=n,
                chunk_size=chunk_size,
                n_samples_seen=scaler_incr.n_samples_seen_,
            )

        assert_array_almost_equal(scaler_batch.var_, scaler_incr.var_)
        assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_


def test_standard_scaler_partial_fit_numerical_stability():
    # Test if the incremental computation introduces significant errors
    # for large datasets with values of large magnitude
    rng = np.random.RandomState(0)
    n_features = 2
    n_samples = 100
    offsets = rng.uniform(-1e15, 1e15, size=n_features)
    scales = rng.uniform(1e3, 1e6, size=n_features)
    X = rng.randn(n_samples, n_features) * scales + offsets

    batch_scaler = StandardScaler().fit(X)
    incr_scaler = StandardScaler()
    # Feed the samples one at a time.
    for row in X:
        incr_scaler = incr_scaler.partial_fit(row.reshape(1, n_features))

    # Regardless of abs values, they must agree to 6 significant digits
    tol = 10 ** (-6)
    for stat in ("mean_", "var_", "scale_"):
        assert_allclose(
            getattr(incr_scaler, stat), getattr(batch_scaler, stat), rtol=tol
        )
    # NOTE Be aware that for much larger offsets std is very unstable (last
    # assert) while mean is OK.

    # Sparse input
    shape = (100, 3)
    magnitude = 1e20
    X_dense = rng.randint(0, 2, shape).astype(np.float64) * magnitude

    for X_sparse in (sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)):
        # with_mean=False is required with sparse input
        batch_scaler = StandardScaler(with_mean=False).fit(X_sparse)
        incr_scaler = StandardScaler(with_mean=False)

        # Iterating over the sparse matrix yields one chunk at a time.
        for chunk in X_sparse:
            incr_scaler = incr_scaler.partial_fit(chunk)

        # Regardless of magnitude, they must not differ by more than 6 digits
        tol = 10 ** (-6)
        assert batch_scaler.mean_ is not None
        assert_allclose(incr_scaler.var_, batch_scaler.var_, rtol=tol)
        assert_allclose(incr_scaler.scale_, batch_scaler.scale_, rtol=tol)


@pytest.mark.parametrize("sample_weight", [True, None])
def test_partial_fit_sparse_input(sample_weight):
    # Check that sparsity is not destroyed
    X_dense = np.array([[1.0], [0.0], [0.0], [5.0]])
    X_csr = sparse.csr_matrix(X_dense)
    X_csc = sparse.csc_matrix(X_dense)

    if sample_weight:
        # Replace the flag by actual per-sample weights.
        sample_weight = rng.rand(X_csc.shape[0])

    # Neither centering nor scaling: the scaler acts as the identity.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    for X_sparse in (X_csr, X_csc):
        fitted = null_transform.partial_fit(X_sparse, sample_weight=sample_weight)
        X_null = fitted.transform(X_sparse)
        assert_array_equal(X_null.toarray(), X_sparse.toarray())

        X_orig = null_transform.inverse_transform(X_null)
        assert_array_equal(X_orig.toarray(), X_null.toarray())
        assert_array_equal(X_orig.toarray(), X_sparse.toarray())


@pytest.mark.parametrize("sample_weight", [True, None])
def test_standard_scaler_trasform_with_partial_fit(sample_weight):
    # Check some postconditions after applying partial_fit and transform
    X = X_2d[:100, :]
    total = X.shape[0]

    if sample_weight:
        sample_weight = rng.rand(total)
    unweighted = sample_weight is None

    zero = np.zeros(X.shape[1])
    epsilon = np.finfo(float).eps

    scaler_incr = StandardScaler()
    # Feed one sample at a time and compare with a fresh fit on all the
    # samples seen so far.
    for i, batch in enumerate(gen_batches(total, 1)):
        seen = i + 1
        X_sofar = X[:seen, :]
        snapshot = X_sofar.copy()

        if unweighted:
            scaled_batch = StandardScaler().fit_transform(X_sofar)
            scaler_incr = scaler_incr.partial_fit(X[batch])
        else:
            scaled_batch = StandardScaler().fit_transform(
                X_sofar, sample_weight=sample_weight[:seen]
            )
            scaler_incr = scaler_incr.partial_fit(
                X[batch], sample_weight=sample_weight[batch]
            )
        scaled_incr = scaler_incr.transform(X_sofar)

        assert_array_almost_equal(scaled_batch, scaled_incr)
        assert_array_almost_equal(X_sofar, snapshot)  # No change
        recovered = scaler_incr.inverse_transform(scaled_incr)
        assert_array_almost_equal(X_sofar, recovered)

        # Adding epsilon turns the strict comparison into "less or equal".
        assert_array_less(zero, scaler_incr.var_ + epsilon)
        assert_array_less(zero, scaler_incr.scale_ + epsilon)

        if unweighted:
            # (i+1) because the Scaler has been already fitted
            assert (i + 1) == scaler_incr.n_samples_seen_
        else:
            # With weights, the count is the sum of the weights seen so far.
            assert np.sum(sample_weight[: i + 1]) == pytest.approx(
                scaler_incr.n_samples_seen_
            )


def test_standard_check_array_of_inverse_transform():
    # Check if StandardScaler inverse_transform is
    # converting the integer array to float
    rows = [
        [1, 1, 1, 0, 1, 0],
        [1, 1, 1, 0, 1, 0],
        [0, 8, 0, 1, 0, 0],
        [1, 4, 1, 1, 0, 0],
        [0, 1, 0, 0, 1, 0],
        [0, 4, 0, 1, 0, 1],
    ]
    x = np.array(rows, dtype=np.int32)

    scaler = StandardScaler().fit(x)

    # The input of inverse_transform should be converted to a float array.
    # If not, X *= self.scale_ will fail.
    scaler.inverse_transform(x)


def test_min_max_scaler_iris():
    # Scaled iris features must span exactly the requested range and the
    # inverse transform must recover the original data.
    X = iris.data
    cases = [
        # (scaler, expected minimum, expected maximum)
        (MinMaxScaler(), 0, 1),  # default params
        (MinMaxScaler(feature_range=(1, 2)), 1, 2),
        (MinMaxScaler(feature_range=(-0.5, 0.6)), -0.5, 0.6),
    ]
    for scaler, expected_min, expected_max in cases:
        X_trans = scaler.fit_transform(X)
        assert_array_almost_equal(X_trans.min(axis=0), expected_min)
        assert_array_almost_equal(X_trans.max(axis=0), expected_max)
        X_trans_inv = scaler.inverse_transform(X_trans)
        assert_array_almost_equal(X, X_trans_inv)

    # raises on invalid range
    invalid_scaler = MinMaxScaler(feature_range=(2, 1))
    with pytest.raises(ValueError):
        invalid_scaler.fit(X)


def test_min_max_scaler_zero_variance_features():
    # Check min max scaler on toy data with zero variance features
    X = [[0.0, 1.0, +0.5], [0.0, 1.0, -0.1], [0.0, 1.0, +1.1]]
    X_new = [[+0.0, 2.0, 0.5], [-1.0, 1.0, 0.0], [+0.0, 1.0, 1.5]]

    # Expected outputs for the (0, 1) and (1, 2) feature ranges.
    X_expected_0_1 = [[0.0, 0.0, 0.5], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    X_expected_1_2 = [[1.0, 1.0, 1.5], [1.0, 1.0, 1.0], [1.0, 1.0, 2.0]]
    X_expected_0_1_new = [[+0.0, 1.0, 0.500], [-1.0, 0.0, 0.083], [+0.0, 0.0, 1.333]]

    # default params
    default_scaler = MinMaxScaler()
    scaled = default_scaler.fit_transform(X)
    assert_array_almost_equal(scaled, X_expected_0_1)
    assert_array_almost_equal(X, default_scaler.inverse_transform(scaled))

    # unseen data is transformed with the statistics learnt on X
    scaled_new = default_scaler.transform(X_new)
    assert_array_almost_equal(scaled_new, X_expected_0_1_new, decimal=2)

    # not default params
    shifted_scaler = MinMaxScaler(feature_range=(1, 2))
    assert_array_almost_equal(shifted_scaler.fit_transform(X), X_expected_1_2)

    # function interface
    assert_array_almost_equal(minmax_scale(X), X_expected_0_1)
    assert_array_almost_equal(minmax_scale(X, feature_range=(1, 2)), X_expected_1_2)


def test_minmax_scale_axis1():
    # Scaling along axis=1 must map every sample (row) onto [0, 1].
    row_scaled = minmax_scale(iris.data, axis=1)
    row_min = np.min(row_scaled, axis=1)
    row_max = np.max(row_scaled, axis=1)
    assert_array_almost_equal(row_min, 0)
    assert_array_almost_equal(row_max, 1)


def test_min_max_scaler_1d():
    # Test scaling of dataset along single axis, for a single row and a
    # single column given either as arrays or as nested lists.
    for X in [X_1row, X_1col, X_list_1row, X_list_1col]:

        scaler = MinMaxScaler(copy=True)
        X_scaled = scaler.fit(X).transform(X)

        if isinstance(X, list):
            X = np.array(X)  # cast only after scaling done

        if _check_dim_1axis(X) == 1:
            # Single sample: every feature has a zero range and is mapped
            # onto the lower bound of the default feature range.
            n_cols = X.shape[1]
            assert_array_almost_equal(X_scaled.min(axis=0), np.zeros(n_cols))
            assert_array_almost_equal(X_scaled.max(axis=0), np.zeros(n_cols))
        else:
            assert_array_almost_equal(X_scaled.min(axis=0), 0.0)
            assert_array_almost_equal(X_scaled.max(axis=0), 1.0)
        assert scaler.n_samples_seen_ == X.shape[0]

        # check inverse transform
        X_scaled_back = scaler.inverse_transform(X_scaled)
        assert_array_almost_equal(X_scaled_back, X)

    # Constant feature
    X = np.ones((5, 1))
    scaler = MinMaxScaler()
    X_scaled = scaler.fit(X).transform(X)
    assert X_scaled.min() >= 0.0
    assert X_scaled.max() <= 1.0
    assert scaler.n_samples_seen_ == X.shape[0]

    # Function interface
    X_1d = X_1row.ravel()
    min_ = X_1d.min()
    max_ = X_1d.max()
    assert_array_almost_equal(
        (X_1d - min_) / (max_ - min_), minmax_scale(X_1d, copy=True)
    )


@pytest.mark.parametrize("sample_weight", [True, None])
def test_scaler_without_centering(sample_weight):
    # Check that StandardScaler(with_mean=False) gives consistent results on
    # dense, CSR and CSC inputs, with and without sample weights.
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    if sample_weight:
        # Replace the flag by actual per-sample weights.
        sample_weight = rng.rand(X.shape[0])

    # Centering (the default) is rejected for sparse input.
    with pytest.raises(ValueError):
        StandardScaler().fit(X_csr)
    with pytest.raises(ValueError):
        StandardScaler().fit(X_csc)

    # Neither centering nor scaling: the stored values are left untouched.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X, sample_weight=sample_weight)
    X_scaled = scaler.transform(X, copy=True)
    assert not np.any(np.isnan(X_scaled))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr, sample_weight=sample_weight)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert not np.any(np.isnan(X_csr_scaled.data))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc, sample_weight=sample_weight)
    X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert not np.any(np.isnan(X_csc_scaled.data))

    # The fitted statistics must not depend on the input container.
    assert_array_almost_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.var_, scaler_csr.var_)
    assert_array_almost_equal(scaler.scale_, scaler_csr.scale_)
    assert_array_almost_equal(scaler.n_samples_seen_, scaler_csr.n_samples_seen_)

    assert_array_almost_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.var_, scaler_csc.var_)
    assert_array_almost_equal(scaler.scale_, scaler_csc.scale_)
    assert_array_almost_equal(scaler.n_samples_seen_, scaler_csc.n_samples_seen_)

    if sample_weight is None:
        # Reference values for the unweighted fit with this seed; the data
        # is scaled but not centered, hence the non-zero means.
        assert_array_almost_equal(
            X_scaled.mean(axis=0), [0.0, -0.01, 2.24, -0.35, -0.78], 2
        )
        assert_array_almost_equal(X_scaled.std(axis=0), [0.0, 1.0, 1.0, 1.0, 1.0])

    X_csr_scaled_mean, X_csr_scaled_var = mean_variance_axis(X_csr_scaled, 0)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_var, X_scaled.var(axis=0))

    # Check that X has not been modified (copy)
    assert X_scaled is not X
    assert X_csr_scaled is not X_csr

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert X_csr_scaled_back is not X_csr
    assert X_csr_scaled_back is not X_csr_scaled
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # Round trip the CSC data through the scaler that was fitted on it.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert X_csc_scaled_back is not X_csc
    assert X_csc_scaled_back is not X_csc_scaled
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)


@pytest.mark.parametrize("with_mean", [True, False])
@pytest.mark.parametrize("with_std", [True, False])
@pytest.mark.parametrize(
    "array_constructor", [np.asarray, sparse.csc_matrix, sparse.csr_matrix]
)
def test_scaler_n_samples_seen_with_nan(with_mean, with_std, array_constructor):
    # The per-feature sample count learned by StandardScaler must not include
    # the NaN entries of each column.
    raw = np.array(
        [[0, 1, 3], [np.nan, 6, 10], [5, 4, np.nan], [8, 0, np.nan]],
        dtype=np.float64,
    )
    data = array_constructor(raw)

    # Centering is not available for sparse input, so that combination is
    # skipped rather than tested.
    if with_mean and sparse.issparse(data):
        pytest.skip("'with_mean=True' cannot be used with sparse matrix.")

    fitted = StandardScaler(with_mean=with_mean, with_std=with_std).fit(data)

    expected_counts = np.array([3, 4, 2])
    assert_array_equal(fitted.n_samples_seen_, expected_counts)


def _check_identity_scalers_attributes(scaler_1, scaler_2):
    assert scaler_1.mean_ is scaler_2.mean_ is None
    assert scaler_1.var_ is scaler_2.var_ is None
    assert scaler_1.scale_ is scaler_2.scale_ is None
    assert scaler_1.n_samples_seen_ == scaler_2.n_samples_seen_


def test_scaler_return_identity():
    # With both with_mean and with_std disabled, StandardScaler must return
    # its input unchanged and expose no fitted statistics, for dense, CSR and
    # CSC data alike, after fit_transform, partial_fit and fit.
    dense = np.array([[0, 1, 3], [5, 6, 0], [8, 0, 10]], dtype=np.float64)
    csr = sparse.csr_matrix(dense)
    csc = csr.tocsc()

    scaler_dense = StandardScaler(with_mean=False, with_std=False)
    dense_out = scaler_dense.fit_transform(dense)

    scaler_csr = clone(scaler_dense)
    csr_out = scaler_csr.fit_transform(csr)

    scaler_csc = clone(scaler_dense)
    csc_out = scaler_csc.fit_transform(csc)

    assert_allclose_dense_sparse(csr_out, csr)
    assert_allclose_dense_sparse(csc_out, csc)
    assert_allclose(dense_out, dense)

    scalers = [scaler_dense, scaler_csr, scaler_csc]
    inputs = [dense, csr, csc]

    def check_all_pairs():
        # Compare the fitted attributes of every pair of scalers.
        for first, second in itertools.combinations(scalers, 2):
            _check_identity_scalers_attributes(first, second)

    check_all_pairs()

    for scaler, data in zip(scalers, inputs):
        scaler.partial_fit(data)
    check_all_pairs()

    for scaler, data in zip(scalers, inputs):
        scaler.fit(data)
    check_all_pairs()


def test_scaler_int():
    # Check that StandardScaler converts integer input to floating point for
    # both sparse (CSR and CSC) and dense matrices, and that the three
    # representations lead to the same fitted statistics.
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # A scaler with neither centering nor scaling must round-trip the data.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert not np.any(np.isnan(X_scaled))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert not np.any(np.isnan(X_csr_scaled.data))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert not np.any(np.isnan(X_csc_scaled.data))

    assert_array_almost_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.var_, scaler_csr.var_)
    assert_array_almost_equal(scaler.scale_, scaler_csr.scale_)

    assert_array_almost_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.var_, scaler_csc.var_)
    assert_array_almost_equal(scaler.scale_, scaler_csc.scale_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0.0, 1.109, 1.856, 21.0, 1.559], 2
    )
    assert_array_almost_equal(X_scaled.std(axis=0), [0.0, 1.0, 1.0, 1.0, 1.0])

    # mean_variance_axis yields a variance as its second output, so it is
    # compared against the variance (not the standard deviation) of the
    # dense result.
    X_csr_scaled_mean, X_csr_scaled_var = mean_variance_axis(
        X_csr_scaled.astype(float), 0
    )
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_var, X_scaled.var(axis=0))

    # Check that X has not been modified (copy)
    assert X_scaled is not X
    assert X_csr_scaled is not X_csr

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert X_csr_scaled_back is not X_csr
    assert X_csr_scaled_back is not X_csr_scaled
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # The CSC data is inverted with the scaler that was fitted on CSC input.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled)
    assert X_csc_scaled_back is not X_csc
    assert X_csc_scaled_back is not X_csc_scaled
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)


def test_scaler_without_copy():
    # Fitting a StandardScaler with copy=False must leave the input data
    # untouched, whether it is dense, CSR or CSC.
    rng = np.random.RandomState(42)
    dense = rng.randn(4, 5)
    dense[:, 0] = 0.0  # constant, all-zero first feature
    csr = sparse.csr_matrix(dense)
    csc = sparse.csc_matrix(dense)

    dense_before = dense.copy()
    StandardScaler(copy=False).fit(dense)
    assert_array_equal(dense, dense_before)

    # Sparse input cannot be centered, hence with_mean=False.
    for sparse_data in (csr, csc):
        snapshot = sparse_data.copy()
        StandardScaler(with_mean=False, copy=False).fit(sparse_data)
        assert_array_equal(sparse_data.toarray(), snapshot.toarray())


def test_scale_sparse_with_mean_raise_exception():
    # Centering sparse data must be rejected with a ValueError by scale() and
    # by StandardScaler's fit, transform and inverse_transform.
    rng = np.random.RandomState(42)
    dense = rng.randn(4, 5)
    sparse_inputs = [sparse.csr_matrix(dense), sparse.csc_matrix(dense)]

    # direct calls to scale() and fit() on sparse data
    for sparse_data in sparse_inputs:
        with pytest.raises(ValueError):
            scale(sparse_data, with_mean=True)
        with pytest.raises(ValueError):
            StandardScaler(with_mean=True).fit(sparse_data)

    # transform on sparse data after a fit on a dense array
    dense_scaler = StandardScaler(with_mean=True).fit(dense)
    for sparse_data in sparse_inputs:
        with pytest.raises(ValueError):
            dense_scaler.transform(sparse_data)

    # inverse_transform on a sparse version of the dense transform output
    for to_sparse in (sparse.csr_matrix, sparse.csc_matrix):
        transformed = to_sparse(dense_scaler.transform(dense))
        with pytest.raises(ValueError):
            dense_scaler.inverse_transform(transformed)


def test_scale_input_finiteness_validation():
    # scale() must raise a ValueError when the input holds an infinite value.
    non_finite = [[np.inf, 5, 6, 7, 8]]
    expected_msg = "Input contains infinity or a value too large"
    with pytest.raises(ValueError, match=expected_msg):
        scale(non_finite)


def test_robust_scaler_error_sparse():
    # Fitting a centering RobustScaler on a sparse matrix must raise.
    sparse_data = sparse.rand(1000, 10)
    centering_scaler = RobustScaler(with_centering=True)
    with pytest.raises(ValueError, match="Cannot center sparse matrices"):
        centering_scaler.fit(sparse_data)


@pytest.mark.parametrize("with_centering", [True, False])
@pytest.mark.parametrize("with_scaling", [True, False])
@pytest.mark.parametrize("X", [np.random.randn(10, 3), sparse.rand(10, 3, density=0.5)])
def test_robust_scaler_attributes(X, with_centering, with_scaling):
    # center_ and scale_ must be ndarrays when the matching option is enabled
    # and None when it is disabled.
    if sparse.issparse(X) and with_centering:
        pytest.skip("RobustScaler cannot center sparse matrix")

    fitted = RobustScaler(with_centering=with_centering, with_scaling=with_scaling)
    fitted.fit(X)

    checks = (
        (with_centering, fitted.center_),
        (with_scaling, fitted.scale_),
    )
    for enabled, learned in checks:
        if enabled:
            assert isinstance(learned, np.ndarray)
        else:
            assert learned is None


def test_robust_scaler_col_zero_sparse():
    # check that the scaler is working when there is not data materialized in a
    # column of a sparse matrix
    # A seeded generator keeps the test data reproducible across runs.
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    X[:, 0] = 0
    X = sparse.csr_matrix(X)

    scaler = RobustScaler(with_centering=False)
    scaler.fit(X)
    # The all-zero column is expected to get a scale of 1.
    assert scaler.scale_[0] == pytest.approx(1)

    # ... so transforming must leave that column unchanged.
    X_trans = scaler.transform(X)
    assert_allclose(X[:, 0].toarray(), X_trans[:, 0].toarray())


def test_robust_scaler_2d_arrays():
    # Robust scaling of a 2d array along its first axis: every column ends up
    # with a zero median and the constant column keeps a zero spread.
    rng = np.random.RandomState(0)
    data = rng.randn(4, 5)
    data[:, 0] = 0.0  # constant, all-zero first feature

    robust = RobustScaler()
    robust.fit(data)
    scaled = robust.transform(data)

    column_medians = np.median(scaled, axis=0)
    assert_array_almost_equal(column_medians, np.zeros(5))
    first_column_std = scaled.std(axis=0)[0]
    assert_array_almost_equal(first_column_std, 0)


@pytest.mark.parametrize("density", [0, 0.05, 0.1, 0.5, 1])
@pytest.mark.parametrize("strictly_signed", ["positive", "negative", "zeros", None])
def test_robust_scaler_equivalence_dense_sparse(density, strictly_signed):
    # Fitting on a sparse matrix and on its dense counterpart must learn the
    # same scale.
    as_sparse = sparse.rand(1000, 5, density=density).tocsc()
    stored = as_sparse.data
    if strictly_signed == "positive":
        as_sparse.data = np.abs(stored)
    elif strictly_signed == "negative":
        as_sparse.data = -np.abs(stored)
    elif strictly_signed == "zeros":
        # explicitly stored zeros
        as_sparse.data = np.zeros(stored.shape, dtype=np.float64)
    as_dense = as_sparse.toarray()

    from_sparse = RobustScaler(with_centering=False)
    from_dense = RobustScaler(with_centering=False)

    from_sparse.fit(as_sparse)
    from_dense.fit(as_dense)

    assert_allclose(from_sparse.scale_, from_dense.scale_)


def test_robust_scaler_transform_one_row_csr():
    # A RobustScaler fitted on dense data must divide a single-row CSR matrix
    # by scale_ and recover the row through inverse_transform.
    rng = np.random.RandomState(0)
    train = rng.randn(4, 5)
    row = np.array([[0.1, 1.0, 2.0, 0.0, -1.0]])
    fitted = RobustScaler(with_centering=False).fit(train)

    scaled_row = fitted.transform(sparse.csr_matrix(row))
    assert_array_almost_equal(scaled_row.toarray(), row / fitted.scale_)

    restored_row = fitted.inverse_transform(scaled_row)
    assert_array_almost_equal(row, restored_row.toarray())


def test_robust_scaler_iris():
    # On iris, the default RobustScaler gives zero medians and a unit
    # interquartile range, and inverse_transform recovers the data.
    data = iris.data
    robust = RobustScaler()
    scaled = robust.fit_transform(data)
    assert_array_almost_equal(np.median(scaled, axis=0), 0)

    restored = robust.inverse_transform(scaled)
    assert_array_almost_equal(data, restored)

    lower, upper = np.percentile(scaled, q=(25, 75), axis=0)
    assert_array_almost_equal(upper - lower, 1)


def test_robust_scaler_iris_quantiles():
    # Same as the default iris check, but with a (10, 90) quantile range: the
    # spread between those two percentiles must be one after scaling.
    data = iris.data
    robust = RobustScaler(quantile_range=(10, 90))
    scaled = robust.fit_transform(data)
    assert_array_almost_equal(np.median(scaled, axis=0), 0)

    restored = robust.inverse_transform(scaled)
    assert_array_almost_equal(data, restored)

    lower, upper = np.percentile(scaled, q=(10, 90), axis=0)
    assert_array_almost_equal(upper - lower, 1)


def test_quantile_transform_iris():
    # QuantileTransformer must round-trip the iris data for uniform and
    # normal outputs, and for sparse input.
    data = iris.data

    # uniform output distribution
    qt = QuantileTransformer(n_quantiles=30)
    restored = qt.inverse_transform(qt.fit_transform(data))
    assert_array_almost_equal(data, restored)

    # normal output distribution
    qt = QuantileTransformer(n_quantiles=30, output_distribution="normal")
    restored = qt.inverse_transform(qt.fit_transform(data))
    assert_array_almost_equal(data, restored)

    # make sure it is possible to take the inverse of a sparse matrix
    # which contain negative value; this is the case in the iris dataset
    data_csc = sparse.csc_matrix(data)
    transformed_csc = qt.fit_transform(data_csc)
    restored_csc = qt.inverse_transform(transformed_csc)
    assert_array_almost_equal(data_csc.toarray(), restored_csc.toarray())


def test_quantile_transform_check_error():
    # Errors and warnings raised by QuantileTransformer on invalid parameters
    # or invalid input.
    columns = [
        [0, 25, 50, 0, 0, 0, 75, 0, 0, 100],
        [2, 4, 0, 0, 6, 8, 0, 10, 0, 0],
        [0, 0, 2.6, 4.1, 0, 0, 2.3, 0, 9.5, 0.1],
    ]
    X = sparse.csc_matrix(np.transpose(columns))

    # same data, except for one negative entry in the second feature
    columns_neg = [list(column) for column in columns]
    columns_neg[1][0] = -2
    X_neg = sparse.csc_matrix(np.transpose(columns_neg))

    too_many_quantiles_msg = (
        "The number of quantiles cannot be greater than "
        "the number of samples used. Got 1000 quantiles "
        "and 10 samples."
    )
    with pytest.raises(ValueError, match=too_many_quantiles_msg):
        QuantileTransformer(subsample=10).fit(X)

    # negative sparse data is rejected both at fit and at transform time
    negative_msg = "QuantileTransformer only accepts non-negative sparse matrices."
    qt = QuantileTransformer(n_quantiles=10)
    with pytest.raises(ValueError, match=negative_msg):
        qt.fit(X_neg)
    qt.fit(X)
    with pytest.raises(ValueError, match=negative_msg):
        qt.transform(X_neg)

    # wrong number of features given to inverse_transform
    X_bad_feat = np.transpose([columns[0], columns[2]])
    n_features_msg = (
        "X has 2 features, but QuantileTransformer is expecting 3 features as input."
    )
    with pytest.raises(ValueError, match=n_features_msg):
        qt.inverse_transform(X_bad_feat)

    qt = QuantileTransformer(n_quantiles=10).fit(X)
    # check that an error is raised if input is scalar
    with pytest.raises(ValueError, match="Expected 2D array, got scalar array instead"):
        qt.transform(10)

    # check that a warning is raised when n_quantiles > n_samples
    qt = QuantileTransformer(n_quantiles=100)
    with pytest.warns(UserWarning, match="n_quantiles is set to n_samples") as record:
        qt.fit(X)
    assert len(record) == 1
    assert qt.n_quantiles_ == X.shape[0]


def test_quantile_transform_sparse_ignore_zeros():
    # Behaviour of ignore_implicit_zeros=True on dense input (warning) and on
    # sparse input, with and without explicitly stored zeros.
    dense = np.array([[0, 1], [0, 0], [0, 2], [0, 2], [0, 1]])
    as_csc = sparse.csc_matrix(dense)
    qt = QuantileTransformer(ignore_implicit_zeros=True, n_quantiles=5)

    # dense case -> a warning is raised
    warning_message = (
        "'ignore_implicit_zeros' takes effect"
        " only with sparse matrix. This parameter has no"
        " effect."
    )
    with pytest.warns(UserWarning, match=warning_message):
        qt.fit(dense)

    expected = np.array([[0, 0], [0, 0], [0, 1], [0, 1], [0, 0]])
    transformed = qt.fit_transform(as_csc)
    assert_almost_equal(expected, transformed.toarray())

    # consider the case where sparse entries are missing values and user-given
    # zeros are to be considered
    values = np.array([0, 0, 1, 0, 2, 2, 1, 0, 1, 2, 0])
    col_idx = np.array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    row_idx = np.array([0, 4, 0, 1, 2, 3, 4, 5, 6, 7, 8])
    as_csc = sparse.csc_matrix((values, (row_idx, col_idx)))
    transformed = qt.fit_transform(as_csc)
    expected = np.array(
        [
            [0.0, 0.5],
            [0.0, 0.0],
            [0.0, 1.0],
            [0.0, 1.0],
            [0.0, 0.5],
            [0.0, 0.0],
            [0.0, 0.5],
            [0.0, 1.0],
            [0.0, 0.0],
        ]
    )
    assert_almost_equal(expected, transformed.toarray())

    # stored negative values, with a fresh transformer
    qt = QuantileTransformer(ignore_implicit_zeros=True, n_quantiles=5)
    values = np.array([-1, -1, 1, 0, 0, 0, 1, -1, 1])
    col_idx = np.array([0, 0, 1, 1, 1, 1, 1, 1, 1])
    row_idx = np.array([0, 4, 0, 1, 2, 3, 4, 5, 6])
    as_csc = sparse.csc_matrix((values, (row_idx, col_idx)))
    transformed = qt.fit_transform(as_csc)
    expected = np.array(
        [[0, 1], [0, 0.375], [0, 0.375], [0, 0.375], [0, 1], [0, 0], [0, 1]]
    )
    assert_almost_equal(expected, transformed.toarray())
    restored = qt.inverse_transform(transformed)
    assert_almost_equal(as_csc.toarray(), restored.toarray())

    # check in conjunction with subsampling
    qt = QuantileTransformer(
        ignore_implicit_zeros=True, n_quantiles=5, subsample=8, random_state=0
    )
    transformed = qt.fit_transform(as_csc)
    assert_almost_equal(expected, transformed.toarray())
    restored = qt.inverse_transform(transformed)
    assert_almost_equal(as_csc.toarray(), restored.toarray())


def test_quantile_transform_dense_toy():
    # Uniform-output QuantileTransformer on a small dense matrix.
    data = np.array(
        [[0, 2, 2.6], [25, 4, 4.1], [50, 6, 2.3], [75, 8, 9.5], [100, 10, 0.1]]
    )

    qt = QuantileTransformer(n_quantiles=5)
    qt.fit(data)

    # using a uniform output, each entry of the data should be mapped between
    # 0 and 1 and be equally spaced
    transformed = qt.fit_transform(data)
    evenly_spaced = np.linspace(0, 1, num=5)
    expected = np.tile(evenly_spaced, (3, 1)).T
    assert_almost_equal(np.sort(transformed, axis=0), expected)

    # one row below and one row above the values seen during fit
    out_of_range = np.array(
        [
            [-1, 1, 0],
            [101, 11, 10],
        ]
    )
    expected_bounds = np.array(
        [
            [0, 0, 0],
            [1, 1, 1],
        ]
    )
    assert_array_almost_equal(qt.transform(out_of_range), expected_bounds)

    restored = qt.inverse_transform(transformed)
    assert_array_almost_equal(data, restored)


def test_quantile_transform_subsampling():
    # Test that subsampling the input yields consistent results. We check
    # that the computed quantiles are almost mapped to a [0, 1] vector where
    # values are equally spaced. The infinite norm is checked to be smaller
    # than a given threshold. This is repeated 5 times.
    n_samples = 1000000
    n_quantiles = 1000
    n_rounds = 5
    uniform_grid = np.linspace(0, 1, n_quantiles)

    def subsampled_inf_norms(data, tolerance):
        # Fit one transformer per random state and collect the infinite norm
        # of the gap between its quantiles and the uniform grid.
        norms = []
        for seed in range(n_rounds):
            qt = QuantileTransformer(
                random_state=seed,
                n_quantiles=n_quantiles,
                subsample=n_samples // 10,
            )
            qt.fit(data)
            gap = uniform_grid - np.ravel(qt.quantiles_)
            inf_norm = np.max(np.abs(gap))
            assert inf_norm < tolerance
            norms.append(inf_norm)
        return norms

    # dense support
    dense = np.sort(np.random.sample((n_samples, 1)), axis=0)
    dense_norms = subsampled_inf_norms(dense, 1e-2)
    # each random subsampling yields a unique approximation to the expected
    # linspace CDF
    assert len(np.unique(dense_norms)) == len(dense_norms)

    # sparse support
    as_csc = sparse.rand(n_samples, 1, density=0.99, format="csc", random_state=0)
    sparse_norms = subsampled_inf_norms(as_csc, 1e-1)
    # each random subsampling yields a unique approximation to the expected
    # linspace CDF
    assert len(np.unique(sparse_norms)) == len(sparse_norms)


def test_quantile_transform_sparse_toy():
    # QuantileTransformer on a small CSC matrix, fitted either on the sparse
    # data itself or on its dense counterpart.
    dense = np.array(
        [
            [0.0, 2.0, 0.0],
            [25.0, 4.0, 0.0],
            [50.0, 0.0, 2.6],
            [0.0, 0.0, 4.1],
            [0.0, 6.0, 0.0],
            [0.0, 8.0, 0.0],
            [75.0, 0.0, 2.3],
            [0.0, 10.0, 0.0],
            [0.0, 0.0, 9.5],
            [100.0, 0.0, 0.1],
        ]
    )
    as_csc = sparse.csc_matrix(dense)

    # transformer fitted on the sparse data
    qt_sparse = QuantileTransformer(n_quantiles=10)
    qt_sparse.fit(as_csc)

    transformed = qt_sparse.fit_transform(as_csc)
    assert_array_almost_equal(np.min(transformed.toarray(), axis=0), 0.0)
    assert_array_almost_equal(np.max(transformed.toarray(), axis=0), 1.0)

    restored = qt_sparse.inverse_transform(transformed)
    assert_array_almost_equal(as_csc.toarray(), restored.toarray())

    # transformer fitted on the dense data, applied to the sparse data
    qt_dense = QuantileTransformer(n_quantiles=10).fit(as_csc.toarray())

    transformed = qt_dense.transform(as_csc)
    assert_array_almost_equal(np.min(transformed.toarray(), axis=0), 0.0)
    assert_array_almost_equal(np.max(transformed.toarray(), axis=0), 1.0)

    restored = qt_dense.inverse_transform(transformed)
    assert_array_almost_equal(as_csc.toarray(), restored.toarray())


def test_quantile_transform_axis1():
    # Transforming along axis=1 must match transforming the transposed data
    # along axis=0.
    data = np.array(
        [[0, 25, 50, 75, 100], [2, 4, 6, 8, 10], [2.6, 4.1, 2.3, 9.5, 0.1]]
    )

    along_axis0 = quantile_transform(data.T, axis=0, n_quantiles=5)
    along_axis1 = quantile_transform(data, axis=1, n_quantiles=5)
    assert_array_almost_equal(along_axis0, along_axis1.T)


def test_quantile_transform_bounds():
    # Lower and upper bounds are manually mapped. We check that in the case
    # of a constant feature and binary feature, the bounds are properly mapped.
    dense = np.array([[0, 0], [0, 0], [1, 0]])
    as_csc = sparse.csc_matrix(dense)

    # check sparse and dense are consistent
    dense_qt = QuantileTransformer(n_quantiles=3, random_state=0)
    dense_out = dense_qt.fit_transform(dense)
    assert_array_almost_equal(dense_out, dense)
    sparse_qt = QuantileTransformer(n_quantiles=3, random_state=0)
    sparse_out = sparse_qt.fit_transform(as_csc)
    assert_array_almost_equal(sparse_out.toarray(), dense)
    assert_array_almost_equal(dense_out, sparse_out.toarray())

    # check the consistency of the bounds by learning on 1 matrix
    # and transforming another
    train = np.array([[0, 1], [0, 0.5], [1, 0]])
    other = np.array([[0, 0.1], [0, 0.5], [1, 0.1]])
    qt = QuantileTransformer(n_quantiles=3).fit(train)
    assert_array_almost_equal(qt.transform(other), other)

    # check that values outside of the range learned will be mapped properly.
    X = np.random.random((1000, 1))
    qt = QuantileTransformer()
    qt.fit(X)
    for outside, boundary in ((-10, np.min(X)), (10, np.max(X))):
        assert qt.transform([[outside]]) == qt.transform([[boundary]])
    for outside, pick_bound in ((-10, np.min), (10, np.max)):
        boundary = pick_bound(qt.references_)
        mapped_outside = qt.inverse_transform([[outside]])
        mapped_boundary = qt.inverse_transform([[boundary]])
        assert mapped_outside == mapped_boundary


def test_quantile_transform_and_inverse():
    # transform followed by inverse_transform must recover the input up to 9
    # decimals, on iris and on a tiny column holding a value below
    # BOUNDS_THRESHOLD.
    tiny_column = np.array([[0.0], [BOUNDS_THRESHOLD / 10], [1.5], [2], [3], [3], [4]])
    for data in (iris.data, tiny_column):
        qt = QuantileTransformer(n_quantiles=1000, random_state=0)
        transformed = qt.fit_transform(data)
        restored = qt.inverse_transform(transformed)
        assert_array_almost_equal(data, restored, decimal=9)


def test_quantile_transform_nan():
    # A column made only of NaN gets all-NaN quantiles; columns with some
    # valid values get NaN-free quantiles.
    data = np.array(
        [[np.nan, 0, 0, 1], [np.nan, np.nan, 0, 0.5], [np.nan, 1, 1, 0]]
    )

    qt = QuantileTransformer(n_quantiles=10, random_state=42)
    qt.fit_transform(data)

    learned = qt.quantiles_
    # check that the quantile of the first column is all NaN
    assert np.isnan(learned[:, 0]).all()
    # all other column should not contain NaN
    assert not np.isnan(learned[:, 1:]).any()


@pytest.mark.parametrize("array_type", ["array", "sparse"])
def test_quantile_transformer_sorted_quantiles(array_type):
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/15733
    # Taken from upstream bug report:
    # https://github.com/numpy/numpy/issues/14685
    pattern = [0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 1, 1, 9, 9, 9, 8, 8, 7]
    values = np.array(pattern * 10)
    column = 0.1 * values.reshape(-1, 1)
    data = _convert_container(column, array_type)

    n_quantiles = 100
    fitted = QuantileTransformer(n_quantiles=n_quantiles).fit(data)

    # Check that the estimated quantile thresholds are monotonically
    # increasing:
    thresholds = fitted.quantiles_[:, 0]
    assert len(thresholds) == 100
    steps = np.diff(thresholds)
    assert all(steps >= 0)


def test_robust_scaler_invalid_range():
    # Quantile ranges that are out of [0, 100] or not increasing must be
    # rejected when fitting.
    invalid_ranges = (
        (-1, 90),
        (-2, -3),
        (10, 101),
        (100.5, 101),
        (90, 50),
    )
    expected_msg = r"Invalid quantile range: \("
    for quantile_range in invalid_ranges:
        robust = RobustScaler(quantile_range=quantile_range)

        with pytest.raises(ValueError, match=expected_msg):
            robust.fit(iris.data)


def test_scale_function_without_centering():
    # Check the scale() function with with_mean=False on dense, CSR and CSC
    # input.
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)

    X_scaled = scale(X, with_mean=False)
    assert not np.any(np.isnan(X_scaled))

    X_csr_scaled = scale(X_csr, with_mean=False)
    assert not np.any(np.isnan(X_csr_scaled.data))

    # test csc has same outcome
    X_csc_scaled = scale(X_csr.tocsc(), with_mean=False)
    assert_array_almost_equal(X_scaled, X_csc_scaled.toarray())

    # raises value error on axis != 0
    with pytest.raises(ValueError):
        scale(X_csr, with_mean=False, axis=1)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0.0, -0.01, 2.24, -0.35, -0.78], 2
    )
    assert_array_almost_equal(X_scaled.std(axis=0), [0.0, 1.0, 1.0, 1.0, 1.0])
    # Check that the result is a new object, i.e. X was not scaled in place
    assert X_scaled is not X

    # mean_variance_axis yields a variance as its second output, so it is
    # compared against the variance (not the standard deviation) of the
    # dense result.
    X_csr_scaled_mean, X_csr_scaled_var = mean_variance_axis(X_csr_scaled, 0)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_var, X_scaled.var(axis=0))

    # null scale
    X_csr_scaled = scale(X_csr, with_mean=False, with_std=False, copy=True)
    assert_array_almost_equal(X_csr.toarray(), X_csr_scaled.toarray())


def test_robust_scale_axis1():
    # robust_scale along axis=1 gives every row a zero median and a unit
    # interquartile range.
    scaled = robust_scale(iris.data, axis=1)
    assert_array_almost_equal(np.median(scaled, axis=1), 0)
    lower, upper = np.percentile(scaled, q=(25, 75), axis=1)
    assert_array_almost_equal(upper - lower, 1)


def test_robust_scale_1d_array():
    # robust_scale on a 1d array gives a zero median and a unit interquartile
    # range.
    feature = iris.data[:, 1]
    scaled = robust_scale(feature)
    assert_array_almost_equal(np.median(scaled), 0)
    lower, upper = np.percentile(scaled, q=(25, 75))
    assert_array_almost_equal(upper - lower, 1)


def test_robust_scaler_zero_variance_features():
    # Check RobustScaler on toy data with zero variance features
    train = [[0.0, 1.0, +0.5], [0.0, 1.0, -0.1], [0.0, 1.0, +1.1]]

    robust = RobustScaler()
    scaled = robust.fit_transform(train)

    # NOTE: for such a small sample size, what we expect in the third column
    # depends HEAVILY on the method used to calculate quantiles. The values
    # here were calculated to fit the quantiles produced by np.percentile
    # using numpy 1.9. Calculating quantiles with
    # scipy.stats.mstats.scoreatquantile or scipy.stats.mstats.mquantiles
    # would yield very different results!
    expected = [[0.0, 0.0, +0.0], [0.0, 0.0, -1.0], [0.0, 0.0, +1.0]]
    assert_array_almost_equal(scaled, expected)
    restored = robust.inverse_transform(scaled)
    assert_array_almost_equal(train, restored)

    # make sure new data gets transformed correctly
    unseen = [[+0.0, 2.0, 0.5], [-1.0, 1.0, 0.0], [+0.0, 1.0, 1.5]]
    expected_unseen = [
        [+0.0, 1.0, +0.0],
        [-1.0, 0.0, -0.83333],
        [+0.0, 0.0, +1.66667],
    ]
    scaled_unseen = robust.transform(unseen)
    assert_array_almost_equal(scaled_unseen, expected_unseen, decimal=3)


def test_robust_scaler_unit_variance():
    # Check RobustScaler with unit_variance=True on standard normal data with
    # outliers
    rng = np.random.RandomState(42)
    inliers = rng.randn(1000000, 1)
    high_outliers = np.ones((100, 1)) * 100
    low_outliers = np.ones((100, 1)) * -100
    contaminated = np.vstack([inliers, high_outliers, low_outliers])

    fitted = RobustScaler(quantile_range=(1, 99), unit_variance=True)
    fitted.fit(contaminated)
    scaled_inliers = fitted.transform(inliers)

    assert fitted.center_ == pytest.approx(0, abs=1e-3)
    assert fitted.scale_ == pytest.approx(1, abs=1e-2)
    assert scaled_inliers.std() == pytest.approx(1, abs=1e-2)


def test_maxabs_scaler_zero_variance_features():
    # Check MaxAbsScaler on toy data with zero variance features
    data = [[0.0, 1.0, +0.5], [0.0, 1.0, -0.3], [0.0, 1.0, +1.5], [0.0, 0.0, +0.0]]
    expected = [
        [0.0, 1.0, 1.0 / 3.0],
        [0.0, 1.0, -0.2],
        [0.0, 1.0, 1.0],
        [0.0, 0.0, 0.0],
    ]

    max_abs = MaxAbsScaler()
    scaled = max_abs.fit_transform(data)
    assert_array_almost_equal(scaled, expected)
    restored = max_abs.inverse_transform(scaled)
    assert_array_almost_equal(data, restored)

    # make sure new data gets transformed correctly
    unseen = [[+0.0, 2.0, 0.5], [-1.0, 1.0, 0.0], [+0.0, 1.0, 1.5]]
    expected_unseen = [[+0.0, 2.0, 1.0 / 3.0], [-1.0, 1.0, 0.0], [+0.0, 1.0, 1.0]]
    scaled_unseen = max_abs.transform(unseen)
    assert_array_almost_equal(scaled_unseen, expected_unseen, decimal=2)

    # function interface
    assert_array_almost_equal(maxabs_scale(data), expected)

    # sparse data: the same scaler is refitted on CSR, then on CSC
    data_csr = sparse.csr_matrix(data)
    data_csc = sparse.csc_matrix(data)
    scaled_csr = max_abs.fit_transform(data_csr)
    scaled_csc = max_abs.fit_transform(data_csc)
    assert_array_almost_equal(scaled_csr.toarray(), expected)
    assert_array_almost_equal(scaled_csc.toarray(), expected)
    restored_csr = max_abs.inverse_transform(scaled_csr)
    restored_csc = max_abs.inverse_transform(scaled_csc)
    assert_array_almost_equal(data, restored_csr.toarray())
    assert_array_almost_equal(data, restored_csc.toarray())


def test_maxabs_scaler_large_negative_value():
    # Check MaxAbsScaler on toy data with a large negative value
    data = [
        [0.0, 1.0, +0.5, -1.0],
        [0.0, 1.0, -0.3, -0.5],
        [0.0, 1.0, -100.0, 0.0],
        [0.0, 0.0, +0.0, -2.0],
    ]
    expected = [
        [0.0, 1.0, 0.005, -0.5],
        [0.0, 1.0, -0.003, -0.25],
        [0.0, 1.0, -1.0, 0.0],
        [0.0, 0.0, 0.0, -1.0],
    ]

    scaled = MaxAbsScaler().fit_transform(data)
    assert_array_almost_equal(scaled, expected)


def test_maxabs_scaler_transform_one_row_csr():
    # Check MaxAbsScaler on transforming csr matrix with one row
    row = sparse.csr_matrix([[0.5, 1.0, 1.0]])
    fitted = MaxAbsScaler().fit(row)

    scaled = fitted.transform(row)
    all_ones = sparse.csr_matrix([[1.0, 1.0, 1.0]])
    assert_array_almost_equal(scaled.toarray(), all_ones.toarray())

    restored = fitted.inverse_transform(scaled)
    assert_array_almost_equal(row.toarray(), restored.toarray())


def test_maxabs_scaler_1d():
    # Test scaling of dataset along single axis
    # The single-row and single-column data are each given both in their
    # original form and as a plain nested list.
    for X in [X_1row, X_1col, X_list_1row, X_1col.tolist()]:

        scaler = MaxAbsScaler(copy=True)
        X_scaled = scaler.fit(X).transform(X)

        if isinstance(X, list):
            X = np.array(X)  # cast only after scaling done

        if _check_dim_1axis(X) == 1:
            assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), np.ones(n_features))
        else:
            assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.0)
        assert scaler.n_samples_seen_ == X.shape[0]

        # check inverse transform
        X_scaled_back = scaler.inverse_transform(X_scaled)
        assert_array_almost_equal(X_scaled_back, X)

    # Constant feature
    X = np.ones((5, 1))
    scaler = MaxAbsScaler()
    X_scaled = scaler.fit(X).transform(X)
    assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.0)
    assert scaler.n_samples_seen_ == X.shape[0]

    # function interface
    X_1d = X_1row.ravel()
    max_abs = np.abs(X_1d).max()
    assert_array_almost_equal(X_1d / max_abs, maxabs_scale(X_1d, copy=True))


def test_maxabs_scaler_partial_fit():
    """Check that incremental fitting of MaxAbsScaler matches a one-shot fit.

    ``partial_fit`` is fed dense, CSR and CSC batches for several chunk
    sizes (including one larger than the data) and the fitted attributes are
    compared with those obtained by ``fit`` on the same data.
    """
    X = X_2d[:100, :]
    n = X.shape[0]

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Compare the fitted attributes once all batches have been consumed
        scaler_batch = MaxAbsScaler().fit(X)

        scaler_incr = MaxAbsScaler()
        scaler_incr_csr = MaxAbsScaler()
        scaler_incr_csc = MaxAbsScaler()
        for batch in gen_batches(n, chunk_size):
            X_batch = X[batch]
            scaler_incr = scaler_incr.partial_fit(X_batch)
            scaler_incr_csr = scaler_incr_csr.partial_fit(sparse.csr_matrix(X_batch))
            scaler_incr_csc = scaler_incr_csc.partial_fit(sparse.csc_matrix(X_batch))

        incremental_scalers = (scaler_incr, scaler_incr_csr, scaler_incr_csc)
        for incremental in incremental_scalers:
            assert_array_almost_equal(scaler_batch.max_abs_, incremental.max_abs_)
        for incremental in incremental_scalers:
            assert scaler_batch.n_samples_seen_ == incremental.n_samples_seen_
        for incremental in incremental_scalers:
            assert_array_almost_equal(scaler_batch.scale_, incremental.scale_)
        assert_array_almost_equal(scaler_batch.transform(X), scaler_incr.transform(X))

        # Compare the fitted attributes after a single step
        first_chunk = X[slice(0, chunk_size)]
        scaler_batch = MaxAbsScaler().fit(first_chunk)
        scaler_incr = MaxAbsScaler().partial_fit(first_chunk)

        assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr.max_abs_)
        assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_)
        assert_array_almost_equal(scaler_batch.transform(X), scaler_incr.transform(X))

        # Check the sample counter after every partial fit, up to the last one
        scaler_batch = MaxAbsScaler().fit(X)
        scaler_incr = MaxAbsScaler()  # Clean estimator
        for step, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(
                step,
                batch_start=batch.start,
                batch_stop=batch.stop,
                n=n,
                chunk_size=chunk_size,
                n_samples_seen=scaler_incr.n_samples_seen_,
            )


def test_normalizer_l1():
    """Check l1 normalization with Normalizer on dense and sparse inputs.

    Rows 0-2 must end up with an l1 norm of 1 and the all-zero row 3 must
    stay at 0. Inputs for which ``copy=False`` is asserted to return the
    same object (ndarray, CSR) and inputs that are converted to a new CSR
    matrix (COO, CSC, LIL) are both covered.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm="l1", copy=True)
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm="l1", copy=False)
        X_norm = normalizer.transform(X)
        assert X_norm is X
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        X_norm = Normalizer(norm="l1", copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sparse.csr_matrix)

        X_norm = toarray(X_norm)
        # Recompute the row sums from this result rather than reusing the
        # values left over from the previous loop.
        row_sums = np.abs(X_norm).sum(axis=1)
        for i in range(3):
            assert_almost_equal(row_sums[i], 1.0)
        assert_almost_equal(row_sums[3], 0.0)


def test_normalizer_l2():
    """Check l2 normalization with Normalizer on dense and sparse inputs.

    Rows 0-2 must end up with unit Euclidean norm while the all-zero row 3
    keeps a norm of 0.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # zero out row 3 of the dense data
    X_dense[3, :] = 0.0

    # zero out row 3 of the CSR data in place, keeping explicit zeros stored
    # (can happen in real life)
    row_start = X_sparse_unpruned.indptr[3]
    row_stop = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[row_start:row_stop] = 0.0

    # the regular constructor gives the pruned variant
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    def check_rows(X_arr):
        # rows 0-2 are unit vectors, row 3 is all zeros
        for i in range(3):
            assert_almost_equal(la.norm(X_arr[i]), 1.0)
        assert_almost_equal(la.norm(X_arr[3]), 0.0)

    # inputs that support the no-copy optimisation
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):
        X_copied = Normalizer(norm="l2", copy=True).transform(X)
        assert X_copied is not X
        X_copied = toarray(X_copied)

        X_inplace = Normalizer(norm="l2", copy=False).transform(X)
        assert X_inplace is X
        X_inplace = toarray(X_inplace)

        check_rows(X_copied)
        check_rows(X_inplace)

    # inputs for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        X_norm = Normalizer(norm="l2", copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sparse.csr_matrix)

        check_rows(toarray(X_norm))


def test_normalizer_max():
    """Check max normalization with Normalizer on dense and sparse inputs.

    Rows 0-2 must end up with a largest absolute value of 1 and the all-zero
    row 3 must stay at 0. Inputs for which ``copy=False`` is asserted to
    return the same object (ndarray, CSR) and inputs that are converted to a
    new CSR matrix (COO, CSC, LIL) are both covered.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm="max", copy=True)
        X_norm1 = normalizer.transform(X)
        assert X_norm1 is not X
        X_norm1 = toarray(X_norm1)

        normalizer = Normalizer(norm="max", copy=False)
        X_norm2 = normalizer.transform(X)
        assert X_norm2 is X
        X_norm2 = toarray(X_norm2)

        for X_norm in (X_norm1, X_norm2):
            row_maxs = abs(X_norm).max(axis=1)
            for i in range(3):
                assert_almost_equal(row_maxs[i], 1.0)
            assert_almost_equal(row_maxs[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        X_norm = Normalizer(norm="max", copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sparse.csr_matrix)

        X_norm = toarray(X_norm)
        # Recompute the row maxima from this result rather than reusing the
        # values left over from the previous loop.
        row_maxs = abs(X_norm).max(axis=1)
        for i in range(3):
            assert_almost_equal(row_maxs[i], 1.0)
        assert_almost_equal(row_maxs[3], 0.0)


def test_normalizer_max_sign():
    """Check that max normalization keeps the sign of every entry.

    The normalizing factor has to be positive even when the data is negative.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    # row 3 is all zeros
    X_dense[3, :] = 0.0
    # flip the sign of the entry with the largest magnitude in row 2
    largest_idx = abs(X_dense[2, :]).argmax()
    X_dense[2, largest_idx] *= -1
    X_all_neg = -np.abs(X_dense)
    X_all_neg_sparse = sparse.csr_matrix(X_all_neg)

    normalizer = Normalizer(norm="max")
    for X in (X_dense, X_all_neg, X_all_neg_sparse):
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        signs_after = np.sign(toarray(X_norm))
        assert_array_equal(signs_after, np.sign(toarray(X)))


def test_normalize():
    """Test the ``normalize`` function.

    Only tests functionality not used by the tests for Normalizer: the
    ``axis`` parameter, invalid ``axis``/``norm`` values, dtype preservation
    for float32/float64 on dense and CSR inputs, and ``return_norm``.
    """
    X = np.random.RandomState(37).randn(3, 2)
    assert_array_equal(normalize(X, copy=False), normalize(X.T, axis=0, copy=False).T)
    with pytest.raises(ValueError):
        normalize([[0]], axis=2)
    with pytest.raises(ValueError):
        normalize([[0]], norm="l3")

    rs = np.random.RandomState(0)
    X_dense = rs.randn(10, 5)
    X_sparse = sparse.csr_matrix(X_dense)
    ones = np.ones((10))
    for X in (X_dense, X_sparse):
        for dtype in (np.float32, np.float64):
            for norm in ("l1", "l2"):
                # Always cast from the original container: rebinding X here
                # would feed float32-truncated data to the float64 pass.
                X_typed = X.astype(dtype)
                X_norm = normalize(X_typed, norm=norm)
                assert X_norm.dtype == dtype

                X_norm = toarray(X_norm)
                if norm == "l1":
                    row_sums = np.abs(X_norm).sum(axis=1)
                else:
                    X_norm_squared = X_norm**2
                    row_sums = X_norm_squared.sum(axis=1)

                assert_array_almost_equal(row_sums, ones)

    # Test return_norm
    X_dense = np.array([[3.0, 0, 4.0], [1.0, 0.0, 0.0], [2.0, 3.0, 0.0]])
    for norm in ("l1", "l2", "max"):
        _, norms = normalize(X_dense, norm=norm, return_norm=True)
        if norm == "l1":
            assert_array_almost_equal(norms, np.array([7.0, 1.0, 5.0]))
        elif norm == "l2":
            assert_array_almost_equal(norms, np.array([5.0, 1.0, 3.60555127]))
        else:
            assert_array_almost_equal(norms, np.array([4.0, 1.0, 3.0]))

    # For sparse input, return_norm is only expected to work with norm="max"
    X_sparse = sparse.csr_matrix(X_dense)
    for norm in ("l1", "l2"):
        with pytest.raises(NotImplementedError):
            normalize(X_sparse, norm=norm, return_norm=True)
    _, norms = normalize(X_sparse, norm="max", return_norm=True)
    assert_array_almost_equal(norms, np.array([4.0, 1.0, 3.0]))


def test_binarizer():
    """Check Binarizer on array, list and sparse inputs.

    Covers an explicit positive threshold, the default threshold, the
    ``copy`` parameter and a negative threshold (which is expected to raise
    for sparse input).
    """
    X_ = np.array([[1, 0, 5], [2, 3, -1]])

    for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix):

        # fresh container for each input type, built from a copy of X_
        X = init(X_.copy())

        # threshold=2.0: 4 entries map to 0 and 2 entries map to 1
        binarizer = Binarizer(threshold=2.0, copy=True)
        X_bin = toarray(binarizer.transform(X))
        assert np.sum(X_bin == 0) == 4
        assert np.sum(X_bin == 1) == 2
        # sparse input must give sparse output, and dense input dense output
        X_bin = binarizer.transform(X)
        assert sparse.issparse(X) == sparse.issparse(X_bin)

        # default threshold, after an explicit fit
        binarizer = Binarizer(copy=True).fit(X)
        X_bin = toarray(binarizer.transform(X))
        assert X_bin is not X
        assert np.sum(X_bin == 0) == 2
        assert np.sum(X_bin == 1) == 4

        # default threshold, transform called without a prior fit
        binarizer = Binarizer(copy=True)
        X_bin = binarizer.transform(X)
        assert X_bin is not X
        X_bin = toarray(X_bin)
        assert np.sum(X_bin == 0) == 2
        assert np.sum(X_bin == 1) == 4

        # copy=False: the input object itself is returned, except for lists
        binarizer = Binarizer(copy=False)
        X_bin = binarizer.transform(X)
        if init is not list:
            assert X_bin is X

        # same check on a float64 array
        # NOTE(review): X_float is always an ndarray regardless of `init`, so
        # the `init is not list` guard below looks unnecessary -- confirm.
        binarizer = Binarizer(copy=False)
        X_float = np.array([[1, 0, 5], [2, 3, -1]], dtype=np.float64)
        X_bin = binarizer.transform(X_float)
        if init is not list:
            assert X_bin is X_float

        X_bin = toarray(X_bin)
        assert np.sum(X_bin == 0) == 2
        assert np.sum(X_bin == 1) == 4

    # negative threshold on dense inputs: only one entry maps to 0
    binarizer = Binarizer(threshold=-0.5, copy=True)
    for init in (np.array, list):
        X = init(X_.copy())

        X_bin = toarray(binarizer.transform(X))
        assert np.sum(X_bin == 0) == 1
        assert np.sum(X_bin == 1) == 5
        X_bin = binarizer.transform(X)

    # Cannot use threshold < 0 for sparse
    # (X is the value left over from the last loop iteration above)
    with pytest.raises(ValueError):
        binarizer.transform(sparse.csc_matrix(X))


def test_center_kernel():
    """Check that KernelCenterer matches feature centering for a linear kernel.

    The centered Gram matrices are compared with those built from features
    centered by ``StandardScaler(with_std=False)``, and with the closed form
    of equation (B.3) in B. Schölkopf, A. Smola, and K.R. Müller,
    "Nonlinear component analysis as a kernel eigenvalue problem".
    """
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    scaler = StandardScaler(with_std=False).fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = X_fit @ X_fit.T

    # fit-time kernel: explicit feature centering vs KernelCenterer
    centerer = KernelCenterer()
    K_fit_centered = X_fit_centered @ X_fit_centered.T
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # predict-time kernel: same comparison on unseen samples
    X_pred = rng.random_sample((2, 4))
    K_pred = X_pred @ X_fit.T
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = X_pred_centered @ X_fit_centered.T
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)

    # Closed form, equation (B.3):
    # K_centered3 = (I - 1_M) K (I - 1_M)
    #             =  K - 1_M K - K 1_M + 1_M K 1_M
    n_fit = K_fit.shape[0]
    ones_M = np.ones_like(K_fit) / n_fit
    mean_rows = ones_M @ K_fit
    mean_cols = K_fit @ ones_M
    K_fit_centered3 = K_fit - mean_rows - mean_cols + mean_rows @ ones_M
    assert_allclose(K_fit_centered, K_fit_centered3)

    # K_test_centered3 = (K_test - 1'_M K)(I - 1_M)
    #                  = K_test - 1'_M K - K_test 1_M + 1'_M K 1_M
    ones_prime_M = np.ones_like(K_pred) / n_fit
    K_pred_centered3 = (
        K_pred - ones_prime_M @ K_fit - K_pred @ ones_M + ones_prime_M @ K_fit @ ones_M
    )
    assert_allclose(K_pred_centered, K_pred_centered3)


def test_kernelcenterer_non_linear_kernel():
    """Check kernel centering for a kernel built from a non-linear mapping.

    The centered kernels are compared with those computed from the centered
    mapped data, and with equation (B.3) of B. Schölkopf, A. Smola, and
    K.R. Müller, "Nonlinear component analysis as a kernel eigenvalue
    problem".
    """
    rng = np.random.RandomState(0)
    X = rng.randn(100, 50)
    X_test = rng.randn(20, 50)

    def phi(X):
        """Stack the positive part of X on top of the negated negative part."""
        positive_part = np.clip(X, a_min=0, a_max=None)
        negative_part = -np.clip(X, a_min=None, a_max=0)
        return np.vstack([positive_part, negative_part])

    phi_X, phi_X_test = phi(X), phi(X_test)

    # center the mapped data
    scaler = StandardScaler(with_std=False)
    phi_X_center = scaler.fit_transform(phi_X)
    phi_X_test_center = scaler.transform(phi_X_test)

    # kernels of the raw and of the centered mapped data
    K = phi_X @ phi_X.T
    K_test = phi_X_test @ phi_X.T
    K_center = phi_X_center @ phi_X_center.T
    K_test_center = phi_X_test_center @ phi_X_center.T

    kernel_centerer = KernelCenterer().fit(K)

    assert_allclose(kernel_centerer.transform(K), K_center)
    assert_allclose(kernel_centerer.transform(K_test), K_test_center)

    # Closed form, equation (B.3):
    # K_centered = (I - 1_M) K (I - 1_M)
    #            =  K - 1_M K - K 1_M + 1_M K 1_M
    n_train = K.shape[0]
    ones_M = np.ones_like(K) / n_train
    K_centered = K - ones_M @ K - K @ ones_M + ones_M @ K @ ones_M
    assert_allclose(kernel_centerer.transform(K), K_centered)

    # K_test_centered = (K_test - 1'_M K)(I - 1_M)
    #                 = K_test - 1'_M K - K_test 1_M + 1'_M K 1_M
    ones_prime_M = np.ones_like(K_test) / n_train
    K_test_centered = (
        K_test - ones_prime_M @ K - K_test @ ones_M + ones_prime_M @ K @ ones_M
    )
    assert_allclose(kernel_centerer.transform(K_test), K_test_centered)


def test_cv_pipeline_precomputed():
    """Check that a Pipeline starting with KernelCenterer is pairwise.

    A regression on four coplanar points sharing the same target value is
    cross-validated on a precomputed kernel.
    """
    X = np.array([[3, 0, 0], [0, 3, 0], [0, 0, 3], [1, 1, 1]])
    y_true = np.ones((4,))
    K = X @ X.T
    steps = [("kernel_centerer", KernelCenterer()), ("svr", SVR())]
    pipeline = Pipeline(steps)

    # the pipeline must expose the pairwise tag
    assert pipeline._get_tags()["pairwise"]

    # Cross-validated predictions should be almost perfect.
    # NB: this test is pretty vacuous -- it's mainly to test integration
    #     of Pipeline and KernelCenterer
    y_pred = cross_val_predict(pipeline, K, y_true, cv=2)
    assert_array_almost_equal(y_true, y_pred)


def test_fit_transform():
    """Check that fit_transform(X) equals fit(X).transform(X)."""
    X = np.random.RandomState(0).random_sample((5, 4))
    for estimator in (StandardScaler(), Normalizer(), Binarizer()):
        two_steps = estimator.fit(X).transform(X)
        one_step = estimator.fit_transform(X)
        assert_array_equal(two_steps, one_step)


def test_add_dummy_feature():
    """Check that add_dummy_feature prepends a column of ones to a list."""
    X_aug = add_dummy_feature([[1, 0], [0, 1], [0, 1]])
    expected = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    assert_array_equal(X_aug, expected)


def test_add_dummy_feature_coo():
    """Check that add_dummy_feature keeps the COO format."""
    X_coo = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(X_coo)
    assert sparse.isspmatrix_coo(X), X
    expected = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    assert_array_equal(X.toarray(), expected)


def test_add_dummy_feature_csc():
    """Check that add_dummy_feature keeps the CSC format."""
    X_csc = sparse.csc_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(X_csc)
    assert sparse.isspmatrix_csc(X), X
    expected = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    assert_array_equal(X.toarray(), expected)


def test_add_dummy_feature_csr():
    """Check that add_dummy_feature keeps the CSR format."""
    X_csr = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(X_csr)
    assert sparse.isspmatrix_csr(X), X
    expected = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    assert_array_equal(X.toarray(), expected)


def test_fit_cold_start():
    """Check that refitting on data with another number of features works.

    Fitting twice with different shapes may break a scaler unless its
    internal state is reset between the two calls.
    """
    X_full = iris.data
    X_two_features = X_full[:, :2]

    # Scalers that have a partial_fit method
    scalers = (
        StandardScaler(with_mean=False, with_std=False),
        MinMaxScaler(),
        MaxAbsScaler(),
    )

    for scaler in scalers:
        for data in (X_full, X_two_features):
            scaler.fit_transform(data)


def test_quantile_transform_valid_axis():
    """Check that quantile_transform raises a ValueError for axis=2."""
    X = np.array([[0, 25, 50, 75, 100], [2, 4, 6, 8, 10], [2.6, 4.1, 2.3, 9.5, 0.1]])
    err_msg = "axis should be either equal to 0 or 1. Got axis=2"

    with pytest.raises(ValueError, match=err_msg):
        quantile_transform(X.T, axis=2)


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
def test_power_transformer_notfitted(method):
    """Check that an unfitted PowerTransformer raises NotFittedError."""
    X = np.abs(X_1col)
    pt = PowerTransformer(method=method)
    for unfitted_call in (pt.transform, pt.inverse_transform):
        with pytest.raises(NotFittedError):
            unfitted_call(X)


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
@pytest.mark.parametrize("standardize", [True, False])
@pytest.mark.parametrize("X", [X_1col, X_2d])
def test_power_transformer_inverse(method, standardize, X):
    """Check that inverse_transform undoes fit_transform."""
    if method == "box-cox":
        # box-cox is given absolute values here
        X = np.abs(X)
    pt = PowerTransformer(method=method, standardize=standardize)
    X_roundtrip = pt.inverse_transform(pt.fit_transform(X))
    assert_almost_equal(X, X_roundtrip)


def test_power_transformer_1d():
    """Compare box-cox PowerTransformer with scipy.stats.boxcox on one column.

    Both the class and the ``power_transform`` function are checked, with and
    without standardization, together with the inverse transform and the
    estimated lambda.
    """
    X = np.abs(X_1col)

    for standardize in (True, False):
        pt = PowerTransformer(method="box-cox", standardize=standardize)

        X_trans = pt.fit_transform(X)
        X_trans_func = power_transform(X, method="box-cox", standardize=standardize)

        # reference values from scipy
        X_expected, lambda_expected = stats.boxcox(X.flatten())
        if standardize:
            X_expected = scale(X_expected)
        X_expected_col = X_expected.reshape(-1, 1)

        assert_almost_equal(X_expected_col, X_trans)
        assert_almost_equal(X_expected_col, X_trans_func)

        assert_almost_equal(X, pt.inverse_transform(X_trans))
        assert_almost_equal(lambda_expected, pt.lambdas_[0])

        # one lambda per feature, stored in an ndarray
        assert len(pt.lambdas_) == X.shape[1]
        assert isinstance(pt.lambdas_, np.ndarray)


def test_power_transformer_2d():
    """Compare box-cox PowerTransformer with scipy.stats.boxcox per column.

    Both the class and the ``power_transform`` function are checked, with and
    without standardization, together with the inverse transform.
    """
    X = np.abs(X_2d)

    for standardize in (True, False):
        pt = PowerTransformer(method="box-cox", standardize=standardize)

        from_class = pt.fit_transform(X)
        from_func = power_transform(X, method="box-cox", standardize=standardize)

        for X_trans in (from_class, from_func):
            n_columns = X_trans.shape[1]
            for j in range(n_columns):
                # reference values from scipy for this column
                X_expected, lmbda = stats.boxcox(X[:, j].flatten())
                if standardize:
                    X_expected = scale(X_expected)

                assert_almost_equal(X_trans[:, j], X_expected)
                assert_almost_equal(lmbda, pt.lambdas_[j])

            # the inverse transformation must recover the input
            assert_array_almost_equal(pt.inverse_transform(X_trans), X)

        # one lambda per feature, stored in an ndarray
        assert len(pt.lambdas_) == X.shape[1]
        assert isinstance(pt.lambdas_, np.ndarray)


def test_power_transformer_boxcox_strictly_positive_exception():
    """Check that box-cox rejects data that is not strictly positive.

    ``transform``, ``fit`` and ``power_transform`` must all raise a
    ValueError for an array with negative values and for an all-zero array.
    """
    pt = PowerTransformer(method="box-cox")
    pt.fit(np.abs(X_2d))
    not_positive_message = "strictly positive"

    X_with_negatives = X_2d
    X_all_zeros = np.zeros(X_2d.shape)

    for X_invalid in (X_with_negatives, X_all_zeros):
        with pytest.raises(ValueError, match=not_positive_message):
            pt.transform(X_invalid)

        with pytest.raises(ValueError, match=not_positive_message):
            pt.fit(X_invalid)

        with pytest.raises(ValueError, match=not_positive_message):
            power_transform(X_invalid, method="box-cox")


@pytest.mark.parametrize("X", [X_2d, np.abs(X_2d), -np.abs(X_2d), np.zeros(X_2d.shape)])
def test_power_transformer_yeojohnson_any_input(X):
    """Check that yeo-johnson accepts mixed, positive, negative and zero data."""
    # The call only has to complete without raising.
    power_transform(X, method="yeo-johnson")


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
def test_power_transformer_shape_exception(method):
    """Check the error raised when the number of features changes after fit."""
    X = np.abs(X_2d)
    pt = PowerTransformer(method=method).fit(X)

    # Arrays with a different number of columns than during fitting must be
    # rejected by both transform and inverse_transform.
    wrong_shape_message = (
        r"X has \d+ features, but PowerTransformer is expecting \d+ features"
    )
    X_one_column = X[:, 0:1]

    with pytest.raises(ValueError, match=wrong_shape_message):
        pt.transform(X_one_column)

    with pytest.raises(ValueError, match=wrong_shape_message):
        pt.inverse_transform(X_one_column)


def test_power_transformer_lambda_zero():
    """Check the box-cox round trip when lambda is forced to 0."""
    X = np.abs(X_2d)[:, 0:1]
    pt = PowerTransformer(method="box-cox", standardize=False)

    # Set the lambda = 0 case by hand instead of fitting
    pt.lambdas_ = np.array([0])
    X_roundtrip = pt.inverse_transform(pt.transform(X))
    assert_array_almost_equal(X_roundtrip, X)


def test_power_transformer_lambda_one():
    """Check that yeo-johnson with lambda = 1 is the identity."""
    X = np.abs(X_2d)[:, 0:1]
    pt = PowerTransformer(method="yeo-johnson", standardize=False)

    # lambda is set by hand instead of being fitted
    pt.lambdas_ = np.array([1])
    assert_array_almost_equal(pt.transform(X), X)


@pytest.mark.parametrize(
    "method, lmbda",
    [
        ("box-cox", 0.1),
        ("box-cox", 0.5),
        ("yeo-johnson", 0.1),
        ("yeo-johnson", 0.5),
        ("yeo-johnson", 1.0),
    ],
)
def test_optimization_power_transformer(method, lmbda):
    """Check the lambda optimization of PowerTransformer.

    Procedure:
    - set a predefined value for lambda
    - apply inverse_transform to a normal dist (we get X_inv)
    - apply fit_transform to X_inv (we get X_inv_trans)
    - check that X_inv_trans is roughly equal to X
    """
    n_samples = 20000
    rng = np.random.RandomState(0)
    X = rng.normal(loc=0, scale=1, size=(n_samples, 1))

    # build skewed data from a known lambda
    pt_known = PowerTransformer(method=method, standardize=False)
    pt_known.lambdas_ = [lmbda]
    X_inv = pt_known.inverse_transform(X)

    # let a fresh transformer estimate lambda from that data
    pt_fitted = PowerTransformer(method=method, standardize=False)
    X_inv_trans = pt_fitted.fit_transform(X_inv)

    mean_error = np.linalg.norm(X - X_inv_trans) / n_samples
    assert_almost_equal(0, mean_error, decimal=2)
    assert_almost_equal(0, X_inv_trans.mean(), decimal=1)
    assert_almost_equal(1, X_inv_trans.std(), decimal=1)


def test_yeo_johnson_darwin_example():
    """Check the yeo-johnson lambda on the example from the original paper.

    Paper: "A new family of power transformations to improve normality or
    symmetry" by Yeo and Johnson.
    """
    values = [6.1, -8.4, 1.0, 2.0, 0.7, 2.9, 3.5, 5.1, 1.8, 3.6, 7.0, 3.0, 9.3, 7.5, -6.0]
    X = np.array(values).reshape(-1, 1)
    pt = PowerTransformer(method="yeo-johnson").fit(X)
    assert np.allclose(pt.lambdas_, 1.305, atol=1e-3)


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
def test_power_transformer_nans(method):
    """Check that NaN values are ignored by fit and kept by transform.

    The estimated lambda must not be influenced by NaN values, and
    ``transform`` must leave NaN entries as NaN.
    """
    X_clean = np.abs(X_1col)
    pt = PowerTransformer(method=method)
    lmbda_no_nans = pt.fit(X_clean).lambdas_[0]

    # append as many NaN rows as there are samples, then shuffle the rows
    nan_rows = np.full_like(X_clean, np.nan)
    X_with_nans = shuffle(np.concatenate([X_clean, nan_rows]), random_state=0)

    lmbda_nans = pt.fit(X_with_nans).lambdas_[0]
    assert_almost_equal(lmbda_no_nans, lmbda_nans, decimal=5)

    X_trans = pt.transform(X_with_nans)
    assert_array_equal(np.isnan(X_trans), np.isnan(X_with_nans))


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
@pytest.mark.parametrize("standardize", [True, False])
def test_power_transformer_fit_transform(method, standardize):
    """Check that fit_transform() and fit().transform() give the same values."""
    X = np.abs(X_1col) if method == "box-cox" else X_1col

    pt = PowerTransformer(method, standardize=standardize)
    two_steps = pt.fit(X).transform(X)
    one_step = pt.fit_transform(X)
    assert_array_almost_equal(two_steps, one_step)


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
@pytest.mark.parametrize("standardize", [True, False])
def test_power_transformer_copy_True(method, standardize):
    """Check that X is never modified in place when copy=True.

    This covers fit, transform, fit_transform and inverse_transform.
    """
    X = np.abs(X_1col) if method == "box-cox" else X_1col

    # keep a reference copy and make sure it really is a distinct object
    X_original = X.copy()
    assert X is not X_original
    assert_array_almost_equal(X, X_original)

    pt = PowerTransformer(method, standardize=standardize, copy=True)

    # fit leaves X untouched and transform returns a new object
    pt.fit(X)
    assert_array_almost_equal(X, X_original)
    X_trans = pt.transform(X)
    assert X_trans is not X

    # same for fit_transform
    X_trans = pt.fit_transform(X)
    assert_array_almost_equal(X, X_original)
    assert X_trans is not X

    # inverse_transform does not reuse its input either
    X_inv_trans = pt.inverse_transform(X_trans)
    assert X_trans is not X_inv_trans


@pytest.mark.parametrize("method", ["box-cox", "yeo-johnson"])
@pytest.mark.parametrize("standardize", [True, False])
def test_power_transformer_copy_False(method, standardize):
    """Check the in-place behaviour of PowerTransformer when copy=False.

    ``fit`` must leave X unchanged, while ``transform``, ``fit_transform``
    and ``inverse_transform`` must return the very object they were given.
    """
    # Work on a private array: the copy=False calls below operate on X
    # itself, so using the module-level X_1col directly would overwrite data
    # shared with other tests (np.abs already returns a new array).
    X = np.abs(X_1col) if method == "box-cox" else X_1col.copy()

    X_original = X.copy()
    assert X is not X_original  # sanity checks
    assert_array_almost_equal(X, X_original)

    pt = PowerTransformer(method, standardize=standardize, copy=False)

    pt.fit(X)
    assert_array_almost_equal(X, X_original)  # fit didn't change X

    X_trans = pt.transform(X)
    assert X_trans is X

    # X now holds transformed values; box-cox is given absolute values again
    # before refitting.
    if method == "box-cox":
        X = np.abs(X)
    X_trans = pt.fit_transform(X)
    assert X_trans is X

    X_inv_trans = pt.inverse_transform(X_trans)
    assert X_trans is X_inv_trans


@pytest.mark.parametrize(
    "X_2",
    [
        sparse.random(10, 1, density=0.8, random_state=0),
        sparse.csr_matrix(np.full((10, 1), fill_value=np.nan)),
    ],
)
def test_standard_scaler_sparse_partial_fit_finite_variance(X_2):
    """Check that the variance stays finite after fit followed by partial_fit.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/16448
    """
    # Seed the random matrix so that the test data is the same on every run.
    X_1 = sparse.random(5, 1, density=0.8, random_state=0)
    scaler = StandardScaler(with_mean=False)
    scaler.fit(X_1).partial_fit(X_2)
    assert np.isfinite(scaler.var_[0])


@pytest.mark.parametrize("feature_range", [(0, 1), (-10, 10)])
def test_minmax_scaler_clip(feature_range):
    """Check the behaviour of the parameter 'clip' in MinMaxScaler."""
    X = iris.data
    lower, upper = feature_range
    scaler = MinMaxScaler(feature_range=feature_range, clip=True).fit(X)

    # One sample whose first two features lie below the fitted minimum and
    # whose remaining features lie above the fitted maximum.
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    X_test = [np.r_[X_min[:2] - 10, X_max[2:] + 10]]

    X_transformed = scaler.transform(X_test)
    assert_allclose(X_transformed, [[lower, lower, upper, upper]])


def test_standard_scaler_raise_error_for_1d_input():
    """Check that `StandardScaler.inverse_transform` rejects a 1D array.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19518
    """
    scaler = StandardScaler().fit(X_2d)
    X_1d = X_2d[:, 0]
    with pytest.raises(ValueError, match="Expected 2D array, got 1D array instead"):
        scaler.inverse_transform(X_1d)


def test_power_transformer_significantly_non_gaussian():
    """Check that significantly non-Gaussian data is transformed correctly.

    For some explored lambdas, the transformed data may be constant and will
    be rejected. Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/14959
    """
    raw_values = [0.6, 2.0, 3.0, 4.0] * 4 + [11, 12, 12, 16, 17, 20, 85, 90]
    X_non_gaussian = 1e6 * np.array(raw_values, dtype=np.float64).reshape(-1, 1)
    pt = PowerTransformer()

    # any RuntimeWarning raised during the fit is turned into an error
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        X_trans = pt.fit_transform(X_non_gaussian)

    assert not np.isnan(X_trans).any()
    assert X_trans.mean() == pytest.approx(0.0)
    assert X_trans.std() == pytest.approx(1.0)
    assert X_trans.min() > -2
    assert X_trans.max() < 2


@pytest.mark.parametrize(
    "Transformer",
    [
        MinMaxScaler,
        MaxAbsScaler,
        RobustScaler,
        StandardScaler,
        QuantileTransformer,
        PowerTransformer,
    ],
)
def test_one_to_one_features(Transformer):
    """Check that one-to-one transformers return the input feature names."""
    fitted = Transformer().fit(iris.data)
    expected_names = iris.feature_names
    assert_array_equal(fitted.get_feature_names_out(expected_names), expected_names)


@pytest.mark.parametrize(
    "Transformer",
    [
        MinMaxScaler,
        MaxAbsScaler,
        RobustScaler,
        StandardScaler,
        QuantileTransformer,
        PowerTransformer,
        Normalizer,
        Binarizer,
    ],
)
def test_one_to_one_features_pandas(Transformer):
    """Check feature names of one-to-one transformers fitted on a DataFrame."""
    pd = pytest.importorskip("pandas")

    expected_names = iris.feature_names
    df = pd.DataFrame(iris.data, columns=expected_names)
    tr = Transformer().fit(df)

    # without input_features, the names come from the fitted DataFrame
    assert_array_equal(tr.get_feature_names_out(), expected_names)

    # passing the same names explicitly is accepted
    assert_array_equal(tr.get_feature_names_out(expected_names), expected_names)

    # names that differ from the ones seen during fit are rejected
    invalid_names = list("abcd")
    msg = re.escape("input_features is not equal to feature_names_in_")
    with pytest.raises(ValueError, match=msg):
        tr.get_feature_names_out(invalid_names)


def test_kernel_centerer_feature_names_out():
    """Check the names returned by `KernelCenterer.get_feature_names_out`."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((6, 4))
    X_pairwise = linear_kernel(X)
    centerer = KernelCenterer().fit(X_pairwise)

    # one name per column of the kernel matrix
    n_columns = X_pairwise.shape[1]
    expected_names = [f"kernelcenterer{i}" for i in range(n_columns)]
    assert_array_equal(centerer.get_feature_names_out(), expected_names)