3RNN/Lib/site-packages/sklearn/utils/tests/test_param_validation.py

786 lines
24 KiB
Python
Raw Normal View History

2024-05-26 19:49:15 +02:00
from numbers import Integral, Real
import numpy as np
import pytest
from scipy.sparse import csr_matrix
from sklearn._config import config_context, get_config
from sklearn.base import BaseEstimator, _fit_context
from sklearn.model_selection import LeaveOneOut
from sklearn.utils import deprecated
from sklearn.utils._param_validation import (
HasMethods,
Hidden,
Interval,
InvalidParameterError,
MissingValues,
Options,
RealNotInt,
StrOptions,
_ArrayLikes,
_Booleans,
_Callables,
_CVObjects,
_InstancesOf,
_IterablesNotString,
_NanConstraint,
_NoneConstraint,
_PandasNAConstraint,
_RandomStates,
_SparseMatrices,
_VerboseHelper,
generate_invalid_param_val,
generate_valid_param,
make_constraint,
validate_params,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# Some helpers for the tests
@validate_params(
{"a": [Real], "b": [Real], "c": [Real], "d": [Real]},
prefer_skip_nested_validation=True,
)
def _func(a, b=0, *args, c, d=0, **kwargs):
"""A function to test the validation of functions."""
class _Class:
"""A class to test the _InstancesOf constraint and the validation of methods."""
@validate_params({"a": [Real]}, prefer_skip_nested_validation=True)
def _method(self, a):
"""A validated method"""
@deprecated()
@validate_params({"a": [Real]}, prefer_skip_nested_validation=True)
def _deprecated_method(self, a):
"""A deprecated validated method"""
class _Estimator(BaseEstimator):
"""An estimator to test the validation of estimator parameters."""
_parameter_constraints: dict = {"a": [Real]}
def __init__(self, a):
self.a = a
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X=None, y=None):
pass
@pytest.mark.parametrize("interval_type", [Integral, Real])
def test_interval_range(interval_type):
"""Check the range of values depending on closed."""
interval = Interval(interval_type, -2, 2, closed="left")
assert -2 in interval
assert 2 not in interval
interval = Interval(interval_type, -2, 2, closed="right")
assert -2 not in interval
assert 2 in interval
interval = Interval(interval_type, -2, 2, closed="both")
assert -2 in interval
assert 2 in interval
interval = Interval(interval_type, -2, 2, closed="neither")
assert -2 not in interval
assert 2 not in interval
@pytest.mark.parametrize("interval_type", [Integral, Real])
def test_interval_large_integers(interval_type):
"""Check that Interval constraint work with large integers.
non-regression test for #26648.
"""
interval = Interval(interval_type, 0, 2, closed="neither")
assert 2**65 not in interval
assert 2**128 not in interval
assert float(2**65) not in interval
assert float(2**128) not in interval
interval = Interval(interval_type, 0, 2**128, closed="neither")
assert 2**65 in interval
assert 2**128 not in interval
assert float(2**65) in interval
assert float(2**128) not in interval
assert 2**1024 not in interval
def test_interval_inf_in_bounds():
"""Check that inf is included iff a bound is closed and set to None.
Only valid for real intervals.
"""
interval = Interval(Real, 0, None, closed="right")
assert np.inf in interval
interval = Interval(Real, None, 0, closed="left")
assert -np.inf in interval
interval = Interval(Real, None, None, closed="neither")
assert np.inf not in interval
assert -np.inf not in interval
@pytest.mark.parametrize(
"interval",
[Interval(Real, 0, 1, closed="left"), Interval(Real, None, None, closed="both")],
)
def test_nan_not_in_interval(interval):
"""Check that np.nan is not in any interval."""
assert np.nan not in interval
@pytest.mark.parametrize(
"params, error, match",
[
(
{"type": Integral, "left": 1.0, "right": 2, "closed": "both"},
TypeError,
r"Expecting left to be an int for an interval over the integers",
),
(
{"type": Integral, "left": 1, "right": 2.0, "closed": "neither"},
TypeError,
"Expecting right to be an int for an interval over the integers",
),
(
{"type": Integral, "left": None, "right": 0, "closed": "left"},
ValueError,
r"left can't be None when closed == left",
),
(
{"type": Integral, "left": 0, "right": None, "closed": "right"},
ValueError,
r"right can't be None when closed == right",
),
(
{"type": Integral, "left": 1, "right": -1, "closed": "both"},
ValueError,
r"right can't be less than left",
),
],
)
def test_interval_errors(params, error, match):
"""Check that informative errors are raised for invalid combination of parameters"""
with pytest.raises(error, match=match):
Interval(**params)
def test_stroptions():
"""Sanity check for the StrOptions constraint"""
options = StrOptions({"a", "b", "c"}, deprecated={"c"})
assert options.is_satisfied_by("a")
assert options.is_satisfied_by("c")
assert not options.is_satisfied_by("d")
assert "'c' (deprecated)" in str(options)
def test_options():
"""Sanity check for the Options constraint"""
options = Options(Real, {-0.5, 0.5, np.inf}, deprecated={-0.5})
assert options.is_satisfied_by(-0.5)
assert options.is_satisfied_by(np.inf)
assert not options.is_satisfied_by(1.23)
assert "-0.5 (deprecated)" in str(options)
@pytest.mark.parametrize(
"type, expected_type_name",
[
(int, "int"),
(Integral, "int"),
(Real, "float"),
(np.ndarray, "numpy.ndarray"),
],
)
def test_instances_of_type_human_readable(type, expected_type_name):
"""Check the string representation of the _InstancesOf constraint."""
constraint = _InstancesOf(type)
assert str(constraint) == f"an instance of '{expected_type_name}'"
def test_hasmethods():
"""Check the HasMethods constraint."""
constraint = HasMethods(["a", "b"])
class _Good:
def a(self):
pass # pragma: no cover
def b(self):
pass # pragma: no cover
class _Bad:
def a(self):
pass # pragma: no cover
assert constraint.is_satisfied_by(_Good())
assert not constraint.is_satisfied_by(_Bad())
assert str(constraint) == "an object implementing 'a' and 'b'"
@pytest.mark.parametrize(
"constraint",
[
Interval(Real, None, 0, closed="left"),
Interval(Real, 0, None, closed="left"),
Interval(Real, None, None, closed="neither"),
StrOptions({"a", "b", "c"}),
MissingValues(),
MissingValues(numeric_only=True),
_VerboseHelper(),
HasMethods("fit"),
_IterablesNotString(),
_CVObjects(),
],
)
def test_generate_invalid_param_val(constraint):
"""Check that the value generated does not satisfy the constraint"""
bad_value = generate_invalid_param_val(constraint)
assert not constraint.is_satisfied_by(bad_value)
@pytest.mark.parametrize(
"integer_interval, real_interval",
[
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, -5, 5, closed="both"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, -5, 5, closed="neither"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 4, 5, closed="both"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 5, None, closed="left"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 4, None, closed="neither"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, -5, 5, closed="both"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, -5, 5, closed="neither"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, 1, 2, closed="both"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, None, -5, closed="left"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, None, -4, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, None, 1, closed="right"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, 1, None, closed="left"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, -10, -4, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, -10, -4, closed="right"),
),
(
Interval(Integral, -5, 5, closed="neither"),
Interval(RealNotInt, 6, 10, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="neither"),
Interval(RealNotInt, 6, 10, closed="left"),
),
(
Interval(Integral, 2, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
),
(
Interval(Integral, 1, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
),
],
)
def test_generate_invalid_param_val_2_intervals(integer_interval, real_interval):
"""Check that the value generated for an interval constraint does not satisfy any of
the interval constraints.
"""
bad_value = generate_invalid_param_val(constraint=real_interval)
assert not real_interval.is_satisfied_by(bad_value)
assert not integer_interval.is_satisfied_by(bad_value)
bad_value = generate_invalid_param_val(constraint=integer_interval)
assert not real_interval.is_satisfied_by(bad_value)
assert not integer_interval.is_satisfied_by(bad_value)
@pytest.mark.parametrize(
"constraint",
[
_ArrayLikes(),
_InstancesOf(list),
_Callables(),
_NoneConstraint(),
_RandomStates(),
_SparseMatrices(),
_Booleans(),
Interval(Integral, None, None, closed="neither"),
],
)
def test_generate_invalid_param_val_all_valid(constraint):
"""Check that the function raises NotImplementedError when there's no invalid value
for the constraint.
"""
with pytest.raises(NotImplementedError):
generate_invalid_param_val(constraint)
@pytest.mark.parametrize(
"constraint",
[
_ArrayLikes(),
_Callables(),
_InstancesOf(list),
_NoneConstraint(),
_RandomStates(),
_SparseMatrices(),
_Booleans(),
_VerboseHelper(),
MissingValues(),
MissingValues(numeric_only=True),
StrOptions({"a", "b", "c"}),
Options(Integral, {1, 2, 3}),
Interval(Integral, None, None, closed="neither"),
Interval(Integral, 0, 10, closed="neither"),
Interval(Integral, 0, None, closed="neither"),
Interval(Integral, None, 0, closed="neither"),
Interval(Real, 0, 1, closed="neither"),
Interval(Real, 0, None, closed="both"),
Interval(Real, None, 0, closed="right"),
HasMethods("fit"),
_IterablesNotString(),
_CVObjects(),
],
)
def test_generate_valid_param(constraint):
"""Check that the value generated does satisfy the constraint."""
value = generate_valid_param(constraint)
assert constraint.is_satisfied_by(value)
@pytest.mark.parametrize(
"constraint_declaration, value",
[
(Interval(Real, 0, 1, closed="both"), 0.42),
(Interval(Integral, 0, None, closed="neither"), 42),
(StrOptions({"a", "b", "c"}), "b"),
(Options(type, {np.float32, np.float64}), np.float64),
(callable, lambda x: x + 1),
(None, None),
("array-like", [[1, 2], [3, 4]]),
("array-like", np.array([[1, 2], [3, 4]])),
("sparse matrix", csr_matrix([[1, 2], [3, 4]])),
*[
("sparse matrix", container([[1, 2], [3, 4]]))
for container in CSR_CONTAINERS
],
("random_state", 0),
("random_state", np.random.RandomState(0)),
("random_state", None),
(_Class, _Class()),
(int, 1),
(Real, 0.5),
("boolean", False),
("verbose", 1),
("nan", np.nan),
(MissingValues(), -1),
(MissingValues(), -1.0),
(MissingValues(), 2**1028),
(MissingValues(), None),
(MissingValues(), float("nan")),
(MissingValues(), np.nan),
(MissingValues(), "missing"),
(HasMethods("fit"), _Estimator(a=0)),
("cv_object", 5),
],
)
def test_is_satisfied_by(constraint_declaration, value):
"""Sanity check for the is_satisfied_by method"""
constraint = make_constraint(constraint_declaration)
assert constraint.is_satisfied_by(value)
@pytest.mark.parametrize(
"constraint_declaration, expected_constraint_class",
[
(Interval(Real, 0, 1, closed="both"), Interval),
(StrOptions({"option1", "option2"}), StrOptions),
(Options(Real, {0.42, 1.23}), Options),
("array-like", _ArrayLikes),
("sparse matrix", _SparseMatrices),
("random_state", _RandomStates),
(None, _NoneConstraint),
(callable, _Callables),
(int, _InstancesOf),
("boolean", _Booleans),
("verbose", _VerboseHelper),
(MissingValues(numeric_only=True), MissingValues),
(HasMethods("fit"), HasMethods),
("cv_object", _CVObjects),
("nan", _NanConstraint),
],
)
def test_make_constraint(constraint_declaration, expected_constraint_class):
"""Check that make_constraint dispatches to the appropriate constraint class"""
constraint = make_constraint(constraint_declaration)
assert constraint.__class__ is expected_constraint_class
def test_make_constraint_unknown():
"""Check that an informative error is raised when an unknown constraint is passed"""
with pytest.raises(ValueError, match="Unknown constraint"):
make_constraint("not a valid constraint")
def test_validate_params():
"""Check that validate_params works no matter how the arguments are passed"""
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _func must be"
):
_func("wrong", c=1)
with pytest.raises(
InvalidParameterError, match="The 'b' parameter of _func must be"
):
_func(*[1, "wrong"], c=1)
with pytest.raises(
InvalidParameterError, match="The 'c' parameter of _func must be"
):
_func(1, **{"c": "wrong"})
with pytest.raises(
InvalidParameterError, match="The 'd' parameter of _func must be"
):
_func(1, c=1, d="wrong")
# check in the presence of extra positional and keyword args
with pytest.raises(
InvalidParameterError, match="The 'b' parameter of _func must be"
):
_func(0, *["wrong", 2, 3], c=4, **{"e": 5})
with pytest.raises(
InvalidParameterError, match="The 'c' parameter of _func must be"
):
_func(0, *[1, 2, 3], c="four", **{"e": 5})
def test_validate_params_missing_params():
"""Check that no error is raised when there are parameters without
constraints
"""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def func(a, b):
pass
func(1, 2)
def test_decorate_validated_function():
"""Check that validate_params functions can be decorated"""
decorated_function = deprecated()(_func)
with pytest.warns(FutureWarning, match="Function _func is deprecated"):
decorated_function(1, 2, c=3)
# outer decorator does not interfere with validation
with pytest.warns(FutureWarning, match="Function _func is deprecated"):
with pytest.raises(
InvalidParameterError, match=r"The 'c' parameter of _func must be"
):
decorated_function(1, 2, c="wrong")
def test_validate_params_method():
"""Check that validate_params works with methods"""
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _Class._method must be"
):
_Class()._method("wrong")
# validated method can be decorated
with pytest.warns(FutureWarning, match="Function _deprecated_method is deprecated"):
with pytest.raises(
InvalidParameterError,
match="The 'a' parameter of _Class._deprecated_method must be",
):
_Class()._deprecated_method("wrong")
def test_validate_params_estimator():
"""Check that validate_params works with Estimator instances"""
# no validation in init
est = _Estimator("wrong")
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _Estimator must be"
):
est.fit()
def test_stroptions_deprecated_subset():
"""Check that the deprecated parameter must be a subset of options."""
with pytest.raises(ValueError, match="deprecated options must be a subset"):
StrOptions({"a", "b", "c"}, deprecated={"a", "d"})
def test_hidden_constraint():
"""Check that internal constraints are not exposed in the error message."""
@validate_params(
{"param": [Hidden(list), dict]}, prefer_skip_nested_validation=True
)
def f(param):
pass
# list and dict are valid params
f({"a": 1, "b": 2, "c": 3})
f([1, 2, 3])
with pytest.raises(
InvalidParameterError, match="The 'param' parameter"
) as exc_info:
f(param="bad")
# the list option is not exposed in the error message
err_msg = str(exc_info.value)
assert "an instance of 'dict'" in err_msg
assert "an instance of 'list'" not in err_msg
def test_hidden_stroptions():
"""Check that we can have 2 StrOptions constraints, one being hidden."""
@validate_params(
{"param": [StrOptions({"auto"}), Hidden(StrOptions({"warn"}))]},
prefer_skip_nested_validation=True,
)
def f(param):
pass
# "auto" and "warn" are valid params
f("auto")
f("warn")
with pytest.raises(
InvalidParameterError, match="The 'param' parameter"
) as exc_info:
f(param="bad")
# the "warn" option is not exposed in the error message
err_msg = str(exc_info.value)
assert "auto" in err_msg
assert "warn" not in err_msg
def test_validate_params_set_param_constraints_attribute():
"""Check that the validate_params decorator properly sets the parameter constraints
as attribute of the decorated function/method.
"""
assert hasattr(_func, "_skl_parameter_constraints")
assert hasattr(_Class()._method, "_skl_parameter_constraints")
def test_boolean_constraint_deprecated_int():
"""Check that validate_params raise a deprecation message but still passes
validation when using an int for a parameter accepting a boolean.
"""
@validate_params({"param": ["boolean"]}, prefer_skip_nested_validation=True)
def f(param):
pass
# True/False and np.bool_(True/False) are valid params
f(True)
f(np.bool_(False))
def test_no_validation():
"""Check that validation can be skipped for a parameter."""
@validate_params(
{"param1": [int, None], "param2": "no_validation"},
prefer_skip_nested_validation=True,
)
def f(param1=None, param2=None):
pass
# param1 is validated
with pytest.raises(InvalidParameterError, match="The 'param1' parameter"):
f(param1="wrong")
# param2 is not validated: any type is valid.
class SomeType:
pass
f(param2=SomeType)
f(param2=SomeType())
def test_pandas_na_constraint_with_pd_na():
"""Add a specific test for checking support for `pandas.NA`."""
pd = pytest.importorskip("pandas")
na_constraint = _PandasNAConstraint()
assert na_constraint.is_satisfied_by(pd.NA)
assert not na_constraint.is_satisfied_by(np.array([1, 2, 3]))
def test_iterable_not_string():
"""Check that a string does not satisfy the _IterableNotString constraint."""
constraint = _IterablesNotString()
assert constraint.is_satisfied_by([1, 2, 3])
assert constraint.is_satisfied_by(range(10))
assert not constraint.is_satisfied_by("some string")
def test_cv_objects():
"""Check that the _CVObjects constraint accepts all current ways
to pass cv objects."""
constraint = _CVObjects()
assert constraint.is_satisfied_by(5)
assert constraint.is_satisfied_by(LeaveOneOut())
assert constraint.is_satisfied_by([([1, 2], [3, 4]), ([3, 4], [1, 2])])
assert constraint.is_satisfied_by(None)
assert not constraint.is_satisfied_by("not a CV object")
def test_third_party_estimator():
"""Check that the validation from a scikit-learn estimator inherited by a third
party estimator does not impose a match between the dict of constraints and the
parameters of the estimator.
"""
class ThirdPartyEstimator(_Estimator):
def __init__(self, b):
self.b = b
super().__init__(a=0)
def fit(self, X=None, y=None):
super().fit(X, y)
# does not raise, even though "b" is not in the constraints dict and "a" is not
# a parameter of the estimator.
ThirdPartyEstimator(b=0).fit()
def test_interval_real_not_int():
"""Check for the type RealNotInt in the Interval constraint."""
constraint = Interval(RealNotInt, 0, 1, closed="both")
assert constraint.is_satisfied_by(1.0)
assert not constraint.is_satisfied_by(1)
def test_real_not_int():
"""Check for the RealNotInt type."""
assert isinstance(1.0, RealNotInt)
assert not isinstance(1, RealNotInt)
assert isinstance(np.float64(1), RealNotInt)
assert not isinstance(np.int64(1), RealNotInt)
def test_skip_param_validation():
"""Check that param validation can be skipped using config_context."""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def f(a):
pass
with pytest.raises(InvalidParameterError, match="The 'a' parameter"):
f(a="1")
# does not raise
with config_context(skip_parameter_validation=True):
f(a="1")
@pytest.mark.parametrize("prefer_skip_nested_validation", [True, False])
def test_skip_nested_validation(prefer_skip_nested_validation):
"""Check that nested validation can be skipped."""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def f(a):
pass
@validate_params(
{"b": [int]},
prefer_skip_nested_validation=prefer_skip_nested_validation,
)
def g(b):
# calls f with a bad parameter type
return f(a="invalid_param_value")
# Validation for g is never skipped.
with pytest.raises(InvalidParameterError, match="The 'b' parameter"):
g(b="invalid_param_value")
if prefer_skip_nested_validation:
g(b=1) # does not raise because inner f is not validated
else:
with pytest.raises(InvalidParameterError, match="The 'a' parameter"):
g(b=1)
@pytest.mark.parametrize(
"skip_parameter_validation, prefer_skip_nested_validation, expected_skipped",
[
(True, True, True),
(True, False, True),
(False, True, True),
(False, False, False),
],
)
def test_skip_nested_validation_and_config_context(
skip_parameter_validation, prefer_skip_nested_validation, expected_skipped
):
"""Check interaction between global skip and local skip."""
@validate_params(
{"a": [int]}, prefer_skip_nested_validation=prefer_skip_nested_validation
)
def g(a):
return get_config()["skip_parameter_validation"]
with config_context(skip_parameter_validation=skip_parameter_validation):
actual_skipped = g(1)
assert actual_skipped == expected_skipped