"""
|
||
|
Testing for the gradient boosting loss functions and initial estimators.
|
||
|
"""
|
||
|
from itertools import product
|
||
|
import numpy as np
|
||
|
from numpy.testing import assert_allclose
|
||
|
import pytest
|
||
|
from pytest import approx
|
||
|
|
||
|
from sklearn.utils import check_random_state
|
||
|
from sklearn.ensemble._gb_losses import RegressionLossFunction
|
||
|
from sklearn.ensemble._gb_losses import LeastSquaresError
|
||
|
from sklearn.ensemble._gb_losses import LeastAbsoluteError
|
||
|
from sklearn.ensemble._gb_losses import HuberLossFunction
|
||
|
from sklearn.ensemble._gb_losses import QuantileLossFunction
|
||
|
from sklearn.ensemble._gb_losses import BinomialDeviance
|
||
|
from sklearn.ensemble._gb_losses import MultinomialDeviance
|
||
|
from sklearn.ensemble._gb_losses import ExponentialLoss
|
||
|
from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS
|
||
|
|
||
|
|
||
|
def test_binomial_deviance():
|
||
|
# Check binomial deviance loss.
|
||
|
# Check against alternative definitions in ESLII.
|
||
|
bd = BinomialDeviance(2)
|
||
|
|
||
|
# pred has the same BD for y in {0, 1}
|
||
|
assert (bd(np.array([0.]), np.array([0.])) ==
|
||
|
bd(np.array([1.]), np.array([0.])))
|
||
|
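    # Added illustrative check (not in the original test, but implied by the
    # definition deviance = -2 * (y * raw - log(1 + exp(raw)))): at a raw
    # prediction of 0 both labels give a deviance of 2 * log(2).
    assert bd(np.array([0.]), np.array([0.])) == approx(2 * np.log(2))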

    assert bd(np.array([1., 1, 1]), np.array([100., 100, 100])) == approx(0)
    assert bd(np.array([1., 0, 0]), np.array([100., -100, -100])) == approx(0)

    # check if same results as alternative definition of deviance, from ESLII
    # Eq. (10.18): -loglike = log(1 + exp(-2*z*f))
    # Note:
    # - We use y = {0, 1}, ESL (10.18) uses z in {-1, 1}, hence y=2*y-1
    # - ESL 2*f = pred_raw, hence the factor 2 of ESL disappears.
    # - Deviance = -2*loglike + .., hence a factor of 2 in front.
    def alt_dev(y, raw_pred):
        z = 2 * y - 1
        return 2 * np.mean(np.log(1 + np.exp(-z * raw_pred)))

    test_data = product(
        (np.array([0., 0, 0]), np.array([1., 1, 1])),
        (np.array([-5., -5, -5]), np.array([3., 3, 3])))

    for datum in test_data:
        assert bd(*datum) == approx(alt_dev(*datum))

    # check the negative gradient against the alternative formula from ESLII
    # Note: BinomialDeviance.negative_gradient returns half the negative
    # gradient of the deviance (the deviance carries a factor of 2), which is
    # what alt_ng computes.
    def alt_ng(y, raw_pred):
        z = 2 * y - 1
        return z / (1 + np.exp(z * raw_pred))

    for datum in test_data:
        assert bd.negative_gradient(*datum) == approx(alt_ng(*datum))


def test_sample_weight_smoke():
    rng = check_random_state(13)
    y = rng.rand(100)
    pred = rng.rand(100)

    # least squares
    loss = LeastSquaresError()
    loss_wo_sw = loss(y, pred)
    loss_w_sw = loss(y, pred, np.ones(pred.shape[0], dtype=np.float32))
    assert loss_wo_sw == approx(loss_w_sw)
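    # Added smoke extension (not in the original test): unit sample weights
    # should also leave the least-absolute and quantile losses unchanged.
    for other_loss in (LeastAbsoluteError(), QuantileLossFunction(0.9)):
        assert other_loss(y, pred) == approx(
            other_loss(y, pred, np.ones(pred.shape[0], dtype=np.float32)))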


def test_sample_weight_init_estimators():
    # Smoke test for init estimators with sample weights.
    rng = check_random_state(13)
    X = rng.rand(100, 2)
    sample_weight = np.ones(100)
    reg_y = rng.rand(100)

    clf_y = rng.randint(0, 2, size=100)

    for Loss in LOSS_FUNCTIONS.values():
        if Loss is None:
            continue
        if issubclass(Loss, RegressionLossFunction):
            y = reg_y
            loss = Loss()
        else:
            k = 2
            y = clf_y
            if Loss.is_multi_class:
                # skip multiclass
                continue
            loss = Loss(k)

        init_est = loss.init_estimator()
        init_est.fit(X, y)
        out = loss.get_init_raw_predictions(X, init_est)
        assert out.shape == (y.shape[0], 1)

        sw_init_est = loss.init_estimator()
        sw_init_est.fit(X, y, sample_weight=sample_weight)
        sw_out = loss.get_init_raw_predictions(X, sw_init_est)
        assert sw_out.shape == (y.shape[0], 1)

        # check if predictions match
        assert_allclose(out, sw_out, rtol=1e-2)


def test_quantile_loss_function():
    # Non-regression test for the QuantileLossFunction object.
    # There was a sign problem when evaluating the function
    # for negative values of 'ytrue - ypred'.
    x = np.asarray([-1.0, 0.0, 1.0])
    y_found = QuantileLossFunction(0.9)(x, np.zeros_like(x))
    y_expected = np.asarray([0.1, 0.0, 0.9]).mean()
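    # Added note: with alpha = 0.9 the pinball loss is 0.9 * (y - pred) when
    # y > pred and 0.1 * (pred - y) otherwise, so the per-sample losses for
    # y = [-1, 0, 1] and pred = 0 are [0.1, 0.0, 0.9]; the reported loss is
    # their mean.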
    np.testing.assert_allclose(y_found, y_expected)


def test_sample_weight_deviance():
    # Test if deviance supports sample weights.
    rng = check_random_state(13)
    sample_weight = np.ones(100)
    reg_y = rng.rand(100)
    clf_y = rng.randint(0, 2, size=100)
    mclf_y = rng.randint(0, 3, size=100)

    for Loss in LOSS_FUNCTIONS.values():
        if Loss is None:
            continue
        if issubclass(Loss, RegressionLossFunction):
            y = reg_y
            p = reg_y
            loss = Loss()
        else:
            k = 2
            y = clf_y
            p = clf_y
            if Loss.is_multi_class:
                k = 3
                y = mclf_y
                # one-hot encoding
                p = np.zeros((y.shape[0], k), dtype=np.float64)
                for i in range(k):
                    p[:, i] = y == i
            loss = Loss(k)

        deviance_w_w = loss(y, p, sample_weight)
        deviance_wo_w = loss(y, p)
        assert deviance_wo_w == deviance_w_w
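    # Added note: for the regression losses above, p equals y, so both loss
    # values are exactly zero and the equality holds trivially; the
    # classification losses are the substantive part of this check.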


@pytest.mark.parametrize(
    'n_classes, n_samples', [(3, 100), (5, 57), (7, 13)]
)
def test_multinomial_deviance(n_classes, n_samples):
    # Check multinomial deviance with and without sample weights.
    rng = np.random.RandomState(13)
    sample_weight = np.ones(n_samples)
    y_true = rng.randint(0, n_classes, size=n_samples)
    y_pred = np.zeros((n_samples, n_classes), dtype=np.float64)
    for klass in range(y_pred.shape[1]):
        y_pred[:, klass] = y_true == klass

    loss = MultinomialDeviance(n_classes)
    loss_wo_sw = loss(y_true, y_pred)
    assert loss_wo_sw > 0
    loss_w_sw = loss(y_true, y_pred, sample_weight=sample_weight)
    assert loss_wo_sw == approx(loss_w_sw)

    # Multinomial deviance uses a weighted average loss rather than a
    # weighted sum loss, so we make sure that the value remains the same
    # when we divide the weights by 2.
    loss_w_sw = loss(y_true, y_pred, sample_weight=0.5 * sample_weight)
    assert loss_wo_sw == approx(loss_w_sw)


def test_mdl_computation_weighted():
    raw_predictions = np.array([[1., -1., -.1], [-2., 1., 2.]])
    y_true = np.array([0, 1])
    weights = np.array([1, 3])
    expected_loss = 1.0909323
    # MultinomialDeviance loss computation with weights.
    loss = MultinomialDeviance(3)
    assert loss(y_true, raw_predictions, weights) == approx(expected_loss)
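    # Added note: the reference value above is the weighted average of the
    # per-sample deviances logsumexp(raw) - raw[true_class], recomputed here
    # with plain numpy as a cross-check.
    log_norm = np.log(np.exp(raw_predictions).sum(axis=1))
    per_sample = log_norm - raw_predictions[np.arange(y_true.shape[0]), y_true]
    assert np.average(per_sample, weights=weights) == approx(expected_loss)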


@pytest.mark.parametrize('n', [0, 1, 2])
def test_mdl_exception(n):
    # Check that MultinomialDeviance throws an exception when n_classes <= 2
    err_msg = 'MultinomialDeviance requires more than 2 classes.'
    with pytest.raises(ValueError, match=err_msg):
        MultinomialDeviance(n)


def test_init_raw_predictions_shapes():
    # Make sure get_init_raw_predictions returns float64 arrays with shape
    # (n_samples, K) where K is 1 for binary classification and regression, and
    # K = n_classes for multiclass classification
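    # Added note: loss.get_init_raw_predictions is called with y in place of X
    # below; the dummy init estimators appear to use their input only to infer
    # the number of samples at predict time, so any array of the right length
    # works.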
    rng = np.random.RandomState(0)

    n_samples = 100
    X = rng.normal(size=(n_samples, 5))
    y = rng.normal(size=n_samples)
    for loss in (LeastSquaresError(),
                 LeastAbsoluteError(),
                 QuantileLossFunction(),
                 HuberLossFunction()):
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    y = rng.randint(0, 2, size=n_samples)
    for loss in (BinomialDeviance(n_classes=2),
                 ExponentialLoss(n_classes=2)):
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        loss = MultinomialDeviance(n_classes=n_classes)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, n_classes)
        assert raw_predictions.dtype == np.float64


def test_init_raw_predictions_values():
    # Make sure get_init_raw_predictions() returns the expected values for
    # each loss.
    rng = np.random.RandomState(0)

    n_samples = 100
    X = rng.normal(size=(n_samples, 5))
    y = rng.normal(size=n_samples)

    # Least squares loss
    loss = LeastSquaresError()
    init_estimator = loss.init_estimator().fit(X, y)
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    # Make sure baseline prediction is the mean of all targets
    assert_allclose(raw_predictions, y.mean())

    # Least absolute and huber loss
    for Loss in (LeastAbsoluteError, HuberLossFunction):
        loss = Loss()
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        # Make sure baseline prediction is the median of all targets
        assert_allclose(raw_predictions, np.median(y))

    # Quantile loss
    for alpha in (.1, .5, .9):
        loss = QuantileLossFunction(alpha=alpha)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        # Make sure baseline prediction is the alpha-quantile of all targets
        assert_allclose(raw_predictions, np.percentile(y, alpha * 100))

    y = rng.randint(0, 2, size=n_samples)

    # Binomial deviance
    loss = BinomialDeviance(n_classes=2)
    init_estimator = loss.init_estimator().fit(X, y)
    # Make sure baseline prediction is equal to link_function(p), where p
    # is the proba of the positive class. We want predict_proba() to return p,
    # and by definition
    # p = inverse_link_function(raw_prediction) = sigmoid(raw_prediction)
    # So we want raw_prediction = link_function(p) = log(p / (1 - p))
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    p = y.mean()
    assert_allclose(raw_predictions, np.log(p / (1 - p)))

    # Exponential loss
    loss = ExponentialLoss(n_classes=2)
    init_estimator = loss.init_estimator().fit(X, y)
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    p = y.mean()
    assert_allclose(raw_predictions, .5 * np.log(p / (1 - p)))
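    # Added note: for the exponential loss the constant raw prediction that
    # minimizes sum(exp(-(2 * y - 1) * raw)) is 0.5 * log(p / (1 - p)), i.e.
    # half the logit used by the binomial deviance above, hence the factor .5.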

    # Multinomial deviance loss
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        loss = MultinomialDeviance(n_classes=n_classes)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        for k in range(n_classes):
            p = (y == k).mean()
            assert_allclose(raw_predictions[:, k], np.log(p))


@pytest.mark.parametrize('seed', range(5))
def test_lad_equals_quantile_50(seed):
    # Make sure quantile loss with alpha = .5 is equivalent to LAD
    lad = LeastAbsoluteError()
    ql = QuantileLossFunction(alpha=0.5)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=(n_samples))
    y_true = rng.normal(size=(n_samples))

    lad_loss = lad(y_true, raw_predictions)
    ql_loss = ql(y_true, raw_predictions)
    assert lad_loss == approx(2 * ql_loss)
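    # Added note: the factor 2 comes from the pinball loss at alpha = 0.5 being
    # 0.5 * |y - pred| per sample, whereas LAD uses |y - pred| directly.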

    weights = np.linspace(0, 1, n_samples) ** 2
    lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
    ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
    assert lad_weighted_loss == approx(2 * ql_weighted_loss)