1338 lines
45 KiB
Python
1338 lines
45 KiB
Python
|
"""
|
||
|
Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting).
|
||
|
"""
|
||
|
import re
|
||
|
import warnings
|
||
|
import numpy as np
|
||
|
from numpy.testing import assert_allclose
|
||
|
|
||
|
from scipy.sparse import csr_matrix
|
||
|
from scipy.sparse import csc_matrix
|
||
|
from scipy.sparse import coo_matrix
|
||
|
from scipy.special import expit
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from sklearn import datasets
|
||
|
from sklearn.base import clone
|
||
|
from sklearn.datasets import make_classification, make_regression
|
||
|
from sklearn.ensemble import GradientBoostingClassifier
|
||
|
from sklearn.ensemble import GradientBoostingRegressor
|
||
|
from sklearn.ensemble._gradient_boosting import predict_stages
|
||
|
from sklearn.preprocessing import scale
|
||
|
from sklearn.metrics import mean_squared_error
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.utils import check_random_state, tosequence
|
||
|
from sklearn.utils._mocking import NoSampleWeightWrapper
|
||
|
from sklearn.utils._testing import assert_array_almost_equal
|
||
|
from sklearn.utils._testing import assert_array_equal
|
||
|
from sklearn.utils._testing import skip_if_32bit
|
||
|
from sklearn.utils._param_validation import InvalidParameterError
|
||
|
from sklearn.exceptions import DataConversionWarning
|
||
|
from sklearn.exceptions import NotFittedError
|
||
|
from sklearn.dummy import DummyClassifier, DummyRegressor
|
||
|
from sklearn.pipeline import make_pipeline
|
||
|
from sklearn.linear_model import LinearRegression
|
||
|
from sklearn.svm import NuSVR
|
||
|
|
||
|
|
||
|
GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, GradientBoostingRegressor]
|
||
|
|
||
|
# toy sample
|
||
|
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
|
||
|
y = [-1, -1, -1, 1, 1, 1]
|
||
|
T = [[-1, -1], [2, 2], [3, 2]]
|
||
|
true_result = [-1, 1, 1]
|
||
|
|
||
|
# also make regression dataset
|
||
|
X_reg, y_reg = make_regression(
|
||
|
n_samples=100, n_features=4, n_informative=8, noise=10, random_state=7
|
||
|
)
|
||
|
y_reg = scale(y_reg)
|
||
|
|
||
|
rng = np.random.RandomState(0)
|
||
|
# also load the iris dataset
|
||
|
# and randomly permute it
|
||
|
iris = datasets.load_iris()
|
||
|
perm = rng.permutation(iris.target.size)
|
||
|
iris.data = iris.data[perm]
|
||
|
iris.target = iris.target[perm]
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("loss", ("log_loss", "exponential"))
|
||
|
def test_classification_toy(loss, global_random_seed):
|
||
|
# Check classification on a toy dataset.
|
||
|
clf = GradientBoostingClassifier(
|
||
|
loss=loss, n_estimators=10, random_state=global_random_seed
|
||
|
)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
clf.predict(T)
|
||
|
|
||
|
clf.fit(X, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 10 == len(clf.estimators_)
|
||
|
|
||
|
log_loss_decrease = clf.train_score_[:-1] - clf.train_score_[1:]
|
||
|
assert np.any(log_loss_decrease >= 0.0)
|
||
|
|
||
|
leaves = clf.apply(X)
|
||
|
assert leaves.shape == (6, 10, 1)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("loss", ("log_loss", "exponential"))
|
||
|
def test_classification_synthetic(loss, global_random_seed):
|
||
|
# Test GradientBoostingClassifier on synthetic dataset used by
|
||
|
# Hastie et al. in ESLII - Figure 10.9
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=global_random_seed)
|
||
|
|
||
|
X_train, X_test = X[:2000], X[2000:]
|
||
|
y_train, y_test = y[:2000], y[2000:]
|
||
|
|
||
|
# Increasing the number of trees should decrease the test error
|
||
|
common_params = {
|
||
|
"max_depth": 1,
|
||
|
"learning_rate": 1.0,
|
||
|
"loss": loss,
|
||
|
"random_state": global_random_seed,
|
||
|
}
|
||
|
gbrt_100_stumps = GradientBoostingClassifier(n_estimators=100, **common_params)
|
||
|
gbrt_100_stumps.fit(X_train, y_train)
|
||
|
|
||
|
gbrt_200_stumps = GradientBoostingClassifier(n_estimators=200, **common_params)
|
||
|
gbrt_200_stumps.fit(X_train, y_train)
|
||
|
|
||
|
assert gbrt_100_stumps.score(X_test, y_test) < gbrt_200_stumps.score(X_test, y_test)
|
||
|
|
||
|
# Decision stumps are better suited for this dataset with a large number of
|
||
|
# estimators.
|
||
|
common_params = {
|
||
|
"n_estimators": 200,
|
||
|
"learning_rate": 1.0,
|
||
|
"loss": loss,
|
||
|
"random_state": global_random_seed,
|
||
|
}
|
||
|
gbrt_stumps = GradientBoostingClassifier(max_depth=1, **common_params)
|
||
|
gbrt_stumps.fit(X_train, y_train)
|
||
|
|
||
|
gbrt_10_nodes = GradientBoostingClassifier(max_leaf_nodes=10, **common_params)
|
||
|
gbrt_10_nodes.fit(X_train, y_train)
|
||
|
|
||
|
assert gbrt_stumps.score(X_test, y_test) > gbrt_10_nodes.score(X_test, y_test)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("loss", ("squared_error", "absolute_error", "huber"))
|
||
|
@pytest.mark.parametrize("subsample", (1.0, 0.5))
|
||
|
def test_regression_dataset(loss, subsample, global_random_seed):
|
||
|
# Check consistency on regression dataset with least squares
|
||
|
# and least absolute deviation.
|
||
|
ones = np.ones(len(y_reg))
|
||
|
last_y_pred = None
|
||
|
for sample_weight in [None, ones, 2 * ones]:
|
||
|
# learning_rate, max_depth and n_estimators were adjusted to get a mode
|
||
|
# that is accurate enough to reach a low MSE on the training set while
|
||
|
# keeping the resource used to execute this test low enough.
|
||
|
reg = GradientBoostingRegressor(
|
||
|
n_estimators=30,
|
||
|
loss=loss,
|
||
|
max_depth=4,
|
||
|
subsample=subsample,
|
||
|
min_samples_split=2,
|
||
|
random_state=global_random_seed,
|
||
|
learning_rate=0.5,
|
||
|
)
|
||
|
|
||
|
reg.fit(X_reg, y_reg, sample_weight=sample_weight)
|
||
|
leaves = reg.apply(X_reg)
|
||
|
assert leaves.shape == (100, 30)
|
||
|
|
||
|
y_pred = reg.predict(X_reg)
|
||
|
mse = mean_squared_error(y_reg, y_pred)
|
||
|
assert mse < 0.05
|
||
|
|
||
|
if last_y_pred is not None:
|
||
|
# FIXME: We temporarily bypass this test. This is due to the fact
|
||
|
# that GBRT with and without `sample_weight` do not use the same
|
||
|
# implementation of the median during the initialization with the
|
||
|
# `DummyRegressor`. In the future, we should make sure that both
|
||
|
# implementations should be the same. See PR #17377 for more.
|
||
|
# assert_allclose(last_y_pred, y_pred)
|
||
|
pass
|
||
|
|
||
|
last_y_pred = y_pred
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("subsample", (1.0, 0.5))
|
||
|
@pytest.mark.parametrize("sample_weight", (None, 1))
|
||
|
def test_iris(subsample, sample_weight, global_random_seed):
|
||
|
if sample_weight == 1:
|
||
|
sample_weight = np.ones(len(iris.target))
|
||
|
# Check consistency on dataset iris.
|
||
|
clf = GradientBoostingClassifier(
|
||
|
n_estimators=100,
|
||
|
loss="log_loss",
|
||
|
random_state=global_random_seed,
|
||
|
subsample=subsample,
|
||
|
)
|
||
|
clf.fit(iris.data, iris.target, sample_weight=sample_weight)
|
||
|
score = clf.score(iris.data, iris.target)
|
||
|
assert score > 0.9
|
||
|
|
||
|
leaves = clf.apply(iris.data)
|
||
|
assert leaves.shape == (150, 100, 3)
|
||
|
|
||
|
|
||
|
def test_regression_synthetic(global_random_seed):
|
||
|
# Test on synthetic regression datasets used in Leo Breiman,
|
||
|
# `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996).
|
||
|
random_state = check_random_state(global_random_seed)
|
||
|
regression_params = {
|
||
|
"n_estimators": 100,
|
||
|
"max_depth": 4,
|
||
|
"min_samples_split": 2,
|
||
|
"learning_rate": 0.1,
|
||
|
"loss": "squared_error",
|
||
|
"random_state": global_random_seed,
|
||
|
}
|
||
|
|
||
|
# Friedman1
|
||
|
X, y = datasets.make_friedman1(n_samples=1200, random_state=random_state, noise=1.0)
|
||
|
X_train, y_train = X[:200], y[:200]
|
||
|
X_test, y_test = X[200:], y[200:]
|
||
|
|
||
|
clf = GradientBoostingRegressor(**regression_params)
|
||
|
clf.fit(X_train, y_train)
|
||
|
mse = mean_squared_error(y_test, clf.predict(X_test))
|
||
|
assert mse < 6.5
|
||
|
|
||
|
# Friedman2
|
||
|
X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
|
||
|
X_train, y_train = X[:200], y[:200]
|
||
|
X_test, y_test = X[200:], y[200:]
|
||
|
|
||
|
clf = GradientBoostingRegressor(**regression_params)
|
||
|
clf.fit(X_train, y_train)
|
||
|
mse = mean_squared_error(y_test, clf.predict(X_test))
|
||
|
assert mse < 2500.0
|
||
|
|
||
|
# Friedman3
|
||
|
X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
|
||
|
X_train, y_train = X[:200], y[:200]
|
||
|
X_test, y_test = X[200:], y[200:]
|
||
|
|
||
|
clf = GradientBoostingRegressor(**regression_params)
|
||
|
clf.fit(X_train, y_train)
|
||
|
mse = mean_squared_error(y_test, clf.predict(X_test))
|
||
|
assert mse < 0.025
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"GradientBoosting, X, y",
|
||
|
[
|
||
|
(GradientBoostingRegressor, X_reg, y_reg),
|
||
|
(GradientBoostingClassifier, iris.data, iris.target),
|
||
|
],
|
||
|
)
|
||
|
def test_feature_importances(GradientBoosting, X, y):
|
||
|
# smoke test to check that the gradient boosting expose an attribute
|
||
|
# feature_importances_
|
||
|
gbdt = GradientBoosting()
|
||
|
assert not hasattr(gbdt, "feature_importances_")
|
||
|
gbdt.fit(X, y)
|
||
|
assert hasattr(gbdt, "feature_importances_")
|
||
|
|
||
|
|
||
|
def test_probability_log(global_random_seed):
|
||
|
# Predict probabilities.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=global_random_seed)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
clf.predict_proba(T)
|
||
|
|
||
|
clf.fit(X, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
|
||
|
# check if probabilities are in [0, 1].
|
||
|
y_proba = clf.predict_proba(T)
|
||
|
assert np.all(y_proba >= 0.0)
|
||
|
assert np.all(y_proba <= 1.0)
|
||
|
|
||
|
# derive predictions from probabilities
|
||
|
y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
|
||
|
assert_array_equal(y_pred, true_result)
|
||
|
|
||
|
|
||
|
def test_single_class_with_sample_weight():
|
||
|
sample_weight = [0, 0, 0, 1, 1, 1]
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
msg = (
|
||
|
"y contains 1 class after sample_weight trimmed classes with "
|
||
|
"zero weights, while a minimum of 2 classes are required."
|
||
|
)
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
clf.fit(X, y, sample_weight=sample_weight)
|
||
|
|
||
|
|
||
|
def test_check_inputs_predict_stages():
|
||
|
# check that predict_stages through an error if the type of X is not
|
||
|
# supported
|
||
|
x, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
x_sparse_csc = csc_matrix(x)
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
clf.fit(x, y)
|
||
|
score = np.zeros((y.shape)).reshape(-1, 1)
|
||
|
err_msg = "When X is a sparse matrix, a CSR format is expected"
|
||
|
with pytest.raises(ValueError, match=err_msg):
|
||
|
predict_stages(clf.estimators_, x_sparse_csc, clf.learning_rate, score)
|
||
|
x_fortran = np.asfortranarray(x)
|
||
|
with pytest.raises(ValueError, match="X should be C-ordered np.ndarray"):
|
||
|
predict_stages(clf.estimators_, x_fortran, clf.learning_rate, score)
|
||
|
|
||
|
|
||
|
def test_max_feature_regression(global_random_seed):
|
||
|
# Test to make sure random state is set properly.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=global_random_seed)
|
||
|
|
||
|
X_train, X_test = X[:2000], X[2000:]
|
||
|
y_train, y_test = y[:2000], y[2000:]
|
||
|
|
||
|
gbrt = GradientBoostingClassifier(
|
||
|
n_estimators=100,
|
||
|
min_samples_split=5,
|
||
|
max_depth=2,
|
||
|
learning_rate=0.1,
|
||
|
max_features=2,
|
||
|
random_state=global_random_seed,
|
||
|
)
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
log_loss = gbrt._loss(y_test, gbrt.decision_function(X_test))
|
||
|
assert log_loss < 0.5, "GB failed with deviance %.4f" % log_loss
|
||
|
|
||
|
|
||
|
def test_feature_importance_regression(
|
||
|
fetch_california_housing_fxt, global_random_seed
|
||
|
):
|
||
|
"""Test that Gini importance is calculated correctly.
|
||
|
|
||
|
This test follows the example from [1]_ (pg. 373).
|
||
|
|
||
|
.. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
|
||
|
of statistical learning. New York: Springer series in statistics.
|
||
|
"""
|
||
|
california = fetch_california_housing_fxt()
|
||
|
X, y = california.data, california.target
|
||
|
X_train, X_test, y_train, y_test = train_test_split(
|
||
|
X, y, random_state=global_random_seed
|
||
|
)
|
||
|
|
||
|
reg = GradientBoostingRegressor(
|
||
|
loss="huber",
|
||
|
learning_rate=0.1,
|
||
|
max_leaf_nodes=6,
|
||
|
n_estimators=100,
|
||
|
random_state=global_random_seed,
|
||
|
)
|
||
|
reg.fit(X_train, y_train)
|
||
|
sorted_idx = np.argsort(reg.feature_importances_)[::-1]
|
||
|
sorted_features = [california.feature_names[s] for s in sorted_idx]
|
||
|
|
||
|
# The most important feature is the median income by far.
|
||
|
assert sorted_features[0] == "MedInc"
|
||
|
|
||
|
# The three subsequent features are the following. Their relative ordering
|
||
|
# might change a bit depending on the randomness of the trees and the
|
||
|
# train / test split.
|
||
|
assert set(sorted_features[1:4]) == {"Longitude", "AveOccup", "Latitude"}
|
||
|
|
||
|
|
||
|
# TODO(1.3): Remove warning filter
|
||
|
@pytest.mark.filterwarnings("ignore:`max_features='auto'` has been deprecated in 1.1")
|
||
|
def test_max_feature_auto():
|
||
|
# Test if max features is set properly for floats and str.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
|
||
|
_, n_features = X.shape
|
||
|
|
||
|
X_train = X[:2000]
|
||
|
y_train = y[:2000]
|
||
|
|
||
|
gbrt = GradientBoostingClassifier(n_estimators=1, max_features="auto")
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == int(np.sqrt(n_features))
|
||
|
|
||
|
gbrt = GradientBoostingRegressor(n_estimators=1, max_features="auto")
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == n_features
|
||
|
|
||
|
gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3)
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == int(n_features * 0.3)
|
||
|
|
||
|
gbrt = GradientBoostingRegressor(n_estimators=1, max_features="sqrt")
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == int(np.sqrt(n_features))
|
||
|
|
||
|
gbrt = GradientBoostingRegressor(n_estimators=1, max_features="log2")
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == int(np.log2(n_features))
|
||
|
|
||
|
gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.01 / X.shape[1])
|
||
|
gbrt.fit(X_train, y_train)
|
||
|
assert gbrt.max_features_ == 1
|
||
|
|
||
|
|
||
|
def test_staged_predict():
|
||
|
# Test whether staged decision function eventually gives
|
||
|
# the same prediction.
|
||
|
X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0)
|
||
|
X_train, y_train = X[:200], y[:200]
|
||
|
X_test = X[200:]
|
||
|
clf = GradientBoostingRegressor()
|
||
|
# test raise ValueError if not fitted
|
||
|
with pytest.raises(ValueError):
|
||
|
np.fromiter(clf.staged_predict(X_test), dtype=np.float64)
|
||
|
|
||
|
clf.fit(X_train, y_train)
|
||
|
y_pred = clf.predict(X_test)
|
||
|
|
||
|
# test if prediction for last stage equals ``predict``
|
||
|
for y in clf.staged_predict(X_test):
|
||
|
assert y.shape == y_pred.shape
|
||
|
|
||
|
assert_array_almost_equal(y_pred, y)
|
||
|
|
||
|
|
||
|
def test_staged_predict_proba():
|
||
|
# Test whether staged predict proba eventually gives
|
||
|
# the same prediction.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1)
|
||
|
X_train, y_train = X[:200], y[:200]
|
||
|
X_test, y_test = X[200:], y[200:]
|
||
|
clf = GradientBoostingClassifier(n_estimators=20)
|
||
|
# test raise NotFittedError if not
|
||
|
with pytest.raises(NotFittedError):
|
||
|
np.fromiter(clf.staged_predict_proba(X_test), dtype=np.float64)
|
||
|
|
||
|
clf.fit(X_train, y_train)
|
||
|
|
||
|
# test if prediction for last stage equals ``predict``
|
||
|
for y_pred in clf.staged_predict(X_test):
|
||
|
assert y_test.shape == y_pred.shape
|
||
|
|
||
|
assert_array_equal(clf.predict(X_test), y_pred)
|
||
|
|
||
|
# test if prediction for last stage equals ``predict_proba``
|
||
|
for staged_proba in clf.staged_predict_proba(X_test):
|
||
|
assert y_test.shape[0] == staged_proba.shape[0]
|
||
|
assert 2 == staged_proba.shape[1]
|
||
|
|
||
|
assert_array_almost_equal(clf.predict_proba(X_test), staged_proba)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Estimator", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_staged_functions_defensive(Estimator, global_random_seed):
|
||
|
# test that staged_functions make defensive copies
|
||
|
rng = np.random.RandomState(global_random_seed)
|
||
|
X = rng.uniform(size=(10, 3))
|
||
|
y = (4 * X[:, 0]).astype(int) + 1 # don't predict zeros
|
||
|
estimator = Estimator()
|
||
|
estimator.fit(X, y)
|
||
|
for func in ["predict", "decision_function", "predict_proba"]:
|
||
|
staged_func = getattr(estimator, "staged_" + func, None)
|
||
|
if staged_func is None:
|
||
|
# regressor has no staged_predict_proba
|
||
|
continue
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
staged_result = list(staged_func(X))
|
||
|
staged_result[1][:] = 0
|
||
|
assert np.all(staged_result[0] != 0)
|
||
|
|
||
|
|
||
|
def test_serialization():
|
||
|
# Check model serialization.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
|
||
|
clf.fit(X, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
try:
|
||
|
import cPickle as pickle
|
||
|
except ImportError:
|
||
|
import pickle
|
||
|
|
||
|
serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL)
|
||
|
clf = None
|
||
|
clf = pickle.loads(serialized_clf)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
|
||
|
def test_degenerate_targets():
|
||
|
# Check if we can fit even though all targets are equal.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
|
||
|
# classifier should raise exception
|
||
|
with pytest.raises(ValueError):
|
||
|
clf.fit(X, np.ones(len(X)))
|
||
|
|
||
|
clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
|
||
|
clf.fit(X, np.ones(len(X)))
|
||
|
clf.predict([rng.rand(2)])
|
||
|
assert_array_equal(np.ones((1,), dtype=np.float64), clf.predict([rng.rand(2)]))
|
||
|
|
||
|
|
||
|
def test_quantile_loss(global_random_seed):
|
||
|
# Check if quantile loss with alpha=0.5 equals absolute_error.
|
||
|
clf_quantile = GradientBoostingRegressor(
|
||
|
n_estimators=100,
|
||
|
loss="quantile",
|
||
|
max_depth=4,
|
||
|
alpha=0.5,
|
||
|
random_state=global_random_seed,
|
||
|
)
|
||
|
|
||
|
clf_quantile.fit(X_reg, y_reg)
|
||
|
y_quantile = clf_quantile.predict(X_reg)
|
||
|
|
||
|
clf_ae = GradientBoostingRegressor(
|
||
|
n_estimators=100,
|
||
|
loss="absolute_error",
|
||
|
max_depth=4,
|
||
|
random_state=global_random_seed,
|
||
|
)
|
||
|
|
||
|
clf_ae.fit(X_reg, y_reg)
|
||
|
y_ae = clf_ae.predict(X_reg)
|
||
|
assert_allclose(y_quantile, y_ae)
|
||
|
|
||
|
|
||
|
def test_symbol_labels():
|
||
|
# Test with non-integer class labels.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
|
||
|
symbol_y = tosequence(map(str, y))
|
||
|
|
||
|
clf.fit(X, symbol_y)
|
||
|
assert_array_equal(clf.predict(T), tosequence(map(str, true_result)))
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
|
||
|
def test_float_class_labels():
|
||
|
# Test with float class labels.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
|
||
|
float_y = np.asarray(y, dtype=np.float32)
|
||
|
|
||
|
clf.fit(X, float_y)
|
||
|
assert_array_equal(clf.predict(T), np.asarray(true_result, dtype=np.float32))
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
|
||
|
def test_shape_y():
|
||
|
# Test with float class labels.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
|
||
|
y_ = np.asarray(y, dtype=np.int32)
|
||
|
y_ = y_[:, np.newaxis]
|
||
|
|
||
|
# This will raise a DataConversionWarning that we want to
|
||
|
# "always" raise, elsewhere the warnings gets ignored in the
|
||
|
# later tests, and the tests that check for this warning fail
|
||
|
warn_msg = (
|
||
|
"A column-vector y was passed when a 1d array was expected. "
|
||
|
"Please change the shape of y to \\(n_samples, \\), for "
|
||
|
"example using ravel()."
|
||
|
)
|
||
|
with pytest.warns(DataConversionWarning, match=warn_msg):
|
||
|
clf.fit(X, y_)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
|
||
|
def test_mem_layout():
|
||
|
# Test with different memory layouts of X and y
|
||
|
X_ = np.asfortranarray(X)
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
clf.fit(X_, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
X_ = np.ascontiguousarray(X)
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
clf.fit(X_, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
y_ = np.asarray(y, dtype=np.int32)
|
||
|
y_ = np.ascontiguousarray(y_)
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
clf.fit(X, y_)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
y_ = np.asarray(y, dtype=np.int32)
|
||
|
y_ = np.asfortranarray(y_)
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
|
||
|
clf.fit(X, y_)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
assert 100 == len(clf.estimators_)
|
||
|
|
||
|
|
||
|
def test_oob_improvement():
|
||
|
# Test if oob improvement has correct shape and regression test.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5)
|
||
|
clf.fit(X, y)
|
||
|
assert clf.oob_improvement_.shape[0] == 100
|
||
|
# hard-coded regression test - change if modification in OOB computation
|
||
|
assert_array_almost_equal(
|
||
|
clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), decimal=2
|
||
|
)
|
||
|
|
||
|
|
||
|
def test_oob_improvement_raise():
|
||
|
# Test if oob improvement has correct shape.
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=1.0)
|
||
|
clf.fit(X, y)
|
||
|
with pytest.raises(AttributeError):
|
||
|
clf.oob_improvement_
|
||
|
|
||
|
|
||
|
def test_oob_multilcass_iris():
|
||
|
# Check OOB improvement on multi-class dataset.
|
||
|
clf = GradientBoostingClassifier(
|
||
|
n_estimators=100, loss="log_loss", random_state=1, subsample=0.5
|
||
|
)
|
||
|
clf.fit(iris.data, iris.target)
|
||
|
score = clf.score(iris.data, iris.target)
|
||
|
assert score > 0.9
|
||
|
assert clf.oob_improvement_.shape[0] == clf.n_estimators
|
||
|
# hard-coded regression test - change if modification in OOB computation
|
||
|
# FIXME: the following snippet does not yield the same results on 32 bits
|
||
|
# assert_array_almost_equal(clf.oob_improvement_[:5],
|
||
|
# np.array([12.68, 10.45, 8.18, 6.43, 5.13]),
|
||
|
# decimal=2)
|
||
|
|
||
|
|
||
|
def test_verbose_output():
|
||
|
# Check verbose=1 does not cause error.
|
||
|
from io import StringIO
|
||
|
|
||
|
import sys
|
||
|
|
||
|
old_stdout = sys.stdout
|
||
|
sys.stdout = StringIO()
|
||
|
clf = GradientBoostingClassifier(
|
||
|
n_estimators=100, random_state=1, verbose=1, subsample=0.8
|
||
|
)
|
||
|
clf.fit(X, y)
|
||
|
verbose_output = sys.stdout
|
||
|
sys.stdout = old_stdout
|
||
|
|
||
|
# check output
|
||
|
verbose_output.seek(0)
|
||
|
header = verbose_output.readline().rstrip()
|
||
|
# with OOB
|
||
|
true_header = " ".join(["%10s"] + ["%16s"] * 3) % (
|
||
|
"Iter",
|
||
|
"Train Loss",
|
||
|
"OOB Improve",
|
||
|
"Remaining Time",
|
||
|
)
|
||
|
assert true_header == header
|
||
|
|
||
|
n_lines = sum(1 for l in verbose_output.readlines())
|
||
|
# one for 1-10 and then 9 for 20-100
|
||
|
assert 10 + 9 == n_lines
|
||
|
|
||
|
|
||
|
def test_more_verbose_output():
|
||
|
# Check verbose=2 does not cause error.
|
||
|
from io import StringIO
|
||
|
import sys
|
||
|
|
||
|
old_stdout = sys.stdout
|
||
|
sys.stdout = StringIO()
|
||
|
clf = GradientBoostingClassifier(n_estimators=100, random_state=1, verbose=2)
|
||
|
clf.fit(X, y)
|
||
|
verbose_output = sys.stdout
|
||
|
sys.stdout = old_stdout
|
||
|
|
||
|
# check output
|
||
|
verbose_output.seek(0)
|
||
|
header = verbose_output.readline().rstrip()
|
||
|
# no OOB
|
||
|
true_header = " ".join(["%10s"] + ["%16s"] * 2) % (
|
||
|
"Iter",
|
||
|
"Train Loss",
|
||
|
"Remaining Time",
|
||
|
)
|
||
|
assert true_header == header
|
||
|
|
||
|
n_lines = sum(1 for l in verbose_output.readlines())
|
||
|
# 100 lines for n_estimators==100
|
||
|
assert 100 == n_lines
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start(Cls, global_random_seed):
|
||
|
# Test if warm start equals fit.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=global_random_seed)
|
||
|
est = Cls(n_estimators=200, max_depth=1, random_state=global_random_seed)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
est_ws = Cls(
|
||
|
n_estimators=100, max_depth=1, warm_start=True, random_state=global_random_seed
|
||
|
)
|
||
|
est_ws.fit(X, y)
|
||
|
est_ws.set_params(n_estimators=200)
|
||
|
est_ws.fit(X, y)
|
||
|
|
||
|
if Cls is GradientBoostingRegressor:
|
||
|
assert_allclose(est_ws.predict(X), est.predict(X))
|
||
|
else:
|
||
|
# Random state is preserved and hence predict_proba must also be
|
||
|
# same
|
||
|
assert_array_equal(est_ws.predict(X), est.predict(X))
|
||
|
assert_allclose(est_ws.predict_proba(X), est.predict_proba(X))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_n_estimators(Cls, global_random_seed):
|
||
|
# Test if warm start equals fit - set n_estimators.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=global_random_seed)
|
||
|
est = Cls(n_estimators=300, max_depth=1, random_state=global_random_seed)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
est_ws = Cls(
|
||
|
n_estimators=100, max_depth=1, warm_start=True, random_state=global_random_seed
|
||
|
)
|
||
|
est_ws.fit(X, y)
|
||
|
est_ws.set_params(n_estimators=300)
|
||
|
est_ws.fit(X, y)
|
||
|
|
||
|
assert_allclose(est_ws.predict(X), est.predict(X))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_max_depth(Cls):
|
||
|
# Test if possible to fit trees of different depth in ensemble.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=100, max_depth=1, warm_start=True)
|
||
|
est.fit(X, y)
|
||
|
est.set_params(n_estimators=110, max_depth=2)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
# last 10 trees have different depth
|
||
|
assert est.estimators_[0, 0].max_depth == 1
|
||
|
for i in range(1, 11):
|
||
|
assert est.estimators_[-i, 0].max_depth == 2
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_clear(Cls):
|
||
|
# Test if fit clears state.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=100, max_depth=1)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
est_2 = Cls(n_estimators=100, max_depth=1, warm_start=True)
|
||
|
est_2.fit(X, y) # inits state
|
||
|
est_2.set_params(warm_start=False)
|
||
|
est_2.fit(X, y) # clears old state and equals est
|
||
|
|
||
|
assert_array_almost_equal(est_2.predict(X), est.predict(X))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_smaller_n_estimators(Cls):
|
||
|
# Test if warm start with smaller n_estimators raises error
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=100, max_depth=1, warm_start=True)
|
||
|
est.fit(X, y)
|
||
|
est.set_params(n_estimators=99)
|
||
|
with pytest.raises(ValueError):
|
||
|
est.fit(X, y)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_equal_n_estimators(Cls):
|
||
|
# Test if warm start with equal n_estimators does nothing
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=100, max_depth=1)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
est2 = clone(est)
|
||
|
est2.set_params(n_estimators=est.n_estimators, warm_start=True)
|
||
|
est2.fit(X, y)
|
||
|
|
||
|
assert_array_almost_equal(est2.predict(X), est.predict(X))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_oob_switch(Cls):
|
||
|
# Test if oob can be turned on during warm start.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=100, max_depth=1, warm_start=True)
|
||
|
est.fit(X, y)
|
||
|
est.set_params(n_estimators=110, subsample=0.5)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
assert_array_equal(est.oob_improvement_[:100], np.zeros(100))
|
||
|
# the last 10 are not zeros
|
||
|
assert_array_equal(est.oob_improvement_[-10:] == 0.0, np.zeros(10, dtype=bool))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_oob(Cls):
|
||
|
# Test if warm start OOB equals fit.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
est = Cls(n_estimators=200, max_depth=1, subsample=0.5, random_state=1)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
est_ws = Cls(
|
||
|
n_estimators=100, max_depth=1, subsample=0.5, random_state=1, warm_start=True
|
||
|
)
|
||
|
est_ws.fit(X, y)
|
||
|
est_ws.set_params(n_estimators=200)
|
||
|
est_ws.fit(X, y)
|
||
|
|
||
|
assert_array_almost_equal(est_ws.oob_improvement_[:100], est.oob_improvement_[:100])
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_sparse(Cls):
|
||
|
# Test that all sparse matrix types are supported
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
sparse_matrix_type = [csr_matrix, csc_matrix, coo_matrix]
|
||
|
est_dense = Cls(
|
||
|
n_estimators=100, max_depth=1, subsample=0.5, random_state=1, warm_start=True
|
||
|
)
|
||
|
est_dense.fit(X, y)
|
||
|
est_dense.predict(X)
|
||
|
est_dense.set_params(n_estimators=200)
|
||
|
est_dense.fit(X, y)
|
||
|
y_pred_dense = est_dense.predict(X)
|
||
|
|
||
|
for sparse_constructor in sparse_matrix_type:
|
||
|
X_sparse = sparse_constructor(X)
|
||
|
|
||
|
est_sparse = Cls(
|
||
|
n_estimators=100,
|
||
|
max_depth=1,
|
||
|
subsample=0.5,
|
||
|
random_state=1,
|
||
|
warm_start=True,
|
||
|
)
|
||
|
est_sparse.fit(X_sparse, y)
|
||
|
est_sparse.predict(X)
|
||
|
est_sparse.set_params(n_estimators=200)
|
||
|
est_sparse.fit(X_sparse, y)
|
||
|
y_pred_sparse = est_sparse.predict(X)
|
||
|
|
||
|
assert_array_almost_equal(
|
||
|
est_dense.oob_improvement_[:100], est_sparse.oob_improvement_[:100]
|
||
|
)
|
||
|
assert_array_almost_equal(y_pred_dense, y_pred_sparse)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_warm_start_fortran(Cls, global_random_seed):
|
||
|
# Test that feeding a X in Fortran-ordered is giving the same results as
|
||
|
# in C-ordered
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=global_random_seed)
|
||
|
est_c = Cls(n_estimators=1, random_state=global_random_seed, warm_start=True)
|
||
|
est_fortran = Cls(n_estimators=1, random_state=global_random_seed, warm_start=True)
|
||
|
|
||
|
est_c.fit(X, y)
|
||
|
est_c.set_params(n_estimators=11)
|
||
|
est_c.fit(X, y)
|
||
|
|
||
|
X_fortran = np.asfortranarray(X)
|
||
|
est_fortran.fit(X_fortran, y)
|
||
|
est_fortran.set_params(n_estimators=11)
|
||
|
est_fortran.fit(X_fortran, y)
|
||
|
|
||
|
assert_allclose(est_c.predict(X), est_fortran.predict(X))
|
||
|
|
||
|
|
||
|
def early_stopping_monitor(i, est, locals):
|
||
|
"""Returns True on the 10th iteration."""
|
||
|
if i == 9:
|
||
|
return True
|
||
|
else:
|
||
|
return False
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("Cls", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_monitor_early_stopping(Cls):
|
||
|
# Test if monitor return value works.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
|
||
|
est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5)
|
||
|
est.fit(X, y, monitor=early_stopping_monitor)
|
||
|
assert est.n_estimators == 20 # this is not altered
|
||
|
assert est.estimators_.shape[0] == 10
|
||
|
assert est.train_score_.shape[0] == 10
|
||
|
assert est.oob_improvement_.shape[0] == 10
|
||
|
|
||
|
# try refit
|
||
|
est.set_params(n_estimators=30)
|
||
|
est.fit(X, y)
|
||
|
assert est.n_estimators == 30
|
||
|
assert est.estimators_.shape[0] == 30
|
||
|
assert est.train_score_.shape[0] == 30
|
||
|
|
||
|
est = Cls(
|
||
|
n_estimators=20, max_depth=1, random_state=1, subsample=0.5, warm_start=True
|
||
|
)
|
||
|
est.fit(X, y, monitor=early_stopping_monitor)
|
||
|
assert est.n_estimators == 20
|
||
|
assert est.estimators_.shape[0] == 10
|
||
|
assert est.train_score_.shape[0] == 10
|
||
|
assert est.oob_improvement_.shape[0] == 10
|
||
|
|
||
|
# try refit
|
||
|
est.set_params(n_estimators=30, warm_start=False)
|
||
|
est.fit(X, y)
|
||
|
assert est.n_estimators == 30
|
||
|
assert est.train_score_.shape[0] == 30
|
||
|
assert est.estimators_.shape[0] == 30
|
||
|
assert est.oob_improvement_.shape[0] == 30
|
||
|
|
||
|
|
||
|
def test_complete_classification():
|
||
|
# Test greedy trees with max_depth + 1 leafs.
|
||
|
from sklearn.tree._tree import TREE_LEAF
|
||
|
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
k = 4
|
||
|
|
||
|
est = GradientBoostingClassifier(
|
||
|
n_estimators=20, max_depth=None, random_state=1, max_leaf_nodes=k + 1
|
||
|
)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
tree = est.estimators_[0, 0].tree_
|
||
|
assert tree.max_depth == k
|
||
|
assert tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1
|
||
|
|
||
|
|
||
|
def test_complete_regression():
|
||
|
# Test greedy trees with max_depth + 1 leafs.
|
||
|
from sklearn.tree._tree import TREE_LEAF
|
||
|
|
||
|
k = 4
|
||
|
|
||
|
est = GradientBoostingRegressor(
|
||
|
n_estimators=20, max_depth=None, random_state=1, max_leaf_nodes=k + 1
|
||
|
)
|
||
|
est.fit(X_reg, y_reg)
|
||
|
|
||
|
tree = est.estimators_[-1, 0].tree_
|
||
|
assert tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1
|
||
|
|
||
|
|
||
|
def test_zero_estimator_reg(global_random_seed):
|
||
|
# Test if init='zero' works for regression by checking that it is better
|
||
|
# than a simple baseline.
|
||
|
|
||
|
baseline = DummyRegressor(strategy="mean").fit(X_reg, y_reg)
|
||
|
mse_baseline = mean_squared_error(baseline.predict(X_reg), y_reg)
|
||
|
est = GradientBoostingRegressor(
|
||
|
n_estimators=5,
|
||
|
max_depth=1,
|
||
|
random_state=global_random_seed,
|
||
|
init="zero",
|
||
|
learning_rate=0.5,
|
||
|
)
|
||
|
est.fit(X_reg, y_reg)
|
||
|
y_pred = est.predict(X_reg)
|
||
|
mse_gbdt = mean_squared_error(y_reg, y_pred)
|
||
|
assert mse_gbdt < mse_baseline
|
||
|
|
||
|
|
||
|
def test_zero_estimator_clf(global_random_seed):
|
||
|
# Test if init='zero' works for classification.
|
||
|
X = iris.data
|
||
|
y = np.array(iris.target)
|
||
|
|
||
|
est = GradientBoostingClassifier(
|
||
|
n_estimators=20, max_depth=1, random_state=global_random_seed, init="zero"
|
||
|
)
|
||
|
est.fit(X, y)
|
||
|
|
||
|
assert est.score(X, y) > 0.96
|
||
|
|
||
|
# binary clf
|
||
|
mask = y != 0
|
||
|
y[mask] = 1
|
||
|
y[~mask] = 0
|
||
|
est = GradientBoostingClassifier(
|
||
|
n_estimators=20, max_depth=1, random_state=global_random_seed, init="zero"
|
||
|
)
|
||
|
est.fit(X, y)
|
||
|
assert est.score(X, y) > 0.96
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("GBEstimator", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_max_leaf_nodes_max_depth(GBEstimator):
|
||
|
# Test precedence of max_leaf_nodes over max_depth.
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
|
||
|
k = 4
|
||
|
|
||
|
est = GBEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y)
|
||
|
tree = est.estimators_[0, 0].tree_
|
||
|
assert tree.max_depth == 1
|
||
|
|
||
|
est = GBEstimator(max_depth=1).fit(X, y)
|
||
|
tree = est.estimators_[0, 0].tree_
|
||
|
assert tree.max_depth == 1
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("GBEstimator", GRADIENT_BOOSTING_ESTIMATORS)
|
||
|
def test_min_impurity_decrease(GBEstimator):
|
||
|
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
|
||
|
|
||
|
est = GBEstimator(min_impurity_decrease=0.1)
|
||
|
est.fit(X, y)
|
||
|
for tree in est.estimators_.flat:
|
||
|
# Simply check if the parameter is passed on correctly. Tree tests
|
||
|
# will suffice for the actual working of this param
|
||
|
assert tree.min_impurity_decrease == 0.1
|
||
|
|
||
|
|
||
|
def test_warm_start_wo_nestimators_change():
|
||
|
# Test if warm_start does nothing if n_estimators is not changed.
|
||
|
# Regression test for #3513.
|
||
|
clf = GradientBoostingClassifier(n_estimators=10, warm_start=True)
|
||
|
clf.fit([[0, 1], [2, 3]], [0, 1])
|
||
|
assert clf.estimators_.shape[0] == 10
|
||
|
clf.fit([[0, 1], [2, 3]], [0, 1])
|
||
|
assert clf.estimators_.shape[0] == 10
|
||
|
|
||
|
|
||
|
def test_probability_exponential(global_random_seed):
|
||
|
# Predict probabilities.
|
||
|
clf = GradientBoostingClassifier(
|
||
|
loss="exponential", n_estimators=100, random_state=global_random_seed
|
||
|
)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
clf.predict_proba(T)
|
||
|
|
||
|
clf.fit(X, y)
|
||
|
assert_array_equal(clf.predict(T), true_result)
|
||
|
|
||
|
# check if probabilities are in [0, 1].
|
||
|
y_proba = clf.predict_proba(T)
|
||
|
assert np.all(y_proba >= 0.0)
|
||
|
assert np.all(y_proba <= 1.0)
|
||
|
score = clf.decision_function(T).ravel()
|
||
|
assert_allclose(y_proba[:, 1], expit(2 * score))
|
||
|
|
||
|
# derive predictions from probabilities
|
||
|
y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
|
||
|
assert_array_equal(y_pred, true_result)
|
||
|
|
||
|
|
||
|
def test_non_uniform_weights_toy_edge_case_reg():
|
||
|
X = [[1, 0], [1, 0], [1, 0], [0, 1]]
|
||
|
y = [0, 0, 1, 0]
|
||
|
# ignore the first 2 training samples by setting their weight to 0
|
||
|
sample_weight = [0, 0, 1, 1]
|
||
|
for loss in ("huber", "squared_error", "absolute_error", "quantile"):
|
||
|
gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss)
|
||
|
gb.fit(X, y, sample_weight=sample_weight)
|
||
|
assert gb.predict([[1, 0]])[0] > 0.5
|
||
|
|
||
|
|
||
|
def test_non_uniform_weights_toy_edge_case_clf():
|
||
|
X = [[1, 0], [1, 0], [1, 0], [0, 1]]
|
||
|
y = [0, 0, 1, 0]
|
||
|
# ignore the first 2 training samples by setting their weight to 0
|
||
|
sample_weight = [0, 0, 1, 1]
|
||
|
for loss in ("log_loss", "exponential"):
|
||
|
gb = GradientBoostingClassifier(n_estimators=5, loss=loss)
|
||
|
gb.fit(X, y, sample_weight=sample_weight)
|
||
|
assert_array_equal(gb.predict([[1, 0]]), [1])
|
||
|
|
||
|
|
||
|
@skip_if_32bit
|
||
|
@pytest.mark.parametrize(
|
||
|
"EstimatorClass", (GradientBoostingClassifier, GradientBoostingRegressor)
|
||
|
)
|
||
|
@pytest.mark.parametrize("sparse_matrix", (csr_matrix, csc_matrix, coo_matrix))
|
||
|
def test_sparse_input(EstimatorClass, sparse_matrix):
|
||
|
y, X = datasets.make_multilabel_classification(
|
||
|
random_state=0, n_samples=50, n_features=1, n_classes=20
|
||
|
)
|
||
|
y = y[:, 0]
|
||
|
X_sparse = sparse_matrix(X)
|
||
|
|
||
|
dense = EstimatorClass(
|
||
|
n_estimators=10, random_state=0, max_depth=2, min_impurity_decrease=1e-7
|
||
|
).fit(X, y)
|
||
|
sparse = EstimatorClass(
|
||
|
n_estimators=10, random_state=0, max_depth=2, min_impurity_decrease=1e-7
|
||
|
).fit(X_sparse, y)
|
||
|
|
||
|
assert_array_almost_equal(sparse.apply(X), dense.apply(X))
|
||
|
assert_array_almost_equal(sparse.predict(X), dense.predict(X))
|
||
|
assert_array_almost_equal(sparse.feature_importances_, dense.feature_importances_)
|
||
|
|
||
|
assert_array_almost_equal(sparse.predict(X_sparse), dense.predict(X))
|
||
|
assert_array_almost_equal(dense.predict(X_sparse), sparse.predict(X))
|
||
|
|
||
|
if issubclass(EstimatorClass, GradientBoostingClassifier):
|
||
|
assert_array_almost_equal(sparse.predict_proba(X), dense.predict_proba(X))
|
||
|
assert_array_almost_equal(
|
||
|
sparse.predict_log_proba(X), dense.predict_log_proba(X)
|
||
|
)
|
||
|
|
||
|
assert_array_almost_equal(
|
||
|
sparse.decision_function(X_sparse), sparse.decision_function(X)
|
||
|
)
|
||
|
assert_array_almost_equal(
|
||
|
dense.decision_function(X_sparse), sparse.decision_function(X)
|
||
|
)
|
||
|
for res_sparse, res in zip(
|
||
|
sparse.staged_decision_function(X_sparse),
|
||
|
sparse.staged_decision_function(X),
|
||
|
):
|
||
|
assert_array_almost_equal(res_sparse, res)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"GradientBoostingEstimator", [GradientBoostingClassifier, GradientBoostingRegressor]
|
||
|
)
|
||
|
def test_gradient_boosting_early_stopping(GradientBoostingEstimator):
|
||
|
# Check if early stopping works as expected, that is empirically check that the
|
||
|
# number of trained estimators is increasing when the tolerance decreases.
|
||
|
|
||
|
X, y = make_classification(n_samples=1000, random_state=0)
|
||
|
n_estimators = 1000
|
||
|
|
||
|
gb_large_tol = GradientBoostingEstimator(
|
||
|
n_estimators=n_estimators,
|
||
|
n_iter_no_change=10,
|
||
|
learning_rate=0.1,
|
||
|
max_depth=3,
|
||
|
random_state=42,
|
||
|
tol=1e-1,
|
||
|
)
|
||
|
|
||
|
gb_small_tol = GradientBoostingEstimator(
|
||
|
n_estimators=n_estimators,
|
||
|
n_iter_no_change=10,
|
||
|
learning_rate=0.1,
|
||
|
max_depth=3,
|
||
|
random_state=42,
|
||
|
tol=1e-3,
|
||
|
)
|
||
|
|
||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
|
||
|
gb_large_tol.fit(X_train, y_train)
|
||
|
gb_small_tol.fit(X_train, y_train)
|
||
|
|
||
|
assert gb_large_tol.n_estimators_ < gb_small_tol.n_estimators_ < n_estimators
|
||
|
|
||
|
assert gb_large_tol.score(X_test, y_test) > 0.7
|
||
|
assert gb_small_tol.score(X_test, y_test) > 0.7
|
||
|
|
||
|
|
||
|
def test_gradient_boosting_without_early_stopping():
|
||
|
# When early stopping is not used, the number of trained estimators
|
||
|
# must be the one specified.
|
||
|
X, y = make_classification(n_samples=1000, random_state=0)
|
||
|
|
||
|
gbc = GradientBoostingClassifier(
|
||
|
n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42
|
||
|
)
|
||
|
gbc.fit(X, y)
|
||
|
gbr = GradientBoostingRegressor(
|
||
|
n_estimators=30, learning_rate=0.1, max_depth=3, random_state=42
|
||
|
)
|
||
|
gbr.fit(X, y)
|
||
|
|
||
|
# The number of trained estimators must be the one specified.
|
||
|
assert gbc.n_estimators_ == 50
|
||
|
assert gbr.n_estimators_ == 30
|
||
|
|
||
|
|
||
|
def test_gradient_boosting_validation_fraction():
|
||
|
X, y = make_classification(n_samples=1000, random_state=0)
|
||
|
|
||
|
gbc = GradientBoostingClassifier(
|
||
|
n_estimators=100,
|
||
|
n_iter_no_change=10,
|
||
|
validation_fraction=0.1,
|
||
|
learning_rate=0.1,
|
||
|
max_depth=3,
|
||
|
random_state=42,
|
||
|
)
|
||
|
gbc2 = clone(gbc).set_params(validation_fraction=0.3)
|
||
|
gbc3 = clone(gbc).set_params(n_iter_no_change=20)
|
||
|
|
||
|
gbr = GradientBoostingRegressor(
|
||
|
n_estimators=100,
|
||
|
n_iter_no_change=10,
|
||
|
learning_rate=0.1,
|
||
|
max_depth=3,
|
||
|
validation_fraction=0.1,
|
||
|
random_state=42,
|
||
|
)
|
||
|
gbr2 = clone(gbr).set_params(validation_fraction=0.3)
|
||
|
gbr3 = clone(gbr).set_params(n_iter_no_change=20)
|
||
|
|
||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
|
||
|
# Check if validation_fraction has an effect
|
||
|
gbc.fit(X_train, y_train)
|
||
|
gbc2.fit(X_train, y_train)
|
||
|
assert gbc.n_estimators_ != gbc2.n_estimators_
|
||
|
|
||
|
gbr.fit(X_train, y_train)
|
||
|
gbr2.fit(X_train, y_train)
|
||
|
assert gbr.n_estimators_ != gbr2.n_estimators_
|
||
|
|
||
|
# Check if n_estimators_ increase monotonically with n_iter_no_change
|
||
|
# Set validation
|
||
|
gbc3.fit(X_train, y_train)
|
||
|
gbr3.fit(X_train, y_train)
|
||
|
assert gbr.n_estimators_ < gbr3.n_estimators_
|
||
|
assert gbc.n_estimators_ < gbc3.n_estimators_
|
||
|
|
||
|
|
||
|
def test_early_stopping_stratified():
|
||
|
# Make sure data splitting for early stopping is stratified
|
||
|
X = [[1, 2], [2, 3], [3, 4], [4, 5]]
|
||
|
y = [0, 0, 0, 1]
|
||
|
|
||
|
gbc = GradientBoostingClassifier(n_iter_no_change=5)
|
||
|
with pytest.raises(
|
||
|
ValueError, match="The least populated class in y has only 1 member"
|
||
|
):
|
||
|
gbc.fit(X, y)
|
||
|
|
||
|
|
||
|
def _make_multiclass():
|
||
|
return make_classification(n_classes=3, n_clusters_per_class=1)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"gb, dataset_maker, init_estimator",
|
||
|
[
|
||
|
(GradientBoostingClassifier, make_classification, DummyClassifier),
|
||
|
(GradientBoostingClassifier, _make_multiclass, DummyClassifier),
|
||
|
(GradientBoostingRegressor, make_regression, DummyRegressor),
|
||
|
],
|
||
|
ids=["binary classification", "multiclass classification", "regression"],
|
||
|
)
|
||
|
def test_gradient_boosting_with_init(
|
||
|
gb, dataset_maker, init_estimator, global_random_seed
|
||
|
):
|
||
|
# Check that GradientBoostingRegressor works when init is a sklearn
|
||
|
# estimator.
|
||
|
# Check that an error is raised if trying to fit with sample weight but
|
||
|
# initial estimator does not support sample weight
|
||
|
|
||
|
X, y = dataset_maker()
|
||
|
sample_weight = np.random.RandomState(global_random_seed).rand(100)
|
||
|
|
||
|
# init supports sample weights
|
||
|
init_est = init_estimator()
|
||
|
gb(init=init_est).fit(X, y, sample_weight=sample_weight)
|
||
|
|
||
|
# init does not support sample weights
|
||
|
init_est = NoSampleWeightWrapper(init_estimator())
|
||
|
gb(init=init_est).fit(X, y) # ok no sample weights
|
||
|
with pytest.raises(ValueError, match="estimator.*does not support sample weights"):
|
||
|
gb(init=init_est).fit(X, y, sample_weight=sample_weight)
|
||
|
|
||
|
|
||
|
def test_gradient_boosting_with_init_pipeline():
|
||
|
# Check that the init estimator can be a pipeline (see issue #13466)
|
||
|
|
||
|
X, y = make_regression(random_state=0)
|
||
|
init = make_pipeline(LinearRegression())
|
||
|
gb = GradientBoostingRegressor(init=init)
|
||
|
gb.fit(X, y) # pipeline without sample_weight works fine
|
||
|
|
||
|
with pytest.raises(
|
||
|
ValueError,
|
||
|
match="The initial estimator Pipeline does not support sample weights",
|
||
|
):
|
||
|
gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
|
||
|
|
||
|
# Passing sample_weight to a pipeline raises a ValueError. This test makes
|
||
|
# sure we make the distinction between ValueError raised by a pipeline that
|
||
|
# was passed sample_weight, and a InvalidParameterError raised by a regular
|
||
|
# estimator whose input checking failed.
|
||
|
invalid_nu = 1.5
|
||
|
err_msg = (
|
||
|
"The 'nu' parameter of NuSVR must be a float in the"
|
||
|
f" range (0.0, 1.0]. Got {invalid_nu} instead."
|
||
|
)
|
||
|
with pytest.raises(InvalidParameterError, match=re.escape(err_msg)):
|
||
|
# Note that NuSVR properly supports sample_weight
|
||
|
init = NuSVR(gamma="auto", nu=invalid_nu)
|
||
|
gb = GradientBoostingRegressor(init=init)
|
||
|
gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
|
||
|
|
||
|
|
||
|
def test_early_stopping_n_classes():
|
||
|
# when doing early stopping (_, , y_train, _ = train_test_split(X, y))
|
||
|
# there might be classes in y that are missing in y_train. As the init
|
||
|
# estimator will be trained on y_train, we need to raise an error if this
|
||
|
# happens.
|
||
|
|
||
|
X = [[1]] * 10
|
||
|
y = [0, 0] + [1] * 8 # only 2 negative class over 10 samples
|
||
|
gb = GradientBoostingClassifier(
|
||
|
n_iter_no_change=5, random_state=0, validation_fraction=0.8
|
||
|
)
|
||
|
with pytest.raises(
|
||
|
ValueError, match="The training data after the early stopping split"
|
||
|
):
|
||
|
gb.fit(X, y)
|
||
|
|
||
|
# No error if we let training data be big enough
|
||
|
gb = GradientBoostingClassifier(
|
||
|
n_iter_no_change=5, random_state=0, validation_fraction=0.4
|
||
|
)
|
||
|
|
||
|
|
||
|
def test_gbr_degenerate_feature_importances():
|
||
|
# growing an ensemble of single node trees. See #13620
|
||
|
X = np.zeros((10, 10))
|
||
|
y = np.ones((10,))
|
||
|
gbr = GradientBoostingRegressor().fit(X, y)
|
||
|
assert_array_equal(gbr.feature_importances_, np.zeros(10, dtype=np.float64))
|
||
|
|
||
|
|
||
|
# TODO(1.3): Remove
|
||
|
def test_loss_deprecated():
|
||
|
est1 = GradientBoostingClassifier(loss="deviance", random_state=0)
|
||
|
|
||
|
with pytest.warns(FutureWarning, match=r"The loss.* 'deviance' was deprecated"):
|
||
|
est1.fit(X, y)
|
||
|
|
||
|
est2 = GradientBoostingClassifier(loss="log_loss", random_state=0)
|
||
|
est2.fit(X, y)
|
||
|
assert_allclose(est1.predict(X), est2.predict(X))
|
||
|
|
||
|
|
||
|
# TODO(1.3): remove
|
||
|
@pytest.mark.parametrize(
|
||
|
"Estimator", [GradientBoostingClassifier, GradientBoostingRegressor]
|
||
|
)
|
||
|
def test_loss_attribute_deprecation(Estimator):
|
||
|
# Check that we raise the proper deprecation warning if accessing
|
||
|
# `loss_`.
|
||
|
X = np.array([[1, 2], [3, 4]])
|
||
|
y = np.array([1, 0])
|
||
|
est = Estimator().fit(X, y)
|
||
|
|
||
|
with pytest.warns(FutureWarning, match="`loss_` was deprecated"):
|
||
|
est.loss_
|