3RNN/Lib/site-packages/sklearn/ensemble/_hist_gradient_boosting/utils.py

"""This module contains utility routines."""

from ...base import is_classifier
from .binning import _BinMapper


def get_equivalent_estimator(estimator, lib="lightgbm", n_classes=None):
    """Return an unfitted estimator from another lib with matching hyperparams.

    This utility function takes care of renaming the sklearn parameters into
    their LightGBM, XGBoost or CatBoost equivalent parameters.

    # unmapped XGB parameters:
    # - min_samples_leaf
    # - min_data_in_bin
    # - min_split_gain (there is min_split_loss though?)

    # unmapped Catboost parameters:
    # max_leaves
    # min_*
    """

    if lib not in ("lightgbm", "xgboost", "catboost"):
        raise ValueError(
            "accepted libs are lightgbm, xgboost, and catboost.  got {}".format(lib)
        )

    sklearn_params = estimator.get_params()

    if sklearn_params["loss"] == "auto":
        raise ValueError(
            "auto loss is not accepted. We need to know if "
            "the problem is binary or multiclass classification."
        )
    if sklearn_params["early_stopping"]:
        raise NotImplementedError("Early stopping should be deactivated.")

    lightgbm_loss_mapping = {
        "squared_error": "regression_l2",
        "absolute_error": "regression_l1",
        "log_loss": "binary" if n_classes == 2 else "multiclass",
        "gamma": "gamma",
        "poisson": "poisson",
    }

    lightgbm_params = {
        "objective": lightgbm_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "n_estimators": sklearn_params["max_iter"],
        "num_leaves": sklearn_params["max_leaf_nodes"],
        "max_depth": sklearn_params["max_depth"],
        "min_data_in_leaf": sklearn_params["min_samples_leaf"],
        "reg_lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "min_data_in_bin": 1,
        "min_sum_hessian_in_leaf": 1e-3,
        "min_split_gain": 0,
        "verbosity": 10 if sklearn_params["verbose"] else -10,
        "boost_from_average": True,
        "enable_bundle": False,  # also makes feature order consistent
        "subsample_for_bin": _BinMapper().subsample,
        "poisson_max_delta_step": 1e-12,
        "feature_fraction_bynode": sklearn_params["max_features"],
    }

    if sklearn_params["loss"] == "log_loss" and n_classes > 2:
        # LightGBM multiplies hessians by 2 in multiclass loss.
        lightgbm_params["min_sum_hessian_in_leaf"] *= 2
        # LightGBM 3.0 introduced a different scaling of the hessian for the multiclass
        # case.
        # It is equivalent of scaling the learning rate.
        # See https://github.com/microsoft/LightGBM/pull/3256.
        if n_classes is not None:
            lightgbm_params["learning_rate"] *= n_classes / (n_classes - 1)

    # XGB
    xgboost_loss_mapping = {
        "squared_error": "reg:linear",
        "absolute_error": "LEAST_ABSOLUTE_DEV_NOT_SUPPORTED",
        "log_loss": "reg:logistic" if n_classes == 2 else "multi:softmax",
        "gamma": "reg:gamma",
        "poisson": "count:poisson",
    }

    xgboost_params = {
        "tree_method": "hist",
        "grow_policy": "lossguide",  # so that we can set max_leaves
        "objective": xgboost_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "n_estimators": sklearn_params["max_iter"],
        "max_leaves": sklearn_params["max_leaf_nodes"],
        "max_depth": sklearn_params["max_depth"] or 0,
        "lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "min_child_weight": 1e-3,
        "verbosity": 2 if sklearn_params["verbose"] else 0,
        "silent": sklearn_params["verbose"] == 0,
        "n_jobs": -1,
        "colsample_bynode": sklearn_params["max_features"],
    }

    # Catboost
    catboost_loss_mapping = {
        "squared_error": "RMSE",
        # catboost does not support MAE when leaf_estimation_method is Newton
        "absolute_error": "LEAST_ASBOLUTE_DEV_NOT_SUPPORTED",
        "log_loss": "Logloss" if n_classes == 2 else "MultiClass",
        "gamma": None,
        "poisson": "Poisson",
    }

    catboost_params = {
        "loss_function": catboost_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "iterations": sklearn_params["max_iter"],
        "depth": sklearn_params["max_depth"],
        "reg_lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "feature_border_type": "Median",
        "leaf_estimation_method": "Newton",
        "verbose": bool(sklearn_params["verbose"]),
    }

    if lib == "lightgbm":
        from lightgbm import LGBMClassifier, LGBMRegressor

        if is_classifier(estimator):
            return LGBMClassifier(**lightgbm_params)
        else:
            return LGBMRegressor(**lightgbm_params)

    elif lib == "xgboost":
        from xgboost import XGBClassifier, XGBRegressor

        if is_classifier(estimator):
            return XGBClassifier(**xgboost_params)
        else:
            return XGBRegressor(**xgboost_params)

    else:
        from catboost import CatBoostClassifier, CatBoostRegressor

        if is_classifier(estimator):
            return CatBoostClassifier(**catboost_params)
        else:
            return CatBoostRegressor(**catboost_params)
1.0 2024-05-26 19:49:15 +02:00			`"""This module contains utility routines."""`

			`from ...base import is_classifier`
			`from .binning import _BinMapper`


			`def get_equivalent_estimator(estimator, lib="lightgbm", n_classes=None):`
			`"""Return an unfitted estimator from another lib with matching hyperparams.`

			`This utility function takes care of renaming the sklearn parameters into`
			`their LightGBM, XGBoost or CatBoost equivalent parameters.`

			`# unmapped XGB parameters:`
			`# - min_samples_leaf`
			`# - min_data_in_bin`
			`# - min_split_gain (there is min_split_loss though?)`

			`# unmapped Catboost parameters:`
			`# max_leaves`
			`# min_*`
			`"""`

			`if lib not in ("lightgbm", "xgboost", "catboost"):`
			`raise ValueError(`
			`"accepted libs are lightgbm, xgboost, and catboost. got {}".format(lib)`
			`)`

			`sklearn_params = estimator.get_params()`

			`if sklearn_params["loss"] == "auto":`
			`raise ValueError(`
			`"auto loss is not accepted. We need to know if "`
			`"the problem is binary or multiclass classification."`
			`)`
			`if sklearn_params["early_stopping"]:`
			`raise NotImplementedError("Early stopping should be deactivated.")`

			`lightgbm_loss_mapping = {`
			`"squared_error": "regression_l2",`
			`"absolute_error": "regression_l1",`
			`"log_loss": "binary" if n_classes == 2 else "multiclass",`
			`"gamma": "gamma",`
			`"poisson": "poisson",`
			`}`

			`lightgbm_params = {`
			`"objective": lightgbm_loss_mapping[sklearn_params["loss"]],`
			`"learning_rate": sklearn_params["learning_rate"],`
			`"n_estimators": sklearn_params["max_iter"],`
			`"num_leaves": sklearn_params["max_leaf_nodes"],`
			`"max_depth": sklearn_params["max_depth"],`
			`"min_data_in_leaf": sklearn_params["min_samples_leaf"],`
			`"reg_lambda": sklearn_params["l2_regularization"],`
			`"max_bin": sklearn_params["max_bins"],`
			`"min_data_in_bin": 1,`
			`"min_sum_hessian_in_leaf": 1e-3,`
			`"min_split_gain": 0,`
			`"verbosity": 10 if sklearn_params["verbose"] else -10,`
			`"boost_from_average": True,`
			`"enable_bundle": False, # also makes feature order consistent`
			`"subsample_for_bin": _BinMapper().subsample,`
			`"poisson_max_delta_step": 1e-12,`
			`"feature_fraction_bynode": sklearn_params["max_features"],`
			`}`

			`if sklearn_params["loss"] == "log_loss" and n_classes > 2:`
			`# LightGBM multiplies hessians by 2 in multiclass loss.`
			`lightgbm_params["min_sum_hessian_in_leaf"] *= 2`
			`# LightGBM 3.0 introduced a different scaling of the hessian for the multiclass`
			`# case.`
			`# It is equivalent of scaling the learning rate.`
			`# See https://github.com/microsoft/LightGBM/pull/3256.`
			`if n_classes is not None:`
			`lightgbm_params["learning_rate"] *= n_classes / (n_classes - 1)`

			`# XGB`
			`xgboost_loss_mapping = {`
			`"squared_error": "reg:linear",`
			`"absolute_error": "LEAST_ABSOLUTE_DEV_NOT_SUPPORTED",`
			`"log_loss": "reg:logistic" if n_classes == 2 else "multi:softmax",`
			`"gamma": "reg:gamma",`
			`"poisson": "count:poisson",`
			`}`

			`xgboost_params = {`
			`"tree_method": "hist",`
			`"grow_policy": "lossguide", # so that we can set max_leaves`
			`"objective": xgboost_loss_mapping[sklearn_params["loss"]],`
			`"learning_rate": sklearn_params["learning_rate"],`
			`"n_estimators": sklearn_params["max_iter"],`
			`"max_leaves": sklearn_params["max_leaf_nodes"],`
			`"max_depth": sklearn_params["max_depth"] or 0,`
			`"lambda": sklearn_params["l2_regularization"],`
			`"max_bin": sklearn_params["max_bins"],`
			`"min_child_weight": 1e-3,`
			`"verbosity": 2 if sklearn_params["verbose"] else 0,`
			`"silent": sklearn_params["verbose"] == 0,`
			`"n_jobs": -1,`
			`"colsample_bynode": sklearn_params["max_features"],`
			`}`

			`# Catboost`
			`catboost_loss_mapping = {`
			`"squared_error": "RMSE",`
			`# catboost does not support MAE when leaf_estimation_method is Newton`
			`"absolute_error": "LEAST_ASBOLUTE_DEV_NOT_SUPPORTED",`
			`"log_loss": "Logloss" if n_classes == 2 else "MultiClass",`
			`"gamma": None,`
			`"poisson": "Poisson",`
			`}`

			`catboost_params = {`
			`"loss_function": catboost_loss_mapping[sklearn_params["loss"]],`
			`"learning_rate": sklearn_params["learning_rate"],`
			`"iterations": sklearn_params["max_iter"],`
			`"depth": sklearn_params["max_depth"],`
			`"reg_lambda": sklearn_params["l2_regularization"],`
			`"max_bin": sklearn_params["max_bins"],`
			`"feature_border_type": "Median",`
			`"leaf_estimation_method": "Newton",`
			`"verbose": bool(sklearn_params["verbose"]),`
			`}`

			`if lib == "lightgbm":`
			`from lightgbm import LGBMClassifier, LGBMRegressor`

			`if is_classifier(estimator):`
			`return LGBMClassifier(**lightgbm_params)`
			`else:`
			`return LGBMRegressor(**lightgbm_params)`

			`elif lib == "xgboost":`
			`from xgboost import XGBClassifier, XGBRegressor`

			`if is_classifier(estimator):`
			`return XGBClassifier(**xgboost_params)`
			`else:`
			`return XGBRegressor(**xgboost_params)`

			`else:`
			`from catboost import CatBoostClassifier, CatBoostRegressor`

			`if is_classifier(estimator):`
			`return CatBoostClassifier(**catboost_params)`
			`else:`
			`return CatBoostRegressor(**catboost_params)`