# projektAI/venv/Lib/site-packages/sklearn/neural_network/_stochastic_optimizers.py

"""Stochastic optimization methods for MLP
"""

# Authors: Jiyuan Qian <jq401@nyu.edu>
# License: BSD 3 clause

import numpy as np


class BaseOptimizer:
    """Base (Stochastic) gradient descent optimizer

    Stores the parameter objects and the current learning rate. The actual
    update rule is supplied by subclasses through ``_get_updates``.

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    Attributes
    ----------
    params : list
        A new list holding the very same objects that were passed in.

    learning_rate_init : float
        The value passed at construction, stored unchanged.

    learning_rate : float
        the current learning rate
    """

    def __init__(self, params, learning_rate_init=0.1):
        # Copy the container only; the elements stay shared with the caller
        # so that updates applied by ``update_params`` reach them.
        self.params = list(params)
        self.learning_rate_init = learning_rate_init
        self.learning_rate = float(learning_rate_init)

    def update_params(self, grads):
        """Update parameters with given gradients

        Parameters
        ----------
        grads : list, length = len(params)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params
        """
        updates = self._get_updates(grads)
        for param, update in zip(self.params, updates):
            # ``+=`` on objects implementing in-place addition (e.g. NumPy
            # arrays) mutates the stored object instead of rebinding it.
            param += update

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Hook that concrete optimizers must override; the base class has no
        update rule of its own.

        Parameters
        ----------
        grads : list, length = len(params)
            Gradients aligned with ``params``.

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params

        Raises
        ------
        NotImplementedError
            Always, when called on a class that did not override this hook.
        """
        raise NotImplementedError(
            "%s must implement _get_updates" % type(self).__name__)

    def iteration_ends(self, time_step):
        """Perform update to learning rate and potentially other states at the
        end of an iteration

        The base implementation does nothing.

        Parameters
        ----------
        time_step : int
            Unused here; available to subclasses that override this method.
        """
        pass

    def trigger_stopping(self, msg, verbose):
        """Decides whether it is time to stop training

        The base implementation always asks for training to stop.

        Parameters
        ----------
        msg : str
            Message passed in for verbose output

        verbose : bool
            Print message to stdout if True

        Returns
        -------
        is_stopping : bool
            True if training needs to stop
        """
        if verbose:
            print(msg + " Stopping.")
        return True


class SGDOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with momentum

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        One zero-filled velocity is created for every entry.

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'
        Learning rate schedule for weight updates.

        - 'constant': this class never modifies the learning rate on its
          own.

        - 'invscaling': every call to ``iteration_ends`` resets the learning
          rate to
          ``learning_rate_init / (time_step + 1) ** power_t``.

        - 'adaptive': every call to ``trigger_stopping`` divides the current
          learning rate by 5 and lets training continue, unless the rate is
          already <= 1e-6, in which case stopping is requested.

    momentum : float, default=0.9
        Coefficient applied to the previous velocity. Not validated here.

    nesterov : bool, default=True
        Whether to use nesterov's momentum or not. Use nesterov's if True

    power_t : float, default=0.5
        Exponent of the time step used by the 'invscaling' schedule.

    Attributes
    ----------
    learning_rate : float
        the current learning rate

    velocities : list, length = len(params)
        velocities that are used to update params
    """

    def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',
                 momentum=0.9, nesterov=True, power_t=0.5):
        super().__init__(params, learning_rate_init)

        self.lr_schedule = lr_schedule
        self.momentum = momentum
        self.nesterov = nesterov
        self.power_t = power_t
        # Velocities start at rest, one array shaped like each parameter.
        self.velocities = list(map(np.zeros_like, params))

    def iteration_ends(self, time_step):
        """Refresh the learning rate at the end of an iteration

        Only the 'invscaling' schedule reacts; every other schedule leaves
        the learning rate as it is.

        Parameters
        ----------
        time_step : int
            Step counter supplied by the caller; the decay divides by
            ``(time_step + 1) ** power_t``.
        """
        if self.lr_schedule != 'invscaling':
            return

        initial_rate = float(self.learning_rate_init)
        self.learning_rate = initial_rate / (time_step + 1) ** self.power_t

    def trigger_stopping(self, msg, verbose):
        """Decide whether training stops or continues with a smaller step

        Parameters
        ----------
        msg : str
            Prefix of the message printed when ``verbose`` is true.

        verbose : bool
            Print a status message if True.

        Returns
        -------
        is_stopping : bool
            False only for the 'adaptive' schedule while the learning rate
            is still above 1e-6 (the rate is then divided by 5); True in
            every other case.
        """
        if self.lr_schedule == 'adaptive':
            if self.learning_rate <= 1e-6:
                suffix = " Learning rate too small. Stopping."
            else:
                # Shrink the step and give training another chance.
                self.learning_rate /= 5.
                if verbose:
                    print(msg + " Setting learning rate to %f" %
                          self.learning_rate)
                return False
        else:
            suffix = " Stopping."

        if verbose:
            print(msg + suffix)
        return True

    def _get_updates(self, grads):
        """Compute the momentum step for each parameter

        The freshly computed steps are stored as the new velocities. With
        ``nesterov`` enabled, the returned values are computed a second time
        from those new velocities and the same gradients.

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Gradients aligned with the parameters.

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        mu = self.momentum
        rate = self.learning_rate

        steps = []
        for previous, grad in zip(self.velocities, grads):
            steps.append(mu * previous - rate * grad)
        self.velocities = steps

        if not self.nesterov:
            return steps

        lookahead = []
        for velocity, grad in zip(steps, grads):
            lookahead.append(mu * velocity - rate * grad)
        return lookahead


class AdamOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with Adam

    Note: All default values are from the original Adam paper

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        One zero-filled first and second moment is created for every entry.

    learning_rate_init : float, default=0.001
        The initial learning rate used. It controls the step-size in updating
        the weights

    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector, should be
        in [0, 1)

    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector, should be
        in [0, 1)

    epsilon : float, default=1e-8
        Added to the square root of the second moment in the denominator of
        the update, for numerical stability.

    Attributes
    ----------
    learning_rate : float
        The current learning rate; recomputed with the bias-correction
        factor on every call to ``_get_updates``.

    t : int
        Timestep; number of times ``_get_updates`` has been entered.

    ms : list, length = len(params)
        First moment vectors

    vs : list, length = len(params)
        Second moment vectors

    References
    ----------
    Kingma, Diederik, and Jimmy Ba.
    "Adam: A method for stochastic optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    """

    def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,
                 beta_2=0.999, epsilon=1e-8):
        super().__init__(params, learning_rate_init)

        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.t = 0
        # Both moment estimates start at zero, shaped like each parameter.
        self.ms = list(map(np.zeros_like, params))
        self.vs = list(map(np.zeros_like, params))

    def _get_updates(self, grads):
        """Advance the moment estimates and compute the Adam step

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Gradients aligned with the parameters.

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        self.t += 1
        decay_1 = self.beta_1
        decay_2 = self.beta_2

        # Exponential moving average of the gradients.
        first_moments = []
        for m, grad in zip(self.ms, grads):
            first_moments.append(decay_1 * m + (1 - decay_1) * grad)
        self.ms = first_moments

        # Exponential moving average of the squared gradients.
        second_moments = []
        for v, grad in zip(self.vs, grads):
            second_moments.append(decay_2 * v + (1 - decay_2) * (grad ** 2))
        self.vs = second_moments

        # Fold both bias corrections into the step size.
        scaled = self.learning_rate_init * np.sqrt(1 - decay_2 ** self.t)
        self.learning_rate = scaled / (1 - decay_1 ** self.t)

        negative_rate = -self.learning_rate
        eps = self.epsilon
        return [negative_rate * m / (np.sqrt(v) + eps)
                for m, v in zip(first_moments, second_moments)]
Działa 2021-06-06 22:13:05 +02:00			`"""Stochastic optimization methods for MLP`
			`"""`

			`# Authors: Jiyuan Qian <jq401@nyu.edu>`
			`# License: BSD 3 clause`

			`import numpy as np`


			`class BaseOptimizer:`
			`"""Base (Stochastic) gradient descent optimizer`

			`Parameters`
			`----------`
			`params : list, length = len(coefs_) + len(intercepts_)`
			`The concatenated list containing coefs_ and intercepts_ in MLP model.`
			`Used for initializing velocities and updating params`

			`learning_rate_init : float, default=0.1`
			`The initial learning rate used. It controls the step-size in updating`
			`the weights`

			`Attributes`
			`----------`
			`learning_rate : float`
			`the current learning rate`
			`"""`

			`def __init__(self, params, learning_rate_init=0.1):`
			`self.params = [param for param in params]`
			`self.learning_rate_init = learning_rate_init`
			`self.learning_rate = float(learning_rate_init)`

			`def update_params(self, grads):`
			`"""Update parameters with given gradients`

			`Parameters`
			`----------`
			`grads : list, length = len(params)`
			`Containing gradients with respect to coefs_ and intercepts_ in MLP`
			`model. So length should be aligned with params`
			`"""`
			`updates = self._get_updates(grads)`
			`for param, update in zip(self.params, updates):`
			`param += update`

			`def iteration_ends(self, time_step):`
			`"""Perform update to learning rate and potentially other states at the`
			`end of an iteration`
			`"""`
			`pass`

			`def trigger_stopping(self, msg, verbose):`
			`"""Decides whether it is time to stop training`

			`Parameters`
			`----------`
			`msg : str`
			`Message passed in for verbose output`

			`verbose : bool`
			`Print message to stdin if True`

			`Returns`
			`-------`
			`is_stopping : bool`
			`True if training needs to stop`
			`"""`
			`if verbose:`
			`print(msg + " Stopping.")`
			`return True`


			`class SGDOptimizer(BaseOptimizer):`
			`"""Stochastic gradient descent optimizer with momentum`

			`Parameters`
			`----------`
			`params : list, length = len(coefs_) + len(intercepts_)`
			`The concatenated list containing coefs_ and intercepts_ in MLP model.`
			`Used for initializing velocities and updating params`

			`learning_rate_init : float, default=0.1`
			`The initial learning rate used. It controls the step-size in updating`
			`the weights`

			`lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'`
			`Learning rate schedule for weight updates.`

			`-'constant', is a constant learning rate given by`
			`'learning_rate_init'.`

			`-'invscaling' gradually decreases the learning rate 'learning_rate_' at`
			`each time step 't' using an inverse scaling exponent of 'power_t'.`
			`learning_rate_ = learning_rate_init / pow(t, power_t)`

			`-'adaptive', keeps the learning rate constant to`
			`'learning_rate_init' as long as the training keeps decreasing.`
			`Each time 2 consecutive epochs fail to decrease the training loss by`
			`tol, or fail to increase validation score by tol if 'early_stopping'`
			`is on, the current learning rate is divided by 5.`

			`momentum : float, default=0.9`
			`Value of momentum used, must be larger than or equal to 0`

			`nesterov : bool, default=True`
			`Whether to use nesterov's momentum or not. Use nesterov's if True`

			`power_t : float, default=0.5`
			Power of time step 't' in inverse scaling. See `lr_schedule` for
			`more details.`

			`Attributes`
			`----------`
			`learning_rate : float`
			`the current learning rate`

			`velocities : list, length = len(params)`
			`velocities that are used to update params`
			`"""`

			`def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',`
			`momentum=0.9, nesterov=True, power_t=0.5):`
			`super().__init__(params, learning_rate_init)`

			`self.lr_schedule = lr_schedule`
			`self.momentum = momentum`
			`self.nesterov = nesterov`
			`self.power_t = power_t`
			`self.velocities = [np.zeros_like(param) for param in params]`

			`def iteration_ends(self, time_step):`
			`"""Perform updates to learning rate and potential other states at the`
			`end of an iteration`

			`Parameters`
			`----------`
			`time_step : int`
			`number of training samples trained on so far, used to update`
			`learning rate for 'invscaling'`
			`"""`
			`if self.lr_schedule == 'invscaling':`
			`self.learning_rate = (float(self.learning_rate_init) /`
			`(time_step + 1) ** self.power_t)`

			`def trigger_stopping(self, msg, verbose):`
			`if self.lr_schedule != 'adaptive':`
			`if verbose:`
			`print(msg + " Stopping.")`
			`return True`

			`if self.learning_rate <= 1e-6:`
			`if verbose:`
			`print(msg + " Learning rate too small. Stopping.")`
			`return True`

			`self.learning_rate /= 5.`
			`if verbose:`
			`print(msg + " Setting learning rate to %f" %`
			`self.learning_rate)`
			`return False`

			`def _get_updates(self, grads):`
			`"""Get the values used to update params with given gradients`

			`Parameters`
			`----------`
			`grads : list, length = len(coefs_) + len(intercepts_)`
			`Containing gradients with respect to coefs_ and intercepts_ in MLP`
			`model. So length should be aligned with params`

			`Returns`
			`-------`
			`updates : list, length = len(grads)`
			`The values to add to params`
			`"""`
			`updates = [self.momentum * velocity - self.learning_rate * grad`
			`for velocity, grad in zip(self.velocities, grads)]`
			`self.velocities = updates`

			`if self.nesterov:`
			`updates = [self.momentum * velocity - self.learning_rate * grad`
			`for velocity, grad in zip(self.velocities, grads)]`

			`return updates`


			`class AdamOptimizer(BaseOptimizer):`
			`"""Stochastic gradient descent optimizer with Adam`

			`Note: All default values are from the original Adam paper`

			`Parameters`
			`----------`
			`params : list, length = len(coefs_) + len(intercepts_)`
			`The concatenated list containing coefs_ and intercepts_ in MLP model.`
			`Used for initializing velocities and updating params`

			`learning_rate_init : float, default=0.001`
			`The initial learning rate used. It controls the step-size in updating`
			`the weights`

			`beta_1 : float, default=0.9`
			`Exponential decay rate for estimates of first moment vector, should be`
			`in [0, 1)`

			`beta_2 : float, default=0.999`
			`Exponential decay rate for estimates of second moment vector, should be`
			`in [0, 1)`

			`epsilon : float, default=1e-8`
			`Value for numerical stability`

			`Attributes`
			`----------`
			`learning_rate : float`
			`The current learning rate`

			`t : int`
			`Timestep`

			`ms : list, length = len(params)`
			`First moment vectors`

			`vs : list, length = len(params)`
			`Second moment vectors`

			`References`
			`----------`
			`Kingma, Diederik, and Jimmy Ba.`
			`"Adam: A method for stochastic optimization."`
			`arXiv preprint arXiv:1412.6980 (2014).`
			`"""`

			`def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,`
			`beta_2=0.999, epsilon=1e-8):`
			`super().__init__(params, learning_rate_init)`

			`self.beta_1 = beta_1`
			`self.beta_2 = beta_2`
			`self.epsilon = epsilon`
			`self.t = 0`
			`self.ms = [np.zeros_like(param) for param in params]`
			`self.vs = [np.zeros_like(param) for param in params]`

			`def _get_updates(self, grads):`
			`"""Get the values used to update params with given gradients`

			`Parameters`
			`----------`
			`grads : list, length = len(coefs_) + len(intercepts_)`
			`Containing gradients with respect to coefs_ and intercepts_ in MLP`
			`model. So length should be aligned with params`

			`Returns`
			`-------`
			`updates : list, length = len(grads)`
			`The values to add to params`
			`"""`
			`self.t += 1`
			`self.ms = [self.beta_1 * m + (1 - self.beta_1) * grad`
			`for m, grad in zip(self.ms, grads)]`
			`self.vs = [self.beta_2 * v + (1 - self.beta_2) * (grad ** 2)`
			`for v, grad in zip(self.vs, grads)]`
			`self.learning_rate = (self.learning_rate_init *`
			`np.sqrt(1 - self.beta_2 ** self.t) /`
			`(1 - self.beta_1 ** self.t))`
			`updates = [-self.learning_rate * m / (np.sqrt(v) + self.epsilon)`
			`for m, v in zip(self.ms, self.vs)]`
			`return updates`