# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Implementation of the multinomial logistic regression algorithm for
# classification.

# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from time import time
from .._base import _BaseModel
from .._base import _IterativeModel
from .._base import _MultiClass
from .._base import _Classifier


class SoftmaxRegression(_BaseModel, _IterativeModel,
                        _Classifier, _MultiClass):

    """Softmax regression classifier.

    Parameters
    ------------
    eta : float (default: 0.01)
        Learning rate (between 0.0 and 1.0)
    epochs : int (default: 50)
        Passes over the training dataset.
        Prior to each epoch, the dataset is shuffled
        if `minibatches > 1` to prevent cycles in stochastic gradient descent.
    l2 : float
        Regularization parameter for L2 regularization.
        No regularization if l2=0.0.
    minibatches : int (default: 1)
        The number of minibatches for gradient-based optimization.
        If 1: Gradient Descent learning
        If len(y): Stochastic Gradient Descent (SGD) online learning
        If 1 < minibatches < len(y): SGD Minibatch learning
    n_classes : int (default: None)
        A positive integer to declare the number of class labels
        if not all class labels are present in a partial training set.
        Gets the number of class labels automatically if None.
    random_seed : int (default: None)
        Set random state for shuffling and initializing the weights.
    print_progress : int (default: 0)
        Prints progress in fitting to stderr.
        0: No output
        1: Epochs elapsed and cost
        2: 1 plus time elapsed
        3: 2 plus estimated time until completion

    Attributes
    -----------
    w_ : 2d-array, shape={n_features, n_classes}
        Model weights after fitting.
    b_ : 1d-array, shape={n_classes,}
        Bias units after fitting.
    cost_ : list
        List of floats, the average cross-entropy for each epoch.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/classifier/SoftmaxRegression/
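
    A minimal usage sketch (illustrative only; `iris_data` is the
    bundled example loader assumed from `mlxtend.data`, and the
    hyperparameter values are arbitrary):

    >>> from mlxtend.data import iris_data
    >>> from mlxtend.classifier import SoftmaxRegression
    >>> X, y = iris_data()
    >>> model = SoftmaxRegression(eta=0.01, epochs=10,
    ...                           minibatches=1, random_seed=0)
    >>> _ = model.fit(X, y)
    >>> y_pred = model.predict(X)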

    """
    def __init__(self, eta=0.01, epochs=50,
                 l2=0.0,
                 minibatches=1,
                 n_classes=None,
                 random_seed=None,
                 print_progress=0):

        _BaseModel.__init__(self)
        _IterativeModel.__init__(self)
        _Classifier.__init__(self)
        _MultiClass.__init__(self)

        self.eta = eta
        self.epochs = epochs
        self.l2 = l2
        self.minibatches = minibatches
        self.n_classes = n_classes
        self.random_seed = random_seed
        self.print_progress = print_progress
        self._is_fitted = False

    def _net_input(self, X, W, b):
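        # Raw linear scores ("logits"), one column per class.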
        return (X.dot(W) + b)

    def _softmax(self, z):
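        # Shift the logits by their row-wise maximum before
        # exponentiating; softmax is invariant to adding a constant
        # per row, and the shift prevents np.exp from overflowing
        # on large net-input values.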
        e_x = np.exp(z - z.max(axis=1, keepdims=True))
        out = e_x / e_x.sum(axis=1, keepdims=True)
        return out
        # return (np.exp(z.T) / np.sum(np.exp(z), axis=1)).T

    def _cross_entropy(self, output, y_target):
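        # Per-sample cross-entropy: y_target is one-hot, so the
        # elementwise product with log(output) selects the log
        # probability of the true class, giving -sum_k y_k * log(p_k)
        # per row.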
        return - np.sum(np.log(output) * (y_target), axis=1)

    def _cost(self, cross_entropy):
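        # Average the regularized cross-entropy over the batch. The
        # 0.5 factor is the usual convention that cancels the factor
        # of 2 in the derivative of the squared L2 term, so the
        # weight update only needs l2 * w_.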
        L2_term = self.l2 * np.sum(self.w_ ** 2)
        cross_entropy = cross_entropy + L2_term
        return 0.5 * np.mean(cross_entropy)

    def _to_classlabels(self, z):
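        # Pick the class with the highest probability per row; since
        # softmax is monotonic, this matches the argmax of the raw
        # net input as well.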
        return z.argmax(axis=1)

    def _fit(self, X, y, init_params=True):
        self._check_target_array(y)
        if init_params:
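            # Class labels are expected to be consecutive integers
            # starting at 0, so max(y) + 1 recovers the class count.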
            if self.n_classes is None:
                self.n_classes = np.max(y) + 1
            self._n_features = X.shape[1]

            self.b_, self.w_ = self._init_params(
                weights_shape=(self._n_features, self.n_classes),
                bias_shape=(self.n_classes,),
                random_seed=self.random_seed)
            self.cost_ = []

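        # One-hot encode the integer labels so cross-entropy and the
        # gradient can be computed with dense matrix operations.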
        y_enc = self._one_hot(y=y, n_labels=self.n_classes, dtype=float)

        self.init_time_ = time()
        rgen = np.random.RandomState(self.random_seed)
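        # Each minibatch index array drives one gradient update:
        # minibatches=1 yields full-batch gradient descent,
        # minibatches=len(y) per-sample SGD, and values in between
        # minibatch SGD (see the class docstring).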
        for i in range(self.epochs):
            for idx in self._yield_minibatches_idx(
                    rgen=rgen,
                    n_batches=self.minibatches,
                    data_ary=y,
                    shuffle=True):

                # givens:
                # w_ -> n_feat x n_classes
                # b_ -> n_classes

                # net_input, softmax and diff -> n_samples x n_classes:
                net = self._net_input(X[idx], self.w_, self.b_)
                softm = self._softmax(net)
                diff = softm - y_enc[idx]

                # gradient -> n_features x n_classes
                grad = np.dot(X[idx].T, diff)

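                # The l2 * w_ term is the derivative of the (scaled)
                # squared L2 penalty; the bias gradient is the
                # column-wise sum of diff, since the net input has
                # unit derivative w.r.t. b_.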
                # update in opp. direction of the cost gradient
                self.w_ -= (self.eta * grad +
                            self.eta * self.l2 * self.w_)
                self.b_ -= (self.eta * np.sum(diff, axis=0))

            # compute cost of the whole epoch
            net = self._net_input(X, self.w_, self.b_)
            softm = self._softmax(net)
            cross_ent = self._cross_entropy(output=softm, y_target=y_enc)
            cost = self._cost(cross_ent)
            self.cost_.append(cost)

            if self.print_progress:
                self._print_progress(iteration=i + 1,
                                     n_iter=self.epochs,
                                     cost=cost)

        return self

    def predict_proba(self, X):
        """Predict class probabilities of X from the net input.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        ----------
        Class probabilities : array-like, shape= [n_samples, n_classes]

        """
        net = self._net_input(X, self.w_, self.b_)
        softm = self._softmax(net)
        return softm

    def _predict(self, X):
        probas = self.predict_proba(X)
        return self._to_classlabels(probas)