# projektAI/venv/Lib/site-packages/mlxtend/feature_extraction/linear_discriminant_analysis.py
# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Linear Discriminant Analysis for dimensionality reduction
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause
import numpy as np
from .._base import _BaseModel
class LinearDiscriminantAnalysis(_BaseModel):
"""
Linear Discriminant Analysis Class
Parameters
----------
    n_discriminants : int (default: None)
        The number of discriminants for the transformation.
        Keeps the original dimensions of the dataset if `None`.
Attributes
----------
w_ : array-like, shape=[n_features, n_discriminants]
Projection matrix
    e_vals_ : array-like, shape=[n_features]
        Eigenvalues in sorted order.
    e_vecs_ : array-like, shape=[n_features, n_features]
        Eigenvectors in sorted order, one eigenvector per column.
Examples
-----------
For usage examples, please see
http://rasbt.github.io/mlxtend/user_guide/feature_extraction/LinearDiscriminantAnalysis/
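
    A minimal, illustrative sketch (hypothetical data, not from the linked
    docs), assuming integer class labels 0, ..., k-1:

    >>> import numpy as np
    >>> X = np.random.randn(100, 4)
    >>> y = np.repeat(np.arange(2), 50)
    >>> lda = LinearDiscriminantAnalysis(n_discriminants=2)
    >>> X_lda = lda.fit(X, y).transform(X)  # projected data, shape (100, 2)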
"""
def __init__(self, n_discriminants=None):
        if n_discriminants is not None and n_discriminants < 1:
            raise AttributeError('n_discriminants must be >= 1 or None')
self.n_discriminants = n_discriminants
def fit(self, X, y, n_classes=None):
""" Fit the LDA model with X.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
n_classes : int (default: None)
A positive integer to declare the number of class labels
if not all class labels are present in a partial training set.
Gets the number of class labels automatically if None.
Returns
-------
self : object
"""
self._is_fitted = False
self._check_arrays(X=X, y=y)
self._fit(X=X, y=y, n_classes=n_classes)
self._is_fitted = True
return self
def _fit(self, X, y, n_classes=None):
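        # Cap the requested number of discriminants at n_features. Note that
        # the between-class scatter matrix has rank at most n_classes - 1,
        # so only the first n_classes - 1 eigenvalues can be nonzero.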
if self.n_discriminants is None or self.n_discriminants > X.shape[1]:
n_discriminants = X.shape[1]
else:
n_discriminants = self.n_discriminants
if n_classes:
self._n_classes = n_classes
else:
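            # Infer the class count from the labels, assuming they are the
            # integers 0, 1, ..., k-1.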
self._n_classes = np.max(y) + 1
self._n_features = X.shape[1]
mean_vecs = self._mean_vectors(X=X, y=y, n_classes=self._n_classes)
within_scatter = self._within_scatter(X=X,
y=y,
n_classes=self._n_classes,
mean_vectors=mean_vecs)
between_scatter = self._between_scatter(X=X,
y=y,
mean_vectors=mean_vecs)
self.e_vals_, self.e_vecs_ = self._eigendecom(
within_scatter=within_scatter, between_scatter=between_scatter)
self.w_ = self._projection_matrix(eig_vals=self.e_vals_,
eig_vecs=self.e_vecs_,
n_discriminants=n_discriminants)
return self
def transform(self, X):
""" Apply the linear transformation on X.
Parameters
----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples to project, where n_samples is the number of samples and
            n_features is the number of features.
        Returns
        -------
        X_projected : np.ndarray, shape = [n_samples, n_discriminants]
            Projected samples.
"""
        if not hasattr(self, 'w_'):
            raise AttributeError('Object has not been fitted yet.')
self._check_arrays(X=X)
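        # Project X onto the discriminant axes:
        # (n_samples, n_features) @ (n_features, n_discriminants).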
return X.dot(self.w_)
def _mean_vectors(self, X, y, n_classes):
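        # Per-class centroids m_c: the mean of all rows of X with label c,
        # for c = 0, ..., n_classes - 1.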
mean_vectors = []
for cl in range(n_classes):
mean_vectors.append(np.mean(X[y == cl], axis=0))
return mean_vectors
def _within_scatter(self, X, y, n_classes, mean_vectors):
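        # Within-class scatter: S_W = sum_c sum_{x in class c}
        # (x - m_c)(x - m_c)^T, accumulated one outer product per sample.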
S_W = np.zeros((X.shape[1], X.shape[1]))
for cl, mv in zip(range(n_classes), mean_vectors):
class_sc_mat = np.zeros((X.shape[1], X.shape[1]))
for row in X[y == cl]:
row, mv = row.reshape(X.shape[1], 1), mv.reshape(X.shape[1], 1)
class_sc_mat += (row - mv).dot((row - mv).T)
S_W += class_sc_mat
return S_W
def _between_scatter(self, X, y, mean_vectors):
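        # Between-class scatter: S_B = sum_c n_c (m_c - m)(m_c - m)^T,
        # where m is the overall mean and n_c the size of class c.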
overall_mean = np.mean(X, axis=0)
S_B = np.zeros((X.shape[1], X.shape[1]))
for i, mean_vec in enumerate(mean_vectors):
            # Class labels are 0-indexed, consistent with _mean_vectors
            # (which uses y == cl), so class i's sample count comes from y == i.
            n = X[y == i, :].shape[0]
mean_vec = mean_vec.reshape(X.shape[1], 1)
overall_mean = overall_mean.reshape(X.shape[1], 1)
S_B += n * (mean_vec - overall_mean).dot(
(mean_vec - overall_mean).T)
return S_B
def _eigendecom(self, within_scatter, between_scatter):
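        # Solve the generalized eigenproblem S_B w = lambda S_W w via
        # eig(inv(S_W) @ S_B), then sort eigenpairs by decreasing eigenvalue.
        # Note: eig on this (generally non-symmetric) matrix may return
        # complex-typed arrays even when the eigenvalues are real.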
e_vals, e_vecs = np.linalg.eig(np.linalg.inv(within_scatter).dot(
between_scatter))
sort_idx = np.argsort(e_vals)[::-1]
e_vals, e_vecs = e_vals[sort_idx], e_vecs[:, sort_idx]
return e_vals, e_vecs
def _projection_matrix(self, eig_vals, eig_vecs, n_discriminants):
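        # Stack the top n_discriminants eigenvectors as the columns of the
        # projection matrix W, shape (n_features, n_discriminants).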
matrix_w = np.vstack([eig_vecs[:, i] for
i in range(n_discriminants)]).T
return matrix_w