# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Principal Component Analysis for dimensionality reduction.
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from scipy.spatial import distance
from .._base import _BaseModel


class RBFKernelPCA(_BaseModel):
"""
RBF Kernel Principal Component Analysis for dimensionality reduction.
Parameters
----------
gamma : float (default: 15.0)
Free parameter (coefficient) of the RBF kernel.
n_components : int (default: None)
The number of principal components for transformation.
Keeps the original dimensions of the dataset if `None`.
copy_X : bool (default: True)
Copies training data, which is required to compute the projection
of new data via the transform method. Uses a reference to X if False.
Attributes
----------
e_vals_ : array-like, shape=[n_features]
Eigenvalues in sorted order.
e_vecs_ : array-like, shape=[n_features]
Eigenvectors in sorted order.
X_projected_ : array-like, shape=[n_samples, n_components]
Training samples projected along the component axes.
Examples
-----------
For usage examples, please see
http://rasbt.github.io/mlxtend/user_guide/feature_extraction/RBFKernelPCA/
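
    A minimal usage sketch with synthetic data (illustrative only; the
    projected values depend on the data and on `gamma`, so they are not
    shown here):

    >>> import numpy as np
    >>> X = np.random.RandomState(1).randn(10, 3)
    >>> kpca = RBFKernelPCA(gamma=15.0, n_components=2)
    >>> kpca.fit(X).X_projected_.shape
    (10, 2)
    >>> kpca.transform(X).shape
    (10, 2)
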
"""

    def __init__(self, gamma=15.0, n_components=None, copy_X=True):
        if n_components is not None and n_components < 1:
            raise AttributeError('n_components must be >= 1 or None')
        self.n_components = n_components
        self.gamma = gamma
        self.copy_X = copy_X
        self._is_fitted = False

    def fit(self, X):
        """Learn model from training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        self : object

        """
        self._is_fitted = False
        self._check_arrays(X=X)
        self._fit(X=X)
        self._is_fitted = True
        return self

    def _fit(self, X):
        if self.n_components is None or self.n_components > X.shape[1]:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        kernel_mat = self._kernel_matrix(X=X, gamma=self.gamma)
        self.e_vals_, self.e_vecs_ = self._eigendecom(kernel_mat)
        self.X_projected_ = self._projection_matrix(eig_vecs=self.e_vecs_,
                                                    n_components=n_components)

        if self.copy_X:
            self.X_ = X.copy()
        else:
            self.X_ = X
        return self

    def transform(self, X):
        """Apply the non-linear transformation on X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Data vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        X_projected : np.ndarray, shape = [n_samples, n_components]
            Projected data vectors.

        """
        if not hasattr(self, 'X_'):
            raise AttributeError('Object has not been fitted, yet.')
        self._check_arrays(X=X)

        if self.n_components is None or self.n_components > self.X_.shape[1]:
            n_components = self.X_.shape[1]
        else:
            n_components = self.n_components

        # Squared Euclidean distances between every training sample
        # and every sample in X.
        # pair_dist = np.array([np.sum((X - row)**2) for row in self.X_])
        pair_dist = np.ones((self.X_.shape[0], X.shape[0]))
        for idx in range(X.shape[0]):
            pair_dist[:, idx] = ((self.X_ - X[idx])**2).sum(axis=1)

        K = np.exp((-1) * self.gamma * pair_dist)
        e_vecs = self._projection_matrix(eig_vecs=self.e_vecs_,
                                         n_components=n_components)
        return K.T.dot(e_vecs / self.e_vals_[:e_vecs.shape[1]])

    def _kernel_matrix(self, X, gamma):
        # Calculating the squared Euclidean distances for every pair of points
        # in the MxN dimensional dataset.
        sq_dists = distance.pdist(X, 'sqeuclidean')

        # Converting the pairwise distances into a symmetric MxM matrix.
        mat_sq_dists = distance.squareform(sq_dists)

        # Computing the MxM kernel matrix.
        K = np.exp((-1) * gamma * mat_sq_dists)

        # Centering the symmetric NxN kernel matrix.
        N = K.shape[0]
        one_n = np.ones((N, N)) / N
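        # The next step applies the standard kernel-centering identity
        #     K_centered = K - one_n @ K - K @ one_n + one_n @ K @ one_n,
        # where `one_n` is the NxN matrix whose entries all equal 1/N.
        # It centers the samples in the implicit RBF feature space (i.e.,
        # gives the kernel-mapped data zero mean) before eigendecomposition.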
K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)
return K

    def _eigendecom(self, kernel_mat):
        e_vals, e_vecs = np.linalg.eigh(kernel_mat)
        # Sort the eigenpairs in order of decreasing eigenvalue.
        sort_idx = np.argsort(e_vals)[::-1]
        e_vals, e_vecs = e_vals[sort_idx], e_vecs[:, sort_idx]
        return e_vals, e_vecs

    def _projection_matrix(self, eig_vecs, n_components):
        # Stack the leading `n_components` eigenvectors column-wise.
        matrix_w = np.vstack([eig_vecs[:, i] for i in range(n_components)]).T
        return matrix_w
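

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the upstream mlxtend module). Because of
# the relative import above, it only runs in package context, e.g. via
# `python -m mlxtend.feature_extraction.rbf_kernel_pca`. The data are
# synthetic concentric rings, a standard non-linear example for kernel PCA.
if __name__ == '__main__':
    rng = np.random.RandomState(0)

    # Two noisy concentric circles with 50 points each.
    angles = rng.uniform(0, 2 * np.pi, size=100)
    radii = np.r_[np.ones(50), 3 * np.ones(50)] + 0.1 * rng.randn(100)
    X_demo = np.c_[radii * np.cos(angles), radii * np.sin(angles)]

    kpca = RBFKernelPCA(gamma=2.0, n_components=2)
    kpca.fit(X_demo)

    print('Projected training data:', kpca.X_projected_.shape)          # (100, 2)
    print('Projected new data:     ', kpca.transform(X_demo[:5]).shape)  # (5, 2)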