projektAI/venv/Lib/site-packages/mlxtend/cluster/kmeans.py

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Estimator for K-means clustering
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause


import numpy as np
from .._base import _Cluster
from .._base import _BaseModel
from .._base import _IterativeModel
# from scipy.spatial.distance import euclidean


class Kmeans(_BaseModel, _Cluster, _IterativeModel):
    """ K-means clustering class.

    Added in 0.4.1dev

    Parameters
    ------------
    k : int
        Number of clusters
    max_iter : int (default: 10)
        Maximum number of iterations during cluster assignment.
        Cluster re-assignment stops early when the algorithm
        has converged.
    convergence_tolerance : float (default: 1e-05)
        Compares current centroids with centroids of the previous iteration
        using the given relative tolerance (a small positive float) to
        determine if the algorithm converged early.
    random_seed : int (default: None)
        Set random state for the initial centroid assignment.
    print_progress : int (default: 0)
        Prints progress in fitting to stderr.
        0: No output
        1: Iterations elapsed
        2: 1 plus time elapsed
        3: 2 plus estimated time until completion

    Attributes
    -----------
    centroids_ : 2d-array, shape={k, n_features}
        Feature values of the k cluster centroids.
    clusters_ : dictionary
        The cluster assignments stored as a Python dictionary;
        the dictionary keys denote the cluster indices and the items are
        Python lists of the sample indices that were assigned to each
        cluster.
    iterations_ : int
        Number of centroid updates performed before the algorithm
        converged or `max_iter` was reached.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/classifier/Kmeans/

    """

    def __init__(self, k, max_iter=10,
                 convergence_tolerance=1e-05,
                 random_seed=None, print_progress=0):

        _BaseModel.__init__(self)
        _Cluster.__init__(self)
        _IterativeModel.__init__(self)
        self.k = k
        self.max_iter = max_iter
        self.convergence_tolerance = convergence_tolerance
        self.random_seed = random_seed
        self.print_progress = print_progress
        self._is_fitted = False

    def _fit(self, X, init_params=True):
        """Learn cluster centroids from training data.

        Called in self.fit

        Parameters
        ----------
        X : array, shape = [n_samples, n_features]
            Training samples, one sample per row.
        init_params : bool (default: True)
            If True, reset `iterations_` and pick `k` distinct rows of `X`
            at random as the initial centroids. If False, continue from the
            existing `centroids_` and `iterations_`, which must already
            be set.

        Returns
        -------
        self : object

        """
        n_samples = X.shape[0]

        if init_params:
            self.iterations_ = 0
            # initialize centroids with k distinct, randomly drawn samples
            rgen = np.random.RandomState(self.random_seed)
            idx = rgen.choice(n_samples, self.k, replace=False)
            self.centroids_ = X[idx]

        for _ in range(self.max_iter):
            # assign samples to their nearest cluster centroid
            self.clusters_ = {i: [] for i in range(self.k)}
            for sample_idx, cluster_idx in enumerate(
                    self._get_cluster_idx(X=X, centroids=self.centroids_)):
                self.clusters_[cluster_idx].append(sample_idx)

            # recompute centroids as the mean of the assigned samples;
            # a cluster that received no samples keeps its previous
            # centroid, because the mean of an empty selection is NaN and
            # would poison all subsequent distance computations
            new_centroids = np.array([
                np.mean(X[members], axis=0) if members
                else self.centroids_[cluster_idx]
                for cluster_idx, members in sorted(self.clusters_.items())])

            # stop if the centroids did not move (within tolerance)
            if np.allclose(self.centroids_, new_centroids,
                           rtol=self.convergence_tolerance,
                           atol=1e-08, equal_nan=False):
                break
            self.centroids_ = new_centroids

            # only iterations that actually updated the centroids are counted
            self.iterations_ += 1
            if self.print_progress:
                self._print_progress(iteration=self.iterations_,
                                     n_iter=self.max_iter)

        return self

    def _get_cluster_idx(self, X, centroids):
        """Yield the index of the nearest centroid for each sample in X.

        Parameters
        ----------
        X : array, shape = [n_samples, n_features]
            Samples to assign, one sample per row.
        centroids : array, shape = [k, n_features]
            Centroids to measure the Euclidean distance against.

        Yields
        ------
        cluster_idx : int
            Row index in `centroids` of the centroid closest to the
            current sample (the first one in case of ties).

        """
        for sample in X:
            # Euclidean distance from this sample to every centroid
            dist = np.sqrt(np.sum(np.square(sample - centroids), axis=1))

            yield np.argmin(dist)

    def _predict(self, X):
        """Predict cluster labels of X.

        Called in self.predict

        Parameters
        ----------
        X : array, shape = [n_samples, n_features]
            Samples to label, one sample per row.

        Returns
        -------
        pred : 1d-array, shape = [n_samples]
            Index of the nearest fitted centroid for each sample.

        """
        pred = np.array(list(self._get_cluster_idx(
            X=X, centroids=self.centroids_)))
        return pred
Działa 2021-06-06 22:13:05 +02:00			`# Sebastian Raschka 2014-2020`
			`# mlxtend Machine Learning Library Extensions`
			`#`
			`# Estimator for K-means clustering`
			`# Author: Sebastian Raschka <sebastianraschka.com>`
			`#`
			`# License: BSD 3 clause`


			`import numpy as np`
			`from .._base import _Cluster`
			`from .._base import _BaseModel`
			`from .._base import _IterativeModel`
			`# from scipy.spatial.distance import euclidean`


			`class Kmeans(_BaseModel, _Cluster, _IterativeModel):`
			`""" K-means clustering class.`

			`Added in 0.4.1dev`

			`Parameters`
			`------------`
			`k : int`
			`Number of clusters`
			`max_iter : int (default: 10)`
			`Number of iterations during cluster assignment.`
			`Cluster re-assignment stops automatically when the algorithm`
			`converged.`
			`convergence_tolerance : float (default: 1e-05)`
			`Compares current centroids with centroids of the previous iteration`
			`using the given tolerance (a small positive float) to determine`
			`if the algorithm converged early.`
			`random_seed : int (default: None)`
			`Set random state for the initial centroid assignment.`
			`print_progress : int (default: 0)`
			`Prints progress in fitting to stderr.`
			`0: No output`
			`1: Iterations elapsed`
			`2: 1 plus time elapsed`
			`3: 2 plus estimated time until completion`

			`Attributes`
			`-----------`
			`centroids_ : 2d-array, shape={k, n_features}`
			`Feature values of the k cluster centroids.`
			`clusters_ : dictionary`
			`The cluster assignments stored as a Python dictionary;`
			`the dictionary keys denote the cluster indices and the items are`
			`Python lists of the sample indices that were assigned to each`
			`cluster.`
			`iterations_ : int`
			`Number of iterations until convergence.`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/classifier/Kmeans/`

			`"""`

			`def __init__(self, k, max_iter=10,`
			`convergence_tolerance=1e-05,`
			`random_seed=None, print_progress=0):`

			`_BaseModel.__init__(self)`
			`_Cluster.__init__(self)`
			`_IterativeModel.__init__(self)`
			`self.k = k`
			`self.max_iter = max_iter`
			`self.convergence_tolerance = convergence_tolerance`
			`self.random_seed = random_seed`
			`self.print_progress = print_progress`
			`self._is_fitted = False`

			`def _fit(self, X, init_params=True):`
			`"""Learn cluster centroids from training data.`

			`Called in self.fit`

			`"""`
			`n_samples = X.shape[0]`

			`if init_params:`
			`self.iterations_ = 0`
			`# initialize centroids`
			`rgen = np.random.RandomState(self.random_seed)`
			`idx = rgen.choice(n_samples, self.k, replace=False)`
			`self.centroids_ = X[idx]`

			`for _ in range(self.max_iter):`
			`# assign samples to cluster centroids`
			`self.clusters_ = {i: [] for i in range(self.k)}`
			`for sample_idx, cluster_idx in enumerate(`
			`self._get_cluster_idx(X=X, centroids=self.centroids_)):`
			`self.clusters_[cluster_idx].append(sample_idx)`

			`# recompute centroids`
			`new_centroids = np.array([np.mean(X[self.clusters_[k]], axis=0)`
			`for k in sorted(self.clusters_.keys())])`

			`# stop if cluster assignment doesn't change`

			`if np.allclose(self.centroids_, new_centroids,`
			`rtol=self.convergence_tolerance,`
			`atol=1e-08, equal_nan=False):`
			`break`
			`else:`
			`self.centroids_ = new_centroids`

			`self.iterations_ += 1`
			`if self.print_progress:`
			`self._print_progress(iteration=self.iterations_,`
			`n_iter=self.max_iter)`

			`return self`

			`def _get_cluster_idx(self, X, centroids):`
			`for sample_idx, sample in enumerate(X):`
			`# dist = [euclidean(sample, c) for c in self.centroids_]`
			`dist = np.sqrt(np.sum(np.square(sample - self.centroids_), axis=1))`

			`yield np.argmin(dist)`

			`def _predict(self, X):`
			`"""Predict cluster labels of X.`

			`Called in self.predict`

			`"""`
			`pred = np.array([idx for idx in self._get_cluster_idx(X=X,`
			`centroids=self.centroids_)])`
			`return pred`