38 lines
880 B
Python
38 lines
880 B
Python
"""
Common utilities for testing clustering.
"""
|
|
|
|
import numpy as np
|
|
|
|
###############################################################################
# Generate sample data
|
|
|
|
|
|
def generate_clustered_data(
    seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
):
    """Generate Gaussian blobs around fixed, non-centered cluster means.

    Each cluster is drawn as ``mean + std * N(0, 1)`` noise, where the means
    come from a fixed 4 x 4 table shifted by +10. Clusters are stacked in
    order, so rows ``i * n_samples_per_cluster`` up to
    ``(i + 1) * n_samples_per_cluster`` belong to cluster ``i``.

    Parameters
    ----------
    seed : int, default=0
        Seed passed to ``np.random.RandomState``; the same seed always
        produces the same data.
    n_clusters : int, default=3
        Number of clusters to generate. The means table has only 4 rows, so
        values above 4 raise ``IndexError``.
    n_features : int, default=2
        Number of leading coordinates of each mean to use. The means table
        has only 4 columns, so values above 4 fail with a broadcasting
        ``ValueError``.
    n_samples_per_cluster : int, default=20
        Number of samples drawn for each cluster.
    std : float, default=0.4
        Scale applied to the standard-normal noise.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The generated samples, grouped by cluster.
    """
    prng = np.random.RandomState(seed)

    # The data is deliberately shifted away from zero to check clustering
    # algorithm robustness with regard to non-centered data.
    means = (
        np.array(
            [
                [1, 1, 1, 0],
                [-1, -1, 0, 1],
                [1, -1, 1, 1],
                [-1, 1, 1, 0],
            ]
        )
        + 10
    )

    # Seed the list with an empty (0, n_features) block so that
    # n_clusters == 0 still returns a correctly shaped, empty float array.
    blocks = [np.empty((0, n_features))]
    for i in range(n_clusters):
        # Draw the clusters one after another so the random stream is
        # consumed in cluster order.
        noise = std * prng.randn(n_samples_per_cluster, n_features)
        blocks.append(means[i][:n_features] + noise)

    # Concatenate once instead of growing the array on every iteration.
    return np.concatenate(blocks, axis=0)
|