29 lines
848 B
Python
29 lines
848 B
Python
|
"""
|
||
|
Common utilities for testing clustering.
|
||
|
|
||
|
"""
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
###############################################################################
|
||
|
# Generate sample data
|
||
|
|
||
|
def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
                            n_samples_per_cluster=20, std=.4):
    """Generate Gaussian blobs around fixed cluster centers shifted from zero.

    Each cluster is drawn as ``center + std * standard_normal`` where the
    centers come from a hard-coded table of 4 rows and 4 columns, offset
    by +10 on every coordinate.

    Parameters
    ----------
    seed : int, default 0
        Seed forwarded to ``np.random.RandomState``; the same seed always
        yields the same data.
    n_clusters : int, default 3
        Number of clusters to generate. Cluster ``i`` uses row ``i`` of the
        center table, so values above 4 raise ``IndexError``.
    n_features : int, default 2
        Number of columns. Only the first ``n_features`` coordinates of each
        center are used, so values above 4 fail with a broadcasting
        ``ValueError``.
    n_samples_per_cluster : int, default 20
        Number of rows drawn for every cluster.
    std : float, default 0.4
        Scale applied to the standard-normal noise around each center.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        Samples stacked cluster after cluster (rows of cluster 0 first).
        No labels are returned. When ``n_clusters <= 0`` the result is an
        empty array of shape ``(0, n_features)``.
    """
    prng = np.random.RandomState(seed)

    # the data is voluntarily shifted away from zero to check clustering
    # algorithm robustness with regards to non centered data
    means = np.array([[1, 1, 1, 0],
                      [-1, -1, 0, 1],
                      [1, -1, 1, 1],
                      [-1, 1, 1, 0],
                      ]) + 10

    # Seed the list with an empty (0, n_features) block so that a call with
    # no clusters still returns a correctly shaped float array.
    blocks = [np.empty((0, n_features))]
    for i in range(n_clusters):
        # One randn call per cluster, in cluster order, keeps the random
        # stream (and therefore the output) reproducible for a given seed.
        noise = std * prng.randn(n_samples_per_cluster, n_features)
        blocks.append(means[i][:n_features] + noise)

    # Concatenate once instead of growing the array inside the loop, which
    # would recopy every previously generated row on each iteration.
    return np.vstack(blocks)
|