39 lines
881 B
Python
39 lines
881 B
Python
|
"""
|
||
|
Common utilities for testing clustering.
|
||
|
|
||
|
"""
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
###############################################################################
|
||
|
# Generate sample data
|
||
|
|
||
|
|
||
|
def generate_clustered_data(
    seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
):
    """Generate Gaussian blobs around fixed, deliberately non-centered means.

    Each cluster is drawn as ``mean + std * N(0, 1)`` noise, where the mean is
    one row of a built-in 4 x 4 table shifted by +10 and truncated to its
    first ``n_features`` coordinates.

    Parameters
    ----------
    seed : int, default=0
        Seed passed to ``np.random.RandomState``; the same seed always yields
        the same data.
    n_clusters : int, default=3
        Number of clusters to draw. The table of means has 4 rows, so values
        above 4 fail when the missing row is indexed.
    n_features : int, default=2
        Number of leading coordinates kept from each mean. The table has 4
        columns, so values above 4 fail when the mean is added to the noise.
    n_samples_per_cluster : int, default=20
        Number of samples drawn for every cluster.
    std : float, default=0.4
        Scale applied to the standard-normal noise.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The samples, stacked cluster after cluster (cluster 0 first).
    """
    prng = np.random.RandomState(seed)

    # the data is voluntarily shifted away from zero to check clustering
    # algorithm robustness with regards to non centered data
    means = (
        np.array(
            [
                [1, 1, 1, 0],
                [-1, -1, 0, 1],
                [1, -1, 1, 1],
                [-1, 1, 1, 0],
            ]
        )
        + 10
    )

    # Start from an empty (0, n_features) block so that n_clusters == 0 still
    # returns an empty 2-D array, then stack everything once at the end rather
    # than re-copying the growing array on every iteration.
    blocks = [np.empty((0, n_features))]
    for i in range(n_clusters):
        # Clusters are drawn in index order so the random stream is consumed
        # in a fixed sequence and results are reproducible for a given seed.
        noise = std * prng.randn(n_samples_per_cluster, n_features)
        blocks.append(means[i][:n_features] + noise)
    return np.concatenate(blocks, axis=0)
|