""" Common utilities for testing clustering. """ import numpy as np ############################################################################### # Generate sample data def generate_clustered_data( seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4 ): prng = np.random.RandomState(seed) # the data is voluntary shifted away from zero to check clustering # algorithm robustness with regards to non centered data means = ( np.array( [ [1, 1, 1, 0], [-1, -1, 0, 1], [1, -1, 1, 1], [-1, 1, 1, 0], ] ) + 10 ) X = np.empty((0, n_features)) for i in range(n_clusters): X = np.r_[ X, means[i][:n_features] + std * prng.randn(n_samples_per_cluster, n_features), ] return X