"""Generate a synthetic dataset of product feature vectors and save it as CSV.

For every product class, ``n_samples`` rows are drawn feature by feature from
a normal distribution, clipped to the interval [0, 1], tagged with an integer
class label in the last column, and written to ``data.csv``.
"""
import numpy as np
import pandas as pd

features = [
    'color', 'transparency', 'smell', 'elastic',
    'dirt', 'weight', 'sound', 'reflectiveness',
]
n_features = len(features)

products = ['glass', 'mixed', 'metal', 'paper']
n_products = len(products)

# Integer class labels, one per product (row i of `products` gets label i).
etykiety = list(range(n_products))

# Per-product distribution parameters, one entry per feature (same order as
# `features`). The "*_var" values are divided by 3 and passed as the `scale`
# (standard deviation) argument of np.random.normal below.
glass_mean = [0.5, 0.8, 0.1, 0.01, 0.1, 0.9, 0.95, 0.5]
glass_var = [1, 0.4, 0.1, 0.1, 0.4, 0.2, 0.2, 0.3]
mixed_mean = [0.5, 0.3, 0.8, 0.5, 0.7, 0.6, 0.5, 0.3]
mixed_var = [1, 0.2, 0.6, 1, 0.5, 0.5, 0.4, 0.6]
metal_mean = [0.8, 0.01, 0.1, 0.3, 0.5, 0.7, 0.6, 0.4]
metal_var = [0.1, 0.1, 0.2, 0.1, 0.3, 0.3, 0.3, 0.3]
paper_mean = [0.5, 0.2, 0.4, 0.9, 0.2, 0.4, 0.2, 0.1]
paper_var = [1, 0.1, 0.3, 0.4, 0.5, 0.3, 0.3, 0.4]

# Number of rows generated for each product class.
n_samples = 1000

# One row per sample; columns are the features followed by the class label.
data = np.zeros((n_samples * n_products, n_features + 1))

product_means = [glass_mean, mixed_mean, metal_mean, paper_mean]
product_spreads = [glass_var, mixed_var, metal_var, paper_var]

for product, (means, spreads) in enumerate(zip(product_means, product_spreads)):
    # Contiguous block of rows that belongs to this product.
    rows = slice(n_samples * product, n_samples * (product + 1))
    for index in range(n_features):
        # Draw exactly as many values as the row block holds, so changing
        # n_samples keeps the assignment shape-consistent.
        data[rows, index] = np.random.normal(
            means[index], spreads[index] / 3, n_samples
        )
    data[rows, -1] = product

# Clamp feature values to [0, 1] in place; the label column is left untouched.
np.clip(data[:, :-1], 0, 1, out=data[:, :-1])

df = pd.DataFrame(data, columns=[*features, "label"])
df.to_csv("data.csv")