# File-viewer metadata (extraction residue): 43 lines, 1.3 KiB, Python.
import numpy as np
|
|
import pandas as pd
|
|
|
|
etykiety = list(range(4))
|
|
|
|
features = ['color', 'transparency', 'smell', 'elastic', 'dirt', 'weight', 'sound', 'reflectiveness']
|
|
n_features = len(features)
|
|
|
|
products = ['glass', 'mixed', 'metal', 'paper']
|
|
n_products = len(products)
|
|
|
|
glass_mean = [0.5, 0.8, 0.1, 0.01, 0.1, 0.9, 0.95, 0.5 ]
|
|
glass_var = [1, 0.4, 0.1, 0.1, 0.4, 0.2, 0.2, 0.3]
|
|
|
|
mixed_mean = [0.5, 0.3, 0.8, 0.5, 0.7, 0.6, 0.5, 0.3]
|
|
mixed_var = [1, 0.2, 0.6, 1, 0.5, 0.5, 0.4, 0.6]
|
|
|
|
metal_mean = [0.8, 0.01, 0.1, 0.3 , 0.5, 0.7, 0.6, 0.4]
|
|
metal_var = [0.1,0.1, 0.2, 0.1, 0.3, 0.3, 0.3, 0.3]
|
|
|
|
paper_mean = [0.5, 0.2, 0.4, 0.9, 0.2, 0.4, 0.2, 0.1]
|
|
paper_var = [1, 0.1, 0.3, 0.4, 0.5, 0.3, 0.3, 0.4]
|
|
|
|
|
|
|
|
|
|
n_samples = 1000
|
|
|
|
data = np.zeros((n_samples * n_products, n_features + 1))
|
|
|
|
for product, means, vars in zip(range(4), [glass_mean, mixed_mean, metal_mean, paper_mean ], [glass_var, mixed_var, metal_var, paper_var]):
|
|
for index, feature in enumerate(features):
|
|
data[n_samples*product:n_samples *(product+1),index] = np.random.normal(means[index], vars[index]/3, 1000)
|
|
|
|
data[n_samples*product:n_samples *(product+1),-1] = product
|
|
|
|
|
|
|
|
data[:,:-1][data[:,:-1]<0] = 0
|
|
data[:,:-1][data[:,:-1]>1] = 1
|
|
|
|
df = pd.DataFrame(data, columns=[*features, "label"])
|
|
df.to_csv("data.csv") |