AI_PROJEKT_2021/generator.py
2021-06-22 19:02:44 +02:00

43 lines
1.3 KiB
Python

import numpy as np
import pandas as pd
# Integer class labels 0..3 ("etykiety" is Polish for "labels").
# Not referenced by the code below; kept so existing importers still find it.
etykiety = list(range(4))

# Names of the synthetic feature columns, in output column order.
features = ['color', 'transparency', 'smell', 'elastic', 'dirt', 'weight', 'sound', 'reflectiveness']
n_features = len(features)

# Product categories; a product's position in this list is its numeric label.
products = ['glass', 'mixed', 'metal', 'paper']
n_products = len(products)

# Per-product distribution parameters, one entry per feature (same order as
# `features`). The *_var values are divided by 3 before being used as the
# standard deviation of the normal distribution, so each one acts as a
# 3-sigma spread rather than as a statistical variance.
glass_mean = [0.5, 0.8, 0.1, 0.01, 0.1, 0.9, 0.95, 0.5]
glass_var = [1, 0.4, 0.1, 0.1, 0.4, 0.2, 0.2, 0.3]
mixed_mean = [0.5, 0.3, 0.8, 0.5, 0.7, 0.6, 0.5, 0.3]
mixed_var = [1, 0.2, 0.6, 1, 0.5, 0.5, 0.4, 0.6]
metal_mean = [0.8, 0.01, 0.1, 0.3, 0.5, 0.7, 0.6, 0.4]
metal_var = [0.1, 0.1, 0.2, 0.1, 0.3, 0.3, 0.3, 0.3]
paper_mean = [0.5, 0.2, 0.4, 0.9, 0.2, 0.4, 0.2, 0.1]
paper_var = [1, 0.1, 0.3, 0.4, 0.5, 0.3, 0.3, 0.4]

# Number of rows generated for each product.
n_samples = 1000


def generate_data(samples_per_product=n_samples):
    """Build the synthetic feature table for all products.

    For every product, each feature column is drawn from a normal
    distribution centred on that product's mean with standard deviation
    ``spread / 3``; feature values are then clipped to the range [0, 1].
    Rows are grouped by product, in the order of ``products``.

    Sampling uses NumPy's global random state (``np.random.normal``), so
    calling ``np.random.seed`` beforehand makes the result reproducible.

    Args:
        samples_per_product: Number of rows to generate for each product.

    Returns:
        A float array of shape
        ``(samples_per_product * n_products, n_features + 1)``; the last
        column holds the product label (index into ``products``).
    """
    means_by_product = [glass_mean, mixed_mean, metal_mean, paper_mean]
    spreads_by_product = [glass_var, mixed_var, metal_var, paper_var]

    table = np.zeros((samples_per_product * n_products, n_features + 1))
    for label, (means, spreads) in enumerate(zip(means_by_product, spreads_by_product)):
        rows = slice(samples_per_product * label, samples_per_product * (label + 1))
        # Draw one feature column at a time: this keeps the order in which
        # random numbers are consumed stable, so seeded runs stay comparable.
        for column in range(n_features):
            table[rows, column] = np.random.normal(
                means[column], spreads[column] / 3, samples_per_product
            )
        table[rows, -1] = label

    # Clamp the features (not the label column) into [0, 1].
    table[:, :-1] = np.clip(table[:, :-1], 0, 1)
    return table


data = generate_data()
df = pd.DataFrame(data, columns=[*features, "label"])
# Written with the default settings, so the row index is included in the file.
df.to_csv("data.csv")