"""Build train/validate/test CSV splits from the WHO suicide statistics data.

Usage:
    python <this_script> CUTOFF

CUTOFF is the number of leading rows (taken after incomplete rows are dropped
and the categorical columns are one-hot encoded) that are kept before the data
is shuffled and split 60/20/20 into ``train.csv``, ``validate.csv`` and
``test.csv``.
"""
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Ordinal codes for the age-group and sex labels. Nothing in this script reads
# them (those columns are one-hot encoded instead); they are kept as
# module-level names.
age = {
    "5-14 years": 0,
    "15-24 years": 1,
    "25-34 years": 2,
    "35-54 years": 3,
    "55-74 years": 4,
    "75+ years": 5,
}
sex = {"male": 0, "female": 1}


def parse_cutoff(argv):
    """Return the integer CUTOFF given as the first command-line argument.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).

    Raises:
        SystemExit: If the argument is missing or is not an integer.
    """
    if len(argv) < 2:
        raise SystemExit("usage: {} CUTOFF".format(argv[0] if argv else "script"))
    try:
        return int(argv[1])
    except ValueError:
        raise SystemExit(
            "CUTOFF must be an integer, got {!r}".format(argv[1])
        ) from None


def split_frame(frame, bounds=(0.6, 0.8), random_state=42):
    """Shuffle *frame* and cut it into three consecutive parts.

    Args:
        frame: The ``pandas.DataFrame`` to split.
        bounds: Two cumulative fractions of ``len(frame)`` marking where the
            first and second parts end.
        random_state: Seed passed to ``DataFrame.sample`` so the shuffle is
            reproducible.

    Returns:
        A ``(train, validate, test)`` tuple of DataFrames.
    """
    shuffled = frame.sample(frac=1, random_state=random_state)
    total = len(frame)
    first_end = int(bounds[0] * total)
    second_end = int(bounds[1] * total)
    # Positional slicing yields the same three pieces as
    # np.split(shuffled, [first_end, second_end]) without going through the
    # deprecated DataFrame.swapaxes that np.split relies on.
    return (
        shuffled.iloc[:first_end],
        shuffled.iloc[first_end:second_end],
        shuffled.iloc[second_end:],
    )


def main(argv):
    """Read the source CSV, clean and encode it, then write the three splits."""
    cutoff = parse_cutoff(argv)

    sc = pd.read_csv('who_suicide_statistics.csv')

    # Drop rows with incomplete data.
    sc = sc.dropna()

    # One-hot encode the categorical columns; the empty prefix makes the new
    # column names equal to the raw category values.
    sc = pd.get_dummies(
        sc, columns=['age', 'sex', 'country'], prefix='', prefix_sep='')

    # Keep only the first `cutoff` rows. This happens after encoding, so
    # indicator columns for categories absent from those rows are still present.
    sc = sc.head(cutoff)

    # Split into train / validate / test.
    train, validate, test = split_frame(sc)

    # Write the splits to files (the shuffled index is written as well).
    train.to_csv('train.csv')
    validate.to_csv('validate.csv')
    test.to_csv('test.csv')

    print(train)
    print(validate)
    print(test)


if __name__ == "__main__":
    main(sys.argv)