"""Train a linear regression model on WHO suicide statistics.

Usage: python <script> EPOCHS BATCH_SIZE

The script reads ``who_suicide_statistics.csv`` from the working directory,
drops incomplete rows, one-hot encodes the ``age``, ``sex`` and ``country``
columns, shuffles the rows and splits them 60/20/20 into train / validate /
test sets. A Keras model made of a ``Normalization`` layer followed by a
single ``Dense(1)`` unit is trained to predict ``suicides_no``.

Outputs written to the working directory:
    suicide_model.h5 -- trained model weights
    results.csv      -- model predictions for the test set
"""
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Model

from countries_map import countries

# Training hyper-parameters come from the command line.
EPOCHS = int(sys.argv[1])
BATCH_SIZE = int(sys.argv[2])

# Name of the regression target column in the CSV file.
TARGET = 'suicides_no'

# Ordinal encodings for the categorical columns. They are not referenced
# anywhere below (the script one-hot encodes these columns instead); they are
# kept in case other modules import them.
age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
       "35-54 years": 3, "55-74 years": 4, "75+ years": 5}
sex = {"male": 0, "female": 1}

# Load the data.
sc = pd.read_csv('who_suicide_statistics.csv')
print(sc.shape)

# Remove incomplete rows.
sc.dropna(inplace=True)

# One-hot encode the categorical columns. dtype=float keeps every feature
# numeric: recent pandas versions emit bool dummies by default, and a frame
# mixing bool with numeric columns converts to an object array that the
# Normalization layer cannot adapt to.
sc = pd.get_dummies(
    sc,
    columns=['age', 'sex', 'country'],
    prefix='',
    prefix_sep='',
    dtype=float,
)

# Shuffle once with a fixed seed, then split into train / validate / test
# (60% / 20% / 20%). Positional slicing yields the same partitions as
# np.split on the shuffled frame, without relying on NumPy splitting a
# DataFrame.
shuffled = sc.sample(frac=1, random_state=42)
train_end = int(.6 * len(sc))
validate_end = int(.8 * len(sc))
train = shuffled.iloc[:train_end]
# NOTE: `validate` is not used below; model.fit() carves its own validation
# data out of the training set through validation_split.
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]

# Separate features from the target. Each frame drops its own target column
# rather than borrowing a column mask computed from another frame.
X_train = train.drop(columns=[TARGET])
y_train = train[[TARGET]]
X_test = test.drop(columns=[TARGET])
y_test = test[[TARGET]]

# Fit the normalization statistics on the training features only.
normalizer = preprocessing.Normalization()
normalizer.adapt(np.array(X_train))

# Show one example before and after normalization.
first = np.array(X_train[:1])
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())

# Linear regression: normalized inputs feeding a single dense unit.
model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])

# Run the untrained model on a few rows; the result is intentionally
# discarded.
model.predict(X_train[:10])

# Compile model
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

# Train model
history = model.fit(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2)

model.save_weights('suicide_model.h5')

# No extra metrics were compiled, so evaluate() returns the test loss
# (mean absolute error).
test_results = {}
test_results['model'] = model.evaluate(
    X_test, y_test, verbose=0)

# Predict once and reuse the result for both the flattened view and the CSV
# export.
predictions = model.predict(X_test)
test_predictions = predictions.flatten()

# Optional diagnostic plot of predictions against true values (disabled):
# a = plt.axes(aspect='equal')
# plt.scatter(y_test, test_predictions)
# plt.xlabel('True values [suicides_no]')
# plt.ylabel('Predictions values [suicides_no]')
# lims = [0, 5000]
# plt.xlim(lims)
# plt.ylim(lims)
# _ = plt.plot(lims, lims)
# plt.show()

pd.DataFrame(predictions).to_csv('results.csv')

model.summary()