import sys
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental import preprocessing
from sklearn.metrics import mean_squared_error

from countries_map import countries

age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
       "35-54 years": 3, "55-74 years": 4, "75+ years": 5}
sex = {"male": 0, "female": 1}
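
# Note: the ordinal age/sex mappings above are not referenced later in this
# script; the age, sex and country columns are one-hot encoded with
# pd.get_dummies below instead.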

# Load the data
sc = pd.read_csv('who_suicide_statistics.csv')
print(sc.shape)

# Drop rows with missing values
sc.dropna(inplace=True)

# One-hot encode the categorical columns
sc = pd.get_dummies(
    sc, columns=['age', 'sex', 'country'], prefix='', prefix_sep='')

# Split into train, validate and test sets (60/20/20)
train, validate, test = np.split(sc.sample(frac=1, random_state=42),
                                 [int(.6*len(sc)), int(.8*len(sc))])

# Separate features (X) and target (y) for the train and test sets
X_train = train.loc[:, train.columns != 'suicides_no']
y_train = train[['suicides_no']]
X_test = test.loc[:, test.columns != 'suicides_no']
y_test = test[['suicides_no']]
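
# Note: the 'validate' frame produced by np.split above is not used below;
# model.fit relies on validation_split instead. A minimal sketch of how it
# could be wired in, assuming the same feature/target layout, would be:
# X_validate = validate.loc[:, validate.columns != 'suicides_no']
# y_validate = validate[['suicides_no']]
# and then passing validation_data=(X_validate, y_validate) to model.fit.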

# Normalization layer adapted to the training features
normalizer = preprocessing.Normalization()
normalizer.adapt(np.array(X_train))

# Print one raw example and its normalized form
first = np.array(X_train[:1])
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())

# Linear regression model: feature normalization followed by a single dense unit
model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])

model.predict(X_train[:10])

# Compile model
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

# Train model; batch size and epoch count come from the command line
# (sys.argv[0] is the script name, and argv values are strings, so they
# must be converted to int before being passed to Keras)
history = model.fit(
    X_train, y_train,
    batch_size=int(sys.argv[1]),
    epochs=int(sys.argv[2]),
    validation_split=0.2)
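
# EarlyStopping is imported above but never attached. A minimal sketch of how
# it could be used, assuming validation loss is the quantity to monitor and a
# hypothetical patience value:
# early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# history = model.fit(X_train, y_train,
#                     batch_size=int(sys.argv[1]), epochs=int(sys.argv[2]),
#                     validation_split=0.2, callbacks=[early_stop])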

# Evaluate on the held-out test set
test_results = {}
test_results['model'] = model.evaluate(
    X_test, y_test, verbose=0)
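
# The training history captured above is not inspected anywhere in the script.
# A minimal sketch of plotting the loss curves, assuming the default 'loss' and
# 'val_loss' keys recorded by model.fit:
# plt.plot(history.history['loss'], label='loss')
# plt.plot(history.history['val_loss'], label='val_loss')
# plt.xlabel('Epoch')
# plt.ylabel('Mean absolute error [suicides_no]')
# plt.legend()
# plt.show()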

test_predictions = model.predict(X_test).flatten()
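
# mean_squared_error is imported above but never used. A minimal sketch of an
# RMSE report on the test predictions, assuming that was the intent:
# rmse = np.sqrt(mean_squared_error(y_test, test_predictions))
# print('Test RMSE:', rmse)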

# Scatter plot of true vs. predicted values (optional)
# a = plt.axes(aspect='equal')
# plt.scatter(y_test, test_predictions)
# plt.xlabel('True values [suicides_no]')
# plt.ylabel('Predicted values [suicides_no]')
# lims = [0, 5000]
# plt.xlim(lims)
# plt.ylim(lims)
# _ = plt.plot(lims, lims)
# plt.show()

# Save predictions to CSV
predictions = model.predict(X_test)
pd.DataFrame(predictions).to_csv('results.csv')