From 4aa18c3b0e5fde6ba5038f0ab9355564753e742d Mon Sep 17 00:00:00 2001
From: Maciej Sobkowiak
Date: Sat, 12 Jun 2021 22:59:33 +0200
Subject: [PATCH] dvc.yaml

---
 dvc.yaml        | 17 +++++++++++++
 dvc_training.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 dvc.yaml
 create mode 100644 dvc_training.py

diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
index 0000000..48f6552
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,17 @@
+stages:
+  preprocess:
+    cmd: python3 preprocesing.py
+    deps:
+      - preprocesing.py
+      - who_suicide_statistics.csv
+    outs:
+      - train.csv
+  train:
+    cmd: python3 training.py
+    deps:
+      - training.py
+      - train.csv
+    outs:
+      - results.csv
+      - suicide_model.h5
+
diff --git a/dvc_training.py b/dvc_training.py
new file mode 100644
index 0000000..4a3ee14
--- /dev/null
+++ b/dvc_training.py
@@ -0,0 +1,68 @@
+import sys
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+from countries_map import countries
+import tensorflow as tf
+from tensorflow.keras.layers import Input, Dense, Activation, Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from keras.models import Sequential
+from sklearn.metrics import mean_squared_error
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.layers.experimental import preprocessing
+
+EPOCHS = int(sys.argv[1])
+BATCH_SIZE = int(sys.argv[2])
+
+train = pd.read_csv('train.csv')
+validate = pd.read_csv('validate.csv')
+test = pd.read_csv('test.csv')
+
+# split the train set
+X_train = train.loc[:, train.columns != 'suicides_no']
+y_train = train[['suicides_no']]
+X_test = test.loc[:, train.columns != 'suicides_no']
+y_test = test[['suicides_no']]
+
+normalizer = preprocessing.Normalization()
+normalizer.adapt(np.array(X_train))
+
+first = np.array(X_train[:1])
+with np.printoptions(precision=2, suppress=True):
+    print('First example:', first)
+    print()
+    print('Normalized:', normalizer(first).numpy())
+
+model = tf.keras.Sequential([
+    normalizer,
+    layers.Dense(units=1)
+])
+model.predict(X_train[:10])
+
+# Compile model
+model.compile(
+    optimizer=tf.optimizers.Adam(learning_rate=0.1),
+    loss='mean_absolute_error')
+
+# Train model
+history = model.fit(
+    X_train, y_train,
+    batch_size=BATCH_SIZE,
+    epochs=EPOCHS,
+    validation_split=0.2)
+
+model.save_weights('suicide_model.h5')
+
+test_results = {}
+
+test_results['model'] = model.evaluate(
+    X_test, y_test, verbose=0)
+
+test_predictions = model.predict(X_test).flatten()
+
+predictions = model.predict(X_test)
+pd.DataFrame(predictions).to_csv('results.csv')
+model.summary()