dvc.yaml
All checks were successful
s434784-evaluation/pipeline/head This commit looks good
s434784-training/pipeline/head This commit looks good

This commit is contained in:
Maciej Sobkowiak 2021-06-12 22:59:33 +02:00
parent f403cbba03
commit 4aa18c3b0e
2 changed files with 85 additions and 0 deletions

17
dvc.yaml Normal file
View File

@@ -0,0 +1,17 @@
# DVC pipeline definition with two stages: preprocess, then train.
stages:
  # Runs preprocesing.py; depends on the script itself and on
  # who_suicide_statistics.csv, and declares train.csv as its output.
  preprocess:
    cmd: python3 preprocesing.py
    deps:
      - preprocesing.py
      - who_suicide_statistics.csv
    outs:
      - train.csv
  # Runs training.py; depends on the script itself and on train.csv (the
  # output of the preprocess stage), and declares results.csv and
  # suicide_model.h5 as its outputs.
  # NOTE(review): the command is invoked without arguments and only train.csv
  # is listed as a data dependency. dvc_training.py from the same commit
  # reads EPOCHS and BATCH_SIZE from the command line and also loads
  # validate.csv and test.csv - if training.py behaves the same way, the cmd
  # and deps here are incomplete. Confirm against training.py.
  train:
    cmd: python3 training.py
    deps:
      - training.py
      - train.csv
    outs:
      - results.csv
      - suicide_model.h5

68
dvc_training.py Normal file
View File

@@ -0,0 +1,68 @@
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from countries_map import countries
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
# Training hyperparameters are taken from the command line:
#   argv[1] -> number of epochs, argv[2] -> batch size.
if len(sys.argv) < 3:
    # Exit with a readable usage message instead of a bare IndexError.
    sys.exit(f'usage: {sys.argv[0]} EPOCHS BATCH_SIZE')
EPOCHS = int(sys.argv[1])
BATCH_SIZE = int(sys.argv[2])

# Load the data splits from CSV files in the current working directory.
train = pd.read_csv('train.csv')
# NOTE(review): `validate` is loaded but not referenced anywhere else in the
# visible script (model.fit uses validation_split on the training data
# instead). Kept so the script's file requirements do not change - confirm
# whether it should be passed to fit() as validation data.
validate = pd.read_csv('validate.csv')
test = pd.read_csv('test.csv')

# Split the train set into features (every column except the target)
# and the target column 'suicides_no'.
X_train = train.loc[:, train.columns != 'suicides_no']
y_train = train[['suicides_no']]

# Same split for the test set. The column mask is built from the test
# frame's own columns: reusing train.columns would select the wrong columns
# (or raise) whenever the two files differ in column order or count.
X_test = test.loc[:, test.columns != 'suicides_no']
y_test = test[['suicides_no']]
# Normalization layer whose statistics are computed from the training
# features via adapt().
normalizer = preprocessing.Normalization()
normalizer.adapt(np.array(X_train))

# Print the first training row next to its normalized form.
sample_row = np.array(X_train[:1])
with np.printoptions(precision=2, suppress=True):
    normalized_row = normalizer(sample_row).numpy()
    print('First example:', sample_row)
    print()
    print('Normalized:', normalized_row)

# Model: the normalization layer followed by a single-unit dense layer.
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(units=1))

# Forward pass over the first ten training rows before training;
# the returned predictions are discarded.
model.predict(X_train[:10])
# Configure training: Adam optimizer (learning rate 0.1) minimizing
# mean absolute error.
adam = tf.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=adam, loss='mean_absolute_error')

# Fit on the training data using the epoch count and batch size given on
# the command line; 20% of the training data is held out for validation.
fit_options = dict(
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
)
history = model.fit(X_train, y_train, **fit_options)

# Persist the trained weights (weights only, via save_weights).
model.save_weights('suicide_model.h5')
# Evaluate on the test set. With only a loss configured, evaluate() returns
# the loss value; it is stored under the 'model' key and printed, because
# verbose=0 suppresses Keras' own output and the result was otherwise lost.
test_results = {}
test_results['model'] = model.evaluate(
    X_test, y_test, verbose=0)
print('Test loss (mean absolute error):', test_results['model'])

# Run inference on the test set once and reuse the result; the original
# called model.predict(X_test) twice for the same data.
predictions = model.predict(X_test)
test_predictions = predictions.flatten()

# Write the predictions to results.csv (DataFrame index included, as before).
pd.DataFrame(predictions).to_csv('results.csv')

# Print the layer/parameter summary of the trained model.
model.summary()