dvc.yaml
This commit is contained in:
parent
f403cbba03
commit
4aa18c3b0e
17
dvc.yaml
Normal file
17
dvc.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# DVC pipeline definition: two stages chained through train.csv.
stages:
  # Stage 1: runs the preprocessing script on the raw WHO statistics CSV
  # and declares train.csv as its tracked output.
  preprocess:
    cmd: python3 preprocesing.py
    deps:
      - preprocesing.py
      - who_suicide_statistics.csv
    outs:
      - train.csv
  # Stage 2: runs the training script on train.csv and declares the
  # predictions file and the saved model weights as tracked outputs.
  # NOTE(review): the training script added alongside this file is named
  # dvc_training.py, requires two positional arguments (epochs, batch size)
  # and also reads validate.csv and test.csv. Confirm that training.py is
  # the intended script and whether arguments/deps are missing here.
  train:
    cmd: python3 training.py
    deps:
      - training.py
      - train.csv
    outs:
      - results.csv
      - suicide_model.h5
|
||||||
|
|
68
dvc_training.py
Normal file
68
dvc_training.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import os
|
||||||
|
from countries_map import countries
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
|
||||||
|
from tensorflow.keras.models import Model
|
||||||
|
from tensorflow.keras.callbacks import EarlyStopping
|
||||||
|
from keras.models import Sequential
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from tensorflow import keras
|
||||||
|
from tensorflow.keras import layers
|
||||||
|
from tensorflow.keras.layers.experimental import preprocessing
|
||||||
|
|
||||||
|
# Name of the column the model learns to predict.
TARGET_COLUMN = 'suicides_no'


def parse_cli_args(argv):
    """Return ``(epochs, batch_size)`` parsed from a ``sys.argv``-style list.

    Args:
        argv: Argument list whose items 1 and 2 are the number of epochs and
            the batch size. Any further items are ignored.

    Returns:
        A tuple ``(epochs, batch_size)`` of ints.

    Raises:
        SystemExit: If fewer than two positional arguments are given.
        ValueError: If an argument is not a valid integer.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else 'training script'
        raise SystemExit(f'usage: {prog} EPOCHS BATCH_SIZE')
    return int(argv[1]), int(argv[2])


def split_features_target(frame, target=TARGET_COLUMN):
    """Split ``frame`` into a feature frame and a one-column target frame.

    The column mask is derived from ``frame`` itself, so the split is correct
    for every data set regardless of how another data set's columns look.

    Args:
        frame: DataFrame that contains the ``target`` column.
        target: Name of the column to predict.

    Returns:
        A tuple ``(features, labels)``; ``labels`` is a single-column
        DataFrame.

    Raises:
        KeyError: If ``target`` is not a column of ``frame``.
    """
    return frame.drop(columns=[target]), frame[[target]]


def main(argv=None):
    """Train a normalised linear regression model and write its outputs.

    Reads ``train.csv``, ``validate.csv`` and ``test.csv`` from the working
    directory, fits the model, and writes ``suicide_model.h5`` (weights) and
    ``results.csv`` (predictions for the test set).

    Args:
        argv: ``sys.argv``-style argument list; defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    epochs, batch_size = parse_cli_args(argv)

    train = pd.read_csv('train.csv')
    validate = pd.read_csv('validate.csv')
    test = pd.read_csv('test.csv')

    # Split every data set into features and target.
    X_train, y_train = split_features_target(train)
    X_val, y_val = split_features_target(validate)
    X_test, y_test = split_features_target(test)

    # The normalizer is adapted to the training column order, so the other
    # sets must present their features in exactly that order.
    X_val = X_val[X_train.columns]
    X_test = X_test[X_train.columns]

    normalizer = preprocessing.Normalization()
    normalizer.adapt(np.array(X_train))

    # Show one raw and one normalised example as a quick sanity check.
    first = np.array(X_train[:1])
    with np.printoptions(precision=2, suppress=True):
        print('First example:', first)
        print()
        print('Normalized:', normalizer(first).numpy())

    model = tf.keras.Sequential([
        normalizer,
        layers.Dense(units=1),
    ])
    # Smoke-run on a few rows before training; the result is not used.
    model.predict(X_train[:10])

    # Compile model
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=0.1),
        loss='mean_absolute_error')

    # Train model. The dedicated validation set is used instead of carving
    # 20% off the training data, which previously left validate.csv unused.
    model.fit(
        X_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, y_val))

    model.save_weights('suicide_model.h5')

    # Report the held-out loss instead of silently discarding it.
    test_loss = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss (mean absolute error):', test_loss)

    # Predict once and persist the predictions.
    predictions = model.predict(X_test)
    pd.DataFrame(predictions).to_csv('results.csv')
    model.summary()


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user