From 9477485803eb3775de24f48c6c07be11e856b071 Mon Sep 17 00:00:00 2001 From: s434695 Date: Mon, 17 May 2021 12:29:19 +0200 Subject: [PATCH] m --- MLproject | 11 +++++++ train_evaluate/Dockerfile | 36 ++++++++------------- vgsales-mlflow.py | 67 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 22 deletions(-) create mode 100644 MLproject create mode 100644 vgsales-mlflow.py diff --git a/MLproject b/MLproject new file mode 100644 index 0000000..7c4b16d --- /dev/null +++ b/MLproject @@ -0,0 +1,11 @@ +name: 434695-mlflow + +docker_env: + image: shroomy/ium:1 + +entry_points: + main: + parameters: + epochs: {type: int, default: 15} + batch_size: {type: int, default: 16} + command: "python3 vgsales-mlflow.py {epochs} {batch_size}" diff --git a/train_evaluate/Dockerfile b/train_evaluate/Dockerfile index 3352365..4f24908 100644 --- a/train_evaluate/Dockerfile +++ b/train_evaluate/Dockerfile @@ -5,27 +5,19 @@ FROM ubuntu:latest RUN apt update && apt install -y figlet RUN apt install -y git RUN apt install -y python3-pip -RUN pip3 install --user setuptools -RUN pip3 install --user kaggle -RUN pip3 install --user pandas -RUN pip3 install --user numpy -RUN pip3 install --user seaborn -RUN pip3 install --user sklearn -RUN pip3 install --user matplotlib -RUN pip3 install --user tensorflow -RUN pip3 install --user sacred -RUN pip3 install --user wget -RUN pip3 install --user keras -RUN pip3 install --user GitPython -RUN pip3 install --user pymongo +RUN pip3 install setuptools +RUN pip3 install kaggle +RUN pip3 install pandas +RUN pip3 install numpy +RUN pip3 install seaborn +RUN pip3 install sklearn +RUN pip3 install matplotlib +RUN pip3 install tensorflow +RUN pip3 install sacred +RUN pip3 install wget +RUN pip3 install keras +RUN pip3 install GitPython +RUN pip3 install pymongo +RUN pip3 install mlflow -WORKDIR /app - -COPY ./train.py ./ -COPY ./evaluate.py ./ -COPY ./sacred1.py ./ -COPY ./sacred2.py ./ -COPY ./skrypt.sh ./ -COPY ./zadanie2.py ./ -COPY 
./zadanie5.py ./
\ No newline at end of file
diff --git a/vgsales-mlflow.py b/vgsales-mlflow.py
new file mode 100644
index 0000000..3799c52
--- /dev/null
+++ b/vgsales-mlflow.py
@@ -0,0 +1,67 @@
+import sys
+from keras.backend import batch_dot, mean
+import pandas as pd
+import numpy as np
+from six import int2byte
+from sklearn import preprocessing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.layers import Input, Dense, Activation,Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.models import Sequential
+import mlflow
+
+
+def my_main(epochs, batch_size):
+    """Train a binary "published by Nintendo" classifier on vgsales.csv.
+
+    Args:
+        epochs: maximum number of training epochs.
+        batch_size: mini-batch size passed to ``model.fit``.
+
+    Returns:
+        ``(rmse, model)``: root-mean-squared error of the sigmoid outputs
+        on the held-out 20% split, and the fitted Keras model.
+    """
+    vgsales = pd.read_csv('vgsales.csv')
+    # Target is 1 when the Publisher column equals 'Nintendo', else 0.
+    vgsales['Nintendo'] = vgsales['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
+    Y = vgsales['Nintendo']
+    X = vgsales.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'Nintendo'], axis=1)
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, Y, test_size=0.2, train_size=0.8, random_state=21)
+
+    model = Sequential()
+    # input_dim must be the number of feature columns, not the DataFrame.
+    model.add(Dense(9, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
+    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+    # Early stopping only takes effect when handed to fit() as a callback.
+    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
+    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
+              validation_data=(X_test, y_test), callbacks=[early_stop])
+
+    # mean_squared_error yields MSE; take the square root for a real RMSE.
+    prediction = model.predict(X_test)
+    rmse = float(np.sqrt(mean_squared_error(y_test, prediction)))
+
+    model.save('vgsales_model.h5')
+    return rmse, model
+
+
+if __name__ == "__main__":
+    # Usage: vgsales-mlflow.py [epochs] [batch_size]
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 15
+    batch_size = int(sys.argv[2]) if len(sys.argv) > 2 else 16
+    with mlflow.start_run():
+        rmse, model = my_main(epochs, batch_size)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_metric("rmse", rmse)
+        mlflow.keras.log_model(model, 'vgsales_model.h5')
\ No newline at end of file