ium_434788/Zadanie_08_and_09_MLflow.py

75 lines
2.3 KiB
Python
Raw Normal View History

2021-05-13 19:18:15 +02:00
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
2021-05-15 17:24:05 +02:00
import sys
2021-05-15 16:59:57 +02:00
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.optimizers import Adam
2021-05-15 20:16:39 +02:00
from datetime import datetime
2021-05-15 20:19:58 +02:00
import os
2021-05-16 11:47:17 +02:00
import sys
import mlflow
2021-05-15 15:04:56 +02:00
2021-05-16 11:47:17 +02:00
# Train a binary wine-quality classifier ("bad" vs "nice") on
# winequality-red.csv and track the run with MLflow.
#
# Usage: python <script> [batch_size] [epochs]   (defaults: 16 and 15)


def build_features(wine):
    """Return the model inputs derived from the raw wine table.

    The ``quality`` column is the prediction target, so it is removed here;
    keeping it among the inputs would let the network read the answer
    directly. Two interaction columns are appended after the original
    feature columns.

    Args:
        wine: DataFrame holding at least the columns ``fixed acidity``,
            ``citric acid``, ``density`` and ``quality``.

    Returns:
        A new DataFrame with every column of ``wine`` except ``quality``,
        followed by ``citric_accidity`` and ``density_acidity``.
    """
    features = wine.drop('quality', axis=1)
    return features.assign(
        citric_accidity=features['fixed acidity'] * features['citric acid'],
        density_acidity=features['fixed acidity'] * features['density'],
    )


def build_model(n_features):
    """Return the (uncompiled) dense network for ``n_features`` inputs.

    The input width is a parameter rather than a hard-coded number so the
    network always matches the feature table it is trained on.
    """
    return Sequential([
        Dense(128, activation='relu', input_shape=(n_features,)),
        Dense(32, activation='relu'),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])


def main():
    """Train the classifier, then log parameters, accuracy and model."""
    batch_param = int(sys.argv[1]) if len(sys.argv) > 1 else 16
    epoch_param = int(sys.argv[2]) if len(sys.argv) > 2 else 15

    # The experiment has to be selected before the run is started;
    # selecting it inside an already active run does not move that run.
    mlflow.set_experiment("s434788")

    with mlflow.start_run():
        mlflow.log_param("batch_size", batch_param)
        mlflow.log_param("epochs", epoch_param)

        wine = pd.read_csv('winequality-red.csv')
        x = build_features(wine)

        # Quality in (2, 5] -> 'bad', (5, 8] -> 'nice'; LabelEncoder sorts
        # the labels alphabetically, so 'bad' becomes 0 and 'nice' becomes 1.
        y = pd.cut(wine['quality'], bins=(2, 5, 8), labels=['bad', 'nice'])
        yenc = LabelEncoder().fit_transform(y)

        x_train, x_test, y_train, y_test = train_test_split(
            x, yenc, test_size=0.2, random_state=21)

        # Fit the scaler on the training rows only, so that no statistics
        # of the held-out rows influence training.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        model = build_model(x_train.shape[1])
        model.compile(
            optimizer=Adam(learning_rate=0.0003),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        model.fit(x_train, y_train, batch_size=batch_param, epochs=epoch_param)

        # Round the sigmoid outputs to hard 0/1 labels and flatten the
        # (n, 1) prediction array to match the 1-D y_test.
        y_pred = np.around(model.predict(x_test), decimals=0).ravel()
        results = accuracy_score(y_test, y_pred)
        print(f"accuracy: {results}")

        mlflow.log_metric("Accuracy", results)
        # NOTE(review): save_model writes to a fixed local directory; a
        # leftover "my_model" from an earlier run may make this call fail
        # - confirm against the MLflow version in use.
        mlflow.keras.save_model(model, "my_model")


if __name__ == "__main__":
    main()