69 lines
2.1 KiB
Python
69 lines
2.1 KiB
Python
from tensorflow.keras.models import Sequential, load_model
|
|
from tensorflow.keras.layers import Dense
|
|
from sklearn.metrics import accuracy_score, classification_report
|
|
import pandas as pd
|
|
from sklearn.model_selection import train_test_split
|
|
import numpy as np
|
|
import sys
|
|
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
|
from tensorflow.keras.optimizers import Adam
|
|
from datetime import datetime
|
|
import os
|
|
import sys
|
|
import mlflow
|
|
|
|
with mlflow.start_run():
    # Train a small dense binary classifier on the wine-quality data inside a
    # single MLflow run.
    #
    # Command-line arguments (both optional):
    #   argv[1] -- batch size (default 16)
    #   argv[2] -- number of epochs (default 5)
    batch_param = int(sys.argv[1]) if len(sys.argv) > 1 else 16
    epoch_param = int(sys.argv[2]) if len(sys.argv) > 2 else 5

    # MLflow parameter logging is currently disabled:
    # mlflow.log_param("batch_size: ", batch_param)
    # mlflow.log_param("epochs: ", epoch_param)

    wine = pd.read_csv('train.csv')

    # Split the target column from the feature columns.
    y = wine['quality']
    x = wine.drop('quality', axis=1)

    # Engineered interaction features: fixed acidity multiplied by citric
    # acid and by density respectively.
    citricacid = x['fixed acidity'] * x['citric acid']
    citric_acidity = citricacid.to_frame('citric_accidity')

    density_acidity = x['fixed acidity'] * x['density']
    density_acidity = density_acidity.to_frame('density_acidity')

    # Join onto `x`, not `wine`: `wine` still contains the 'quality' column,
    # and joining onto it would leak the target into the model inputs.
    x = x.join(citric_acidity).join(density_acidity)

    # Bucket the quality score into two classes. pd.cut uses right-closed
    # intervals by default, so (2, 5] -> 'bad' and (5, 8] -> 'nice'; scores
    # outside (2, 8] become NaN.
    bins = (2, 5, 8)
    gnames = ['bad', 'nice']
    y = pd.cut(y, bins=bins, labels=gnames)

    # Turn the class labels into integer codes for the sigmoid output.
    enc = LabelEncoder()
    yenc = enc.fit_transform(y)

    # Standardise every feature column.
    scale = StandardScaler()
    scaled_x = scale.fit_transform(x)

    # Derive the input width from the data rather than hard-coding it, so the
    # network always matches the number of feature columns actually present.
    NeuralModel = Sequential([
        Dense(128, activation='relu', input_shape=(scaled_x.shape[1],)),
        Dense(32, activation='relu'),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    # NOTE: despite the variable name this is an Adam optimizer. The `lr`
    # keyword is a deprecated alias; `learning_rate` is the supported name.
    rms = Adam(learning_rate=0.0003)

    NeuralModel.compile(
        optimizer=rms,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    NeuralModel.fit(scaled_x, yenc, batch_size=batch_param, epochs=epoch_param)

    # THIS IS ONLY FOR THE PURPOSES OF THE ASSIGNMENT: the model is evaluated
    # on the same data it was trained on.
    y_pred = NeuralModel.predict(scaled_x)

    # Round the sigmoid probabilities to hard 0/1 predictions.
    y_pred = np.around(y_pred, decimals=0)

    results = accuracy_score(yenc, y_pred)

    # MLflow metric logging is currently disabled:
    # mlflow.log_metric("Accuracy: ", results)