This commit is contained in:
zgolebiewska 2024-05-26 14:23:37 +02:00
parent 567074ec4c
commit 48175b11be

View File

@@ -1,51 +1,64 @@
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
import tensorflow as tf import tensorflow as tf
import pandas as pd import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import json import json
import mlflow
mlflow.set_tracking_uri("http://localhost:5000") # Ustawienie adresu MLflow Tracking Server ex = Experiment("s464906_experiment")
df = pd.read_csv('OrangeQualityData.csv') mongo_url = "mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017"
ex.observers.append(MongoObserver(url=mongo_url, db_name='sacred'))
ex.observers.append(FileStorageObserver('logs'))
encoder = LabelEncoder() @ex.config
df["Color"] = encoder.fit_transform(df["Color"]) def cfg():
df["Variety"] = encoder.fit_transform(df["Variety"]) epochs = 100
df["Blemishes"] = df["Blemishes (Y/N)"].apply(lambda x: 1 if x.startswith("Y") else 0)
df.drop(columns=["Blemishes (Y/N)"], inplace=True) @ex.automain
def train_model(epochs):
df = pd.read_csv('OrangeQualityData.csv')
X = df.drop(columns=["Quality (1-5)"]) encoder = LabelEncoder()
y = df["Quality (1-5)"] df["Color"] = encoder.fit_transform(df["Color"])
df["Variety"] = encoder.fit_transform(df["Variety"])
df["Blemishes"] = df["Blemishes (Y/N)"].apply(lambda x: 1 if x.startswith("Y") else 0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) df.drop(columns=["Blemishes (Y/N)"], inplace=True)
scaler = StandardScaler() X = df.drop(columns=["Quality (1-5)"])
X_train_scaled = scaler.fit_transform(X_train) y = df["Quality (1-5)"]
X_test_scaled = scaler.transform(X_test)
model = tf.keras.Sequential([ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)), tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
tf.keras.layers.Dense(32, activation='relu'), tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(1) tf.keras.layers.Dense(1)
]) ])
model.compile(optimizer='sgd', loss='mse') model.compile(optimizer='sgd', loss='mse')
with mlflow.start_run(): history = model.fit(X_train_scaled, y_train, epochs=epochs, verbose=0, validation_data=(X_test_scaled, y_test))
mlflow.log_param("optimizer", 'sgd')
mlflow.log_param("loss_function", 'mse')
mlflow.log_param("epochs", 100)
history = model.fit(X_train_scaled, y_train, epochs=100, verbose=0, validation_data=(X_test_scaled, y_test)) ex.log_scalar("epochs", epochs)
for key, value in history.history.items(): ex.add_artifact(__file__)
mlflow.log_metric(key, value[-1]) # Logujemy ostatnią wartość metryki
model.save('orange_quality_model_tf.h5') model.save('orange_quality_model_tf.h5')
ex.add_artifact('orange_quality_model_tf.h5')
for key, value in history.history.items():
ex.log_scalar(key, value[-1])
predictions = model.predict(X_test_scaled) predictions = model.predict(X_test_scaled)
with open('predictions_tf.json', 'w') as f: with open('predictions_tf.json', 'w') as f:
json.dump(predictions.tolist(), f, indent=4) json.dump(predictions.tolist(), f, indent=4)
ex.add_artifact('predictions_tf.json')
return 'Training completed successfully'