"""Train a small Keras regressor on the Polish car-prices CSV and log
its test-set error metrics (MAE, MSE, RMSE) to MLflow.

Run as a script. The pipeline reads the dataset, min-max scales the
``vol_engine`` column, writes ``train.csv`` / ``test.csv``, fits a
two-hidden-layer dense network on ``year``, ``mileage`` and
``vol_engine`` to predict ``price``, saves the model to ``model.h5`` and
records the metrics in the active MLflow run.
"""
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import sklearn
import sklearn.model_selection
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error

EXPERIMENT_NAME = "s452662"
DATA_PATH = 'zbior_ium/Car_Prices_Poland_Kaggle.csv'
BAD_ROW_LABEL = 73436  # row with erroneous data
FEATURE_COLS = ['year', 'mileage', 'vol_engine']
TARGET_COL = 'price'
MODEL_PATH = 'model.h5'
HOLDOUT_SIZE = 23586  # rows taken out of the training split
TEST_SIZE = 11793  # rows of the hold-out that form the test split
RANDOM_STATE = 1
EPOCHS = 100


def normalize(df, feature_name):
    """Return a copy of *df* with column *feature_name* min-max scaled.

    Values are mapped to ``(value - min) / (max - min)``. When every
    value in the column is equal the range is zero; the column is then
    mapped to 0 instead of dividing by zero (which would yield NaN).

    Args:
        df: Source frame; it is not modified.
        feature_name: Name of the numeric column to scale.

    Returns:
        A new DataFrame identical to *df* except for the scaled column.
    """
    result = df.copy()
    column = df[feature_name]
    min_value = column.min()
    value_range = column.max() - min_value
    if value_range == 0:
        # Constant column: scale by 1 so the result is 0, not 0/0 = NaN.
        value_range = 1
    result[feature_name] = (column - min_value) / value_range
    return result


def _load_and_split():
    """Read the dataset, scale it, split it and write the split CSVs.

    Returns:
        A ``(cars_train, cars_test)`` tuple of DataFrames.
    """
    cars = pd.read_csv(DATA_PATH)
    cars = cars.drop(BAD_ROW_LABEL)

    # NOTE(review): only 'vol_engine' is scaled, and the scaling uses
    # statistics of the whole dataset (before the split). Kept as-is to
    # leave training results unchanged.
    cars_normalized = normalize(cars, 'vol_engine')

    cars_train, holdout = sklearn.model_selection.train_test_split(
        cars_normalized, test_size=HOLDOUT_SIZE, random_state=RANDOM_STATE)
    # The dev part of the hold-out is currently not used anywhere.
    _cars_dev, cars_test = sklearn.model_selection.train_test_split(
        holdout, test_size=TEST_SIZE, random_state=RANDOM_STATE)

    # The first column of the CSV is renamed to 'id' in both splits.
    cars_train = cars_train.rename(columns={cars_train.columns[0]: 'id'})
    cars_test = cars_test.rename(columns={cars_test.columns[0]: 'id'})
    cars_train.to_csv('train.csv')
    cars_test.to_csv('test.csv')
    return cars_train, cars_test


def _build_model():
    """Build and compile the dense regression network (10-10-1 units)."""
    inputs = tf.keras.Input(shape=(len(FEATURE_COLS),))
    hidden = tf.keras.layers.Dense(10, activation='relu')(inputs)
    hidden = tf.keras.layers.Dense(10, activation='relu')(hidden)
    outputs = tf.keras.layers.Dense(1, activation='linear')(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='mse', metrics=['mae'])
    return model


def main():
    """Run the full pipeline inside a single MLflow run."""
    mlflow.set_experiment(EXPERIMENT_NAME)
    cars_train, cars_test = _load_and_split()
    model = _build_model()

    with mlflow.start_run() as run:
        print(f"MLflow run experiment_id: {run.info.experiment_id}")
        print(f"MLflow run artifact_uri: {run.info.artifact_uri}")

        model.fit(cars_train[FEATURE_COLS], cars_train[TARGET_COL],
                  epochs=EPOCHS)
        # Saved once: nothing after this point changes the weights.
        model.save(MODEL_PATH)

        # NOTE(review): this evaluates on the *training* split and the
        # returned loss/MAE were never used; kept only for whatever
        # Keras reports while evaluating. Confirm whether the dev or
        # test split was intended, or drop the call.
        model.evaluate(cars_train[FEATURE_COLS], cars_train[TARGET_COL])

        predictions = model.predict(cars_test[FEATURE_COLS])
        # Prices are compared as whole numbers; round once and reuse.
        rounded_prices = [round(p[0]) for p in predictions]

        mae = mean_absolute_error(cars_test[TARGET_COL], rounded_prices)
        mse = mean_squared_error(cars_test[TARGET_COL], rounded_prices)
        rmse = np.sqrt(mse)

        print(f" MAE: {mae}")
        print(f" MSE: {mse}")
        print(f" RMSE: {rmse}")

        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)


if __name__ == "__main__":
    main()