"""Train and evaluate a small Keras regression model for car prices.

The script reads the car price CSV, min-max scales the ``vol_engine``
column, splits the rows into train/dev/test parts, writes the train and
test parts to CSV, fits a dense network on ``feature_cols`` and logs the
test-set RMSE, MSE and MAE to MLflow.
"""
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import sklearn
import sklearn.model_selection
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error


def normalize(df, feature_name):
    """Return a copy of *df* with column *feature_name* min-max scaled.

    The scaled column is ``(value - min) / (max - min)``. The input frame
    is not modified.

    Args:
        df: Source data frame.
        feature_name: Label of the column to scale.

    Returns:
        A copy of ``df`` in which ``feature_name`` holds the scaled values.
        If every value in the column is identical, the column becomes all
        zeros (missing values stay missing) instead of the result of a
        division by zero.
    """
    result = df.copy()
    max_value = df[feature_name].max()
    min_value = df[feature_name].min()
    value_range = max_value - min_value
    if value_range == 0:
        # Constant column: the shifted values are already all zero, so skip
        # the 0 / 0 division that would turn the whole column into NaN.
        result[feature_name] = (df[feature_name] - min_value).astype(float)
    else:
        result[feature_name] = (df[feature_name] - min_value) / value_range
    return result


mlflow.set_experiment("s452662")

cars = pd.read_csv('zbior_ium/Car_Prices_Poland_Kaggle.csv')
cars = cars.drop(73436)  # row with erroneous data

# NOTE(review): scaling happens before the split, so the min/max used here
# are computed over rows that later end up in the dev and test sets.
cars_normalized = normalize(cars, 'vol_engine')

cars_train, cars_test = sklearn.model_selection.train_test_split(
    cars_normalized, test_size=23586, random_state=1)
# cars_dev is split off but not used anywhere else in this script.
cars_dev, cars_test = sklearn.model_selection.train_test_split(
    cars_test, test_size=11793, random_state=1)

# Name the first column 'id'. Plain reassignment instead of inplace=True so
# that frames produced by the split are not mutated in place.
cars_train = cars_train.rename(columns={list(cars_train)[0]: 'id'})
cars_test = cars_test.rename(columns={list(cars_test)[0]: 'id'})

cars_train.to_csv('train.csv')
cars_test.to_csv('test.csv')

feature_cols = ['year', 'mileage', 'vol_engine']

# Two hidden ReLU layers of 10 units each, followed by one linear output.
inputs = tf.keras.Input(shape=(len(feature_cols),))
x = tf.keras.layers.Dense(10, activation='relu')(inputs)
x = tf.keras.layers.Dense(10, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='mse',
              metrics=['mae'])

with mlflow.start_run() as run:
    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

    model.fit(cars_train[feature_cols], cars_train['price'], epochs=100)
    # The model does not change after fit(), so it is saved once here.
    model.save('model.h5')

    # NOTE(review): this evaluates on the training data and its result was
    # never used; the call is kept without binding the return value.
    model.evaluate(cars_train[feature_cols], cars_train['price'])

    predictions = model.predict(cars_test[feature_cols])
    # Each prediction row holds a single value; round it once and reuse the
    # same list for every metric.
    rounded_prices = [round(p[0]) for p in predictions]

    mae = mean_absolute_error(cars_test['price'], rounded_prices)
    mse = mean_squared_error(cars_test['price'], rounded_prices)
    rmse = np.sqrt(mse)

    print(" MAE: %s" % mae)
    print(" MSE: %s" % mse)
    print(" RMSE: %s" % rmse)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("mae", mae)