ium_452662/mlflow_train.py

import tensorflow as tf
import mlflow
import mlflow.sklearn
import pandas as pd
import sklearn
import sklearn.model_selection
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error


def normalize(df, feature_name):
    """Return a copy of *df* with one column min-max scaled to [0, 1].

    Args:
        df: Source ``pandas.DataFrame``; it is not modified.
        feature_name: Label of the numeric column to rescale.

    Returns:
        A new DataFrame identical to *df* except that ``feature_name`` holds
        ``(value - min) / (max - min)``. If every value in the column is the
        same, the column is set to ``0.0`` (missing values stay missing).
    """
    column = df[feature_name]
    min_value = column.min()
    value_range = column.max() - min_value

    result = df.copy()
    if value_range == 0:
        # Constant column: dividing would be 0 / 0 and turn every entry into
        # NaN, so map the single observed value to 0.0 instead.
        result[feature_name] = (column - min_value).astype(float)
    else:
        result[feature_name] = (column - min_value) / value_range
    return result

# Every run started by this script is recorded under this MLflow experiment.
mlflow.set_experiment("s452662")

cars = pd.read_csv('zbior_ium/Car_Prices_Poland_Kaggle.csv')

# Row with invalid data.
cars = cars.drop(index=73436)

# Only the engine volume is rescaled here, using statistics of the whole
# dataset (i.e. computed before the split below).
cars_normalized = normalize(cars, 'vol_engine')

# Two-stage split with fixed sizes and seed: 23586 rows are held out first,
# then 11793 of those form the test set and the remainder the dev set.
cars_train, cars_test = sklearn.model_selection.train_test_split(
    cars_normalized, test_size=23586, random_state=1
)
cars_dev, cars_test = sklearn.model_selection.train_test_split(
    cars_test, test_size=11793, random_state=1
)

# Relabel the leading column of both exported splits as 'id'.
for frame in (cars_train, cars_test):
    frame.rename(columns={frame.columns[0]: 'id'}, inplace=True)

cars_train.to_csv('train.csv')
cars_test.to_csv('test.csv')

# Columns fed to the network; the input width follows this list.
feature_cols = ['year', 'mileage', 'vol_engine']

# Small fully connected regressor: two ReLU hidden layers of 10 units each
# followed by a single linear output unit.
inputs = tf.keras.Input(shape=(len(feature_cols),))
x = inputs
for _ in range(2):
    x = tf.keras.layers.Dense(10, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Optimise mean squared error; mean absolute error is tracked as a metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae'],
)


# Train the model inside one MLflow run, then score it on the test split and
# log the resulting error metrics to that run.
with mlflow.start_run() as run:
    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

    model.fit(cars_train[feature_cols], cars_train['price'], epochs=100)

    # Persist the trained model once; nothing below changes its weights, so a
    # second save at the end of the run would only rewrite the same file.
    model.save('model.h5')

    # Evaluation on the training data; the returned values are kept in
    # `metrics` but are not logged to MLflow.
    metrics = model.evaluate(cars_train[feature_cols], cars_train['price'])

    # predict() yields one single-element row per sample; flatten it and
    # round each price once so both error metrics share the same values.
    predictions = model.predict(cars_test[feature_cols])
    predicted_prices = [p[0] for p in predictions]
    rounded_prices = [round(price) for price in predicted_prices]

    mae = mean_absolute_error(cars_test['price'], rounded_prices)
    mse = mean_squared_error(cars_test['price'], rounded_prices)
    rmse = np.sqrt(mse)

    print("  MAE: %s" % mae)
    print("  MSE: %s" % mse)
    print("  RMSE: %s" % rmse)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("mae", mae)
Prześlij pliki do '' 2023-05-11 18:49:08 +02:00			`import tensorflow as tf`
			`import mlflow`
			`import mlflow.sklearn`
			`import pandas as pd`
			`import sklearn`
			`import sklearn.model_selection`
			`import numpy as np`
			`from sklearn.metrics import mean_absolute_error, mean_squared_error`


			`def normalize(df,feature_name):`
			`result = df.copy()`
			`max_value = df[feature_name].max()`
			`min_value = df[feature_name].min()`
			`result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)`
			`return result`

			`mlflow.set_experiment("s452662")`

			`cars = pd.read_csv('zbior_ium/Car_Prices_Poland_Kaggle.csv')`

			`cars = cars.drop(73436) #wiersz z błednymi danymi`

			`cars_normalized = normalize(cars,'vol_engine')`

			`cars_train, cars_test = sklearn.model_selection.train_test_split(cars_normalized, test_size=23586, random_state=1)`
			`cars_dev, cars_test = sklearn.model_selection.train_test_split(cars_test, test_size=11793, random_state=1)`
			`cars_train.rename(columns = {list(cars_train)[0]: 'id'}, inplace = True)`
			`cars_test.rename(columns = {list(cars_test)[0]: 'id'}, inplace = True)`
			`cars_train.to_csv('train.csv')`
			`cars_test.to_csv('test.csv')`

			`feature_cols = ['year', 'mileage', 'vol_engine']`
			`inputs = tf.keras.Input(shape=(len(feature_cols),))`

			`x = tf.keras.layers.Dense(10, activation='relu')(inputs)`
			`x = tf.keras.layers.Dense(10, activation='relu')(x)`
			`outputs = tf.keras.layers.Dense(1, activation='linear')(x)`

			`model = tf.keras.Model(inputs=inputs, outputs=outputs)`

			`model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),`
			`loss='mse', metrics=['mae'])`


			`with mlflow.start_run() as run:`
			`print("MLflow run experiment_id: {0}".format(run.info.experiment_id))`
			`print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))`

			`model.fit(cars_train[feature_cols], cars_train['price'], epochs=100)`

			`model.save('model.h5')`

			`metrics = model.evaluate(cars_train[feature_cols], cars_train['price'])`

			`predictions = model.predict(cars_test[feature_cols])`
			`predicted_prices = [p[0] for p in predictions]`

			`mae = mean_absolute_error(cars_test['price'], [round(p[0]) for p in predictions])`
			`mse = mean_squared_error(cars_test['price'], [round(p[0]) for p in predictions])`
			`rmse = np.sqrt(mse)`

			`print(" MAE: %s" % mae)`
			`print(" MSE: %s" % mse)`
			`print(" RMSE: %s" % rmse)`

			`mlflow.log_metric("rmse", rmse)`
			`mlflow.log_metric("mse", mse)`
			`mlflow.log_metric("mae", mae)`

			`model.save('model.h5')`