Prześlij pliki do ''

2023-05-11 18:49:08 +02:00 · 2023-05-11 18:49:08 +02:00 · 16e443f399
commit 16e443f399
parent 198b7a860a
1 changed files with 73 additions and 0 deletions
--- a/mlflow_train.py
+++ b/mlflow_train.py
@@ -0,0 +1,73 @@
+import tensorflow as tf
+import mlflow
+import mlflow.sklearn
+import pandas as pd
+import sklearn
+import sklearn.model_selection
+import numpy as np
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+
+
+def normalize(df, feature_name):
+    """Return a copy of ``df`` with one column min-max scaled to [0, 1].
+
+    Args:
+        df: pandas DataFrame that contains ``feature_name``. It is not
+            modified; the scaling is applied to a copy.
+        feature_name: label of the numeric column to rescale.
+
+    Returns:
+        A new DataFrame in which ``feature_name`` holds
+        ``(value - min) / (max - min)`` and every other column is unchanged.
+        A column whose values are all equal is mapped to 0.0, because its
+        zero range would otherwise make the division produce NaN.
+    """
+    result = df.copy()
+    column = df[feature_name]
+    min_value = column.min()
+    value_range = column.max() - min_value
+    # The guard only applies when the range is a scalar (single column label);
+    # any other selection keeps the plain element-wise division.
+    if np.ndim(value_range) == 0 and value_range == 0:
+        # Constant column: 0/0 would be NaN, so pin it to the lower bound.
+        result[feature_name] = 0.0
+    else:
+        result[feature_name] = (column - min_value) / value_range
+    return result
+
+# All runs of this script are recorded under a single MLflow experiment.
+mlflow.set_experiment("s452662")
+
+cars = pd.read_csv('zbior_ium/Car_Prices_Poland_Kaggle.csv')
+
+# The row labelled 73436 contains erroneous data, so it is dropped up front.
+cars = cars.drop(index=73436)
+
+cars_normalized = normalize(cars, 'vol_engine')
+
+# Two-stage split with a fixed seed: 23586 rows are held out first, then
+# 11793 of those become the test set and the remainder the dev set.
+cars_train, cars_holdout = sklearn.model_selection.train_test_split(
+    cars_normalized, test_size=23586, random_state=1)
+cars_dev, cars_test = sklearn.model_selection.train_test_split(
+    cars_holdout, test_size=11793, random_state=1)
+
+# Rename the first column of each exported split to 'id' and write it to CSV.
+for frame, csv_path in ((cars_train, 'train.csv'), (cars_test, 'test.csv')):
+    frame.rename(columns={frame.columns[0]: 'id'}, inplace=True)
+    frame.to_csv(csv_path)
+
+# Columns fed to the network; the input width follows this list.
+feature_cols = ['year', 'mileage', 'vol_engine']
+
+inputs = tf.keras.Input(shape=(len(feature_cols),))
+
+# Two identical hidden layers (10 ReLU units each) stacked on the input.
+x = inputs
+for _ in range(2):
+    x = tf.keras.layers.Dense(10, activation='relu')(x)
+
+# Single linear unit: the network outputs one unbounded value per row.
+outputs = tf.keras.layers.Dense(1, activation='linear')(x)
+
+model = tf.keras.Model(inputs=inputs, outputs=outputs)
+
+optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
+model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
+
+
+# Train the model inside one MLflow run, then evaluate it and log the metrics.
+with mlflow.start_run() as run:
+    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
+    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
+
+    model.fit(cars_train[feature_cols], cars_train['price'], epochs=100)
+
+    # Save once, right after training; the model is not modified afterwards,
+    # so a second save at the end of the run would write the same file again.
+    model.save('model.h5')
+
+    # The model is compiled with loss='mse' and metrics=['mae'], so
+    # evaluate() yields the pair [loss, mae]. Log it instead of discarding it.
+    train_mse, train_mae = model.evaluate(cars_train[feature_cols], cars_train['price'])
+    mlflow.log_metric("train_mse", train_mse)
+    mlflow.log_metric("train_mae", train_mae)
+
+    predictions = model.predict(cars_test[feature_cols])
+    # Each prediction row holds one value; round it to a whole price once
+    # and reuse the list for every test metric.
+    rounded_prices = [round(p[0]) for p in predictions]
+
+    mae = mean_absolute_error(cars_test['price'], rounded_prices)
+    mse = mean_squared_error(cars_test['price'], rounded_prices)
+    rmse = np.sqrt(mse)
+
+    print("  MAE: %s" % mae)
+    print("  MSE: %s" % mse)
+    print("  RMSE: %s" % rmse)
+
+    mlflow.log_metric("rmse", rmse)
+    mlflow.log_metric("mse", mse)
+    mlflow.log_metric("mae", mae)
+
+