diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..f8ab15e
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,36 @@
+import tensorflow as tf
+import pandas as pd
+import numpy as np
+import sklearn
+import sklearn.model_selection
+from tensorflow.keras.models import load_model
+from sklearn.metrics import accuracy_score, precision_score, f1_score
+
+feature_cols = ['year', 'mileage', 'vol_engine']
+
+# Load the trained model and the test split produced by train.py
+model = load_model('model.h5')
+test_data = pd.read_csv('test.csv')
+
+predictions = model.predict(test_data[feature_cols])
+predicted_prices = [p[0] for p in predictions]
+
+
+results = pd.DataFrame({'id': test_data['id'], 'year': test_data['year'], 'mileage': test_data['mileage'], 'vol_engine': test_data['vol_engine'], 'predicted_price': predicted_prices})
+results.to_csv('predictions.csv', index=False)
+
+# Round predictions to whole prices so they can be compared with the classification-style metrics below
+y_true = test_data['price']
+y_pred = [round(p[0]) for p in predictions]
+
+print(y_pred)
+print(y_true)
+
+accuracy = accuracy_score(y_true, y_pred)
+precision = precision_score(y_true, y_pred, average='micro')
+f1 = f1_score(y_true, y_pred, average='micro')
+
+with open('metrics.txt', 'w') as f:
+    f.write(f"Accuracy: {accuracy:.4f}\n")
+    f.write(f"Micro-average Precision: {precision:.4f}\n")
+    f.write(f"Micro-average F1-score: {f1:.4f}\n")
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..f1ae340
--- /dev/null
+++ b/train.py
@@ -0,0 +1,63 @@
+import tensorflow as tf
+from sacred import Experiment
+from sacred.observers import FileStorageObserver
+import pandas as pd
+import sklearn
+import sklearn.model_selection
+import numpy as np
+
+ex = Experiment('452662')
+ex.observers.append(FileStorageObserver.create('my_runs'))
+#ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
+
+# Min-max scale a single column to the [0, 1] range
+def normalize(df, feature_name):
+    result = df.copy()
+    max_value = df[feature_name].max()
+    min_value = df[feature_name].min()
+    result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
+    return result
+
+
+@ex.automain
+def run_experiment():
+    cars = pd.read_csv('zbior_ium/Car_Prices_Poland_Kaggle.csv')
+
+    cars = cars.drop(73436)  # row with invalid data
+
+    cars_normalized = normalize(cars, 'vol_engine')
+
+    # Hold out a fixed-size test set, then split it in half into dev and test
+    cars_train, cars_test = sklearn.model_selection.train_test_split(cars_normalized, test_size=23586, random_state=1)
+    cars_dev, cars_test = sklearn.model_selection.train_test_split(cars_test, test_size=11793, random_state=1)
+    cars_train.rename(columns={list(cars_train)[0]: 'id'}, inplace=True)
+    cars_test.rename(columns={list(cars_test)[0]: 'id'}, inplace=True)
+    cars_train.to_csv('train.csv')
+    cars_test.to_csv('test.csv')
+
+    # Simple feed-forward regression network over the three numeric features
+    feature_cols = ['year', 'mileage', 'vol_engine']
+    inputs = tf.keras.Input(shape=(len(feature_cols),))
+
+    x = tf.keras.layers.Dense(10, activation='relu')(inputs)
+    x = tf.keras.layers.Dense(10, activation='relu')(x)
+    outputs = tf.keras.layers.Dense(1, activation='linear')(x)
+
+    model = tf.keras.Model(inputs=inputs, outputs=outputs)
+
+    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
+                  loss='mse', metrics=['mae'])
+
+    model.fit(cars_train[feature_cols], cars_train['price'], epochs=100)
+
+    ex.add_resource('train.csv')
+    ex.add_resource('test.csv')
+
+    ex.add_artifact(__file__)
+
+    model.save('model.h5')
+    ex.add_artifact('model.h5')
+
+    metrics = model.evaluate(cars_train[feature_cols], cars_train['price'])
+    ex.log_scalar('mse', metrics[0])
+    ex.log_scalar('mae', metrics[1])
\ No newline at end of file