diff --git a/pred.py b/pred.py
new file mode 100644
index 0000000..7a8692d
--- /dev/null
+++ b/pred.py
@@ -0,0 +1,53 @@
+"""Evaluate the model saved by train.py on dane/water_test.csv."""
+import pickle
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from sklearn.metrics import accuracy_score, precision_score, recall_score
+
+categorical_cols = ['bacteria', 'viruses']
+
+data_test = pd.read_csv('dane/water_test.csv')
+
+X_test = data_test.drop('is_safe', axis=1)
+y_test = data_test['is_safe']
+
+# Reuse the encoder and scaler fitted on the training data.  Refitting them
+# here would derive categories and mean/variance from the test set, so the
+# feature columns could differ in number, order and scale from what the model
+# was trained on.
+# NOTE: pickle executes code on load; only read artifacts written by train.py.
+with open('preprocessing.pkl', mode='rb') as f:
+    preprocessing = pickle.load(f)
+encoder = preprocessing['encoder']
+scaler = preprocessing['scaler']
+
+# The encoder is saved with its default sparse output; densify it here.
+X_test_encoded = pd.DataFrame(
+    encoder.transform(X_test[categorical_cols]).toarray(),
+    index=X_test.index,
+)
+X_test_processed = pd.concat([X_test.drop(categorical_cols, axis=1), X_test_encoded], axis=1)
+# Cast the integer labels of the encoded columns to str so all names share one type.
+X_test_processed.columns = X_test_processed.columns.astype(str)
+X_test_scaled = scaler.transform(X_test_processed)
+
+model = tf.keras.models.load_model('savedmodel')
+
+predictions = model.predict(X_test_scaled)
+print(predictions)
+# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels.
+prediction_classes = (np.ravel(predictions) > 0.5).astype(int).tolist()
+print(prediction_classes[:30])
+
+# One probability per line, in test-set row order.
+with open("predictionsResults.txt", mode='w', newline='') as f:
+    f.writelines(f'{prob}\n' for prob in predictions[:, 0])
+
+# Keras reports the loss and the metrics compiled into the saved model.
+model.evaluate(X_test_scaled, y_test)
+
+print(f'Accuracy: {accuracy_score(y_test, prediction_classes):.2f}')
+print(f'Precision: {precision_score(y_test, prediction_classes):.2f}')
+print(f'Recall: {recall_score(y_test, prediction_classes):.2f}')
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..28fbadc
--- /dev/null
+++ b/train.py
@@ -0,0 +1,52 @@
+"""Train the water-safety classifier and save it with its preprocessing."""
+import pickle
+
+import pandas as pd
+import tensorflow as tf
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+
+data_train = pd.read_csv('dane/water_train.csv')
+
+X_train = data_train.drop('is_safe', axis=1)
+y_train = data_train['is_safe']
+
+categorical_cols = ['bacteria', 'viruses']
+# The `sparse=` keyword was renamed to `sparse_output=` in newer scikit-learn;
+# keeping the default sparse output and densifying it works with either name.
+encoder = OneHotEncoder(handle_unknown='ignore')
+X_train_encoded = pd.DataFrame(
+    encoder.fit_transform(X_train[categorical_cols]).toarray(),
+    index=X_train.index,
+)
+X_train_processed = pd.concat([X_train.drop(categorical_cols, axis=1), X_train_encoded], axis=1)
+# Cast the integer labels of the encoded columns to str so all names share one type.
+X_train_processed.columns = X_train_processed.columns.astype(str)
+
+scaler = StandardScaler()
+X_train_scaled = scaler.fit_transform(X_train_processed)
+
+# Persist the fitted transformers so pred.py applies exactly the preprocessing
+# the model was trained with instead of refitting on the test set.
+with open('preprocessing.pkl', mode='wb') as f:
+    pickle.dump({'encoder': encoder, 'scaler': scaler}, f)
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dense(256, activation='relu'),
+    tf.keras.layers.Dense(256, activation='relu'),
+    tf.keras.layers.Dense(1, activation='sigmoid')
+])
+
+model.compile(
+    loss=tf.keras.losses.binary_crossentropy,
+    # `learning_rate` is the supported spelling; the `lr` alias is deprecated.
+    optimizer=tf.keras.optimizers.Adam(learning_rate=0.03),
+    metrics=[
+        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
+        tf.keras.metrics.Precision(name='precision'),
+        tf.keras.metrics.Recall(name='recall')
+    ]
+)
+
+model.fit(X_train_scaled, y_train, batch_size=32, epochs=5, verbose=2)
+model.save("savedmodel")