# Patch summary (free text ahead of the first file header; patch tools skip it):
#   - Dockerfile: install tensorflow alongside the other pip packages and stop
#     copying data_processing.sh into the image.
#   - create_model.py (new): fit a Keras Sequential regressor on hp_train.csv,
#     validate on hp_dev.csv, save the result as hp_model.h5.
#   - data_processing.py: one-hot encode 'Neighborhood' once on the full dataset
#     before splitting, instead of separately on each split.
#   - helper.py (new): prepare_tensors(df) separates the "Price" column and
#     returns (X, Y) as float32 tensors.
#   - predict_price.py (new): load hp_model.h5, predict on hp_test.csv, write
#     hp_test_predictions.csv.
#   - IUM_02_Dane.ipynb and data_processing.sh are moved under old/ unchanged.
# NOTE(review): line structure was rebuilt from a whitespace-collapsed copy.
# Blank-line placement matches the hunk header counts, but the indentation width
# of the Dockerfile continuation lines and of the prepare_tensors body is
# assumed to be four spaces - confirm against the repository before applying.
diff --git a/Dockerfile b/Dockerfile
index 63814bb..7745198 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:latest
 
 RUN apt-get update && \
     apt-get install -y python3-pip && \
-    pip3 install kaggle pandas scikit-learn
+    pip3 install kaggle pandas scikit-learn tensorflow
 
 RUN useradd -ms /bin/bash jenkins
 
@@ -10,6 +10,4 @@ RUN mkdir -p /.kaggle && chown -R jenkins /.kaggle
 
 USER jenkins
 
-COPY data_processing.sh .
-
 WORKDIR .
\ No newline at end of file
diff --git a/create_model.py b/create_model.py
new file mode 100644
index 0000000..27ca1db
--- /dev/null
+++ b/create_model.py
@@ -0,0 +1,29 @@
+import pandas as pd
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.optimizers import Adam
+from keras import regularizers
+
+from helper import prepare_tensors
+
+hp_train = pd.read_csv('hp_train.csv')
+hp_dev = pd.read_csv('hp_dev.csv')
+
+X_train, Y_train = prepare_tensors(hp_train)
+X_dev, Y_dev = prepare_tensors(hp_dev)
+
+model = Sequential()
+model.add(Dense(64, input_dim=7, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(1, activation='linear'))
+
+adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
+model.compile(optimizer=adam, loss='mean_squared_error')
+
+model.fit(X_train, Y_train, epochs=20, batch_size=32, validation_data=(X_dev, Y_dev))
+
+model.save('hp_model.h5')
+
+
diff --git a/data_processing.py b/data_processing.py
index 57c9627..bef7018 100644
--- a/data_processing.py
+++ b/data_processing.py
@@ -5,13 +5,11 @@ import subprocess
 subprocess.run(["kaggle", "datasets", "download", "muhammadbinimran/housing-price-prediction-data", "--unzip"])
 housing_price_dataset = pd.read_csv('housing_price_dataset.csv')
 
+housing_price_dataset = pd.get_dummies(housing_price_dataset, columns=['Neighborhood'])
+
 hp_train_test, hp_dev = train_test_split(housing_price_dataset, test_size=0.1)
 hp_train, hp_test = train_test_split(hp_train_test, test_size=1000)
 
-hp_train = pd.get_dummies(hp_train, columns=['Neighborhood'])
-hp_dev = pd.get_dummies(hp_dev, columns=['Neighborhood'])
-hp_test = pd.get_dummies(hp_test, columns=['Neighborhood'])
-
 hp_train.to_csv('hp_train.csv', index=False)
 hp_dev.to_csv('hp_dev.csv', index=False)
 hp_test.to_csv('hp_test.csv', index=False)
diff --git a/helper.py b/helper.py
new file mode 100644
index 0000000..0ad19cc
--- /dev/null
+++ b/helper.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+def prepare_tensors(df):
+    Y = df["Price"]
+    X = df.drop("Price", axis=1)
+    X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
+    Y_tensor = tf.convert_to_tensor(Y, dtype=tf.float32)
+    return X_tensor, Y_tensor
\ No newline at end of file
diff --git a/IUM_02_Dane.ipynb b/old/IUM_02_Dane.ipynb
similarity index 100%
rename from IUM_02_Dane.ipynb
rename to old/IUM_02_Dane.ipynb
diff --git a/data_processing.sh b/old/data_processing.sh
similarity index 100%
rename from data_processing.sh
rename to old/data_processing.sh
diff --git a/predict_price.py b/predict_price.py
new file mode 100644
index 0000000..34010a6
--- /dev/null
+++ b/predict_price.py
@@ -0,0 +1,15 @@
+import pandas as pd
+from keras.models import load_model
+
+from helper import prepare_tensors
+
+hp_test = pd.read_csv('hp_test.csv')
+X_test, Y_test = prepare_tensors(hp_test)
+
+model = load_model('hp_model.h5')
+
+test_predictions = model.predict(X_test)
+
+predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
+
+predictions_df.to_csv('hp_test_predictions.csv', index=False)