add tensorflow

2022-04-10 19:55:37 +02:00 · 2022-04-10 19:55:37 +02:00 · a331832136
commit a331832136
parent 591571c3a9
10 changed files with 29593 additions and 68740 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,5 @@
+kaggle.json
+venv
+.vscode
+.idea
+Participants_Data_HPP
--- a/.gitignore
+++ b/.gitignore
@ -216,4 +216,9 @@ fabric.properties
 /house-price-dataset.zip


-kaggle.json
+kaggle.json
+
+venv
+venv/*
+
+training_1
--- a/27
+++ b/27
@ -1,24 +1,33 @@
 FROM ubuntu:latest

+FROM tensorflow/tensorflow:latest
+
 RUN apt update && apt install -y
 RUN apt-get install -y python3
 RUN apt-get install -y unzip
 RUN apt-get install -y python3-pip

-RUN python3 -m pip install kaggle
+# RUN python3 -m pip install kaggle
 RUN python3 -m pip install pandas

 # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle

 WORKDIR /app

-COPY ./Participants_Data_HPP ./Participants_Data_HPP
-COPY ./startscript1.sh ./
-COPY ./src/task1python.py ./src/task1python.py
-COPY ./src/pythonTest.py ./src/pythonTest.py
+COPY . .
+RUN sed -i.bak 's/\r$//' ./startscript1.sh 
+RUN sed -i.bak 's/\r$//' ./runPythonScripts.sh
+# COPY ./Participants_Data_HPP ./Participants_Data_HPP
+# COPY ./startscript1.sh ./
+# COPY ./src/task1python.py ./src/task1python.py
+# COPY ./src/pythonTest.py ./src/pythonTest.py
+# COPY ./src/trainScript.py ./src/trainScript.py
+# COPY ./runPythonScript.sh ./runPythonScript.sh

-RUN chmod +x ./startscript1.sh
-RUN chmod +x ./src/task1python.py
-RUN chmod +x ./src/pythonTest.py
+# RUN chmod +x ./startscript1.sh
+# RUN chmod +x ./src/task1python.py
+# RUN chmod +x /app/runPythonScript.sh

-CMD python3 ./src/task1python.py
+# FROM tensorflow/tensorflow:latest
+
+RUN ./startscript1.sh
--- a/Participants_Data_HPP/Dev.csv
+++ b/Participants_Data_HPP/Dev.csv
--- a/Participants_Data_HPP/Test.csv
+++ b/Participants_Data_HPP/Test.csv
--- a/runPythonScripts.sh
+++ b/runPythonScripts.sh
@ -0,0 +1,3 @@
+#!/bin/sh
+python3 ./src/task1python.py
+python3 ./src/trainScript.py
--- a/src/task1.ipynb
+++ b/src/task1.ipynb
@ -31,9 +31,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dev:14725\n",
+      "test:14725\n",
+      "train:29451\n"
+     ]
+    }
+   ],
   "source": [
    "import pandas as pd\n",
    "\n",
@ -62,11 +72,16 @@
    "        name = \"Test\"\n",
    "    df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)\n",
    "\n",
-    "#df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n",
+    "df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n",
    "\n",
-    "#df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n",
+    "df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n",
    "\n",
-    "#df_2 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n"
+    "df_3 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n",
+    "\n",
+    "print(\"dev:\" + str(len(df_1)))\n",
+    "print(\"test:\" + str(len(df_2)))\n",
+    "print(\"train:\" + str(len(df_3)))\n",
+    "\n"
   ]
  },
  {
--- a/src/task1python.py
+++ b/src/task1python.py
@ -1,8 +1,13 @@
+import os
+import sys
 import pandas as pd

+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
 # paths
-filePathTest = "../Participants_Data_HPP/Train.csv"
-filePathTrain = "../Participants_Data_HPP/Test.csv"
+filePathTest = cwd + "/../Participants_Data_HPP/Train.csv"
+filePathTrain = cwd + "/../Participants_Data_HPP/Test.csv"

 dataTest = pd.read_csv(filePathTest)
 dataTrain = pd.read_csv(filePathTrain)
@ -23,7 +28,7 @@ for i in range(k):
        name = "Dev"
    else:
        name = "Test"
-    df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)
+    df.to_csv(cwd + '/../Participants_Data_HPP/' + name + '.csv', index=False)

 #df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv")

@ -31,7 +36,7 @@ for i in range(k):

 #df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv")

-dataPath = '../Participants_Data_HPP/Train.csv'
+dataPath = cwd + '/../Participants_Data_HPP/Train.csv'

 # data information
 data = pd.read_csv(dataPath)
--- a/src/trainScript.py
+++ b/src/trainScript.py
@ -0,0 +1,79 @@
+import os
+import sys
+import pandas as pd
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.keras import layers
+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
+pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
+
+features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
+
+# get dataset
+house_price_train = pd.read_csv(pathTrain)[features]
+
+# get test dataset
+house_price_test = pd.read_csv(pathTest)[features]
+
+
+house_price_features = house_price_train.copy()
+# split off the target column to use as the training labels
+house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
+
+# process data
+normalize = layers.Normalization()
+normalize.adapt(house_price_features)
+
+feature_test_sample = house_price_test.sample(10)
+labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
+
+house_price_test_features = house_price_test.copy()
+# split off the target column to use as the expected test values
+house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
+
+# to np.array
+# house_price_test =  np.array(house_price_test)
+# house_price_test_expected = np.array(house_price_test_expected)
+
+house_price_features = np.array(house_price_features)
+
+# checkpoints
+checkpoint_path = "training_1/cp.ckpt"
+checkpoint_dir = os.path.dirname(checkpoint_path)
+# Create a callback that saves the model's weights
+# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
+# linear model built with keras.Sequential:
+# the normalization layer followed by a single-unit Dense layer (one output value)
+linear_model = tf.keras.Sequential([
+  normalize,
+  layers.Dense(1)
+])
+linear_model.compile(loss = tf.losses.MeanSquaredError(),
+                      optimizer = tf.optimizers.Adam(1))
+
+# train model
+history = linear_model.fit(house_price_features, house_price_labels, epochs=10, )
+#callbacks=[cp_callback])
+# print(history)
+
+test_results = {}
+test_results['linear_model'] = linear_model.evaluate(
+    house_price_test_features, house_price_test_expected, verbose=0)
+
+def flatten(t):
+    return [item for sublist in t for item in sublist]
+
+pred = np.array(linear_model.predict(feature_test_sample))
+flatten_pred = flatten(pred)
+
+# print("predictions: " + str(flatten_pred))
+# print("expected: " + str(np.array(labels_test_sample)))
+
+with open(cwd + "/../result.txt", "w+") as resultFile:
+  resultFile.write("predictions: " + str(flatten_pred) + '\n')
+  resultFile.write("expected: " + str(labels_test_sample.to_numpy()))
+
--- a/startscript1.sh
+++ b/startscript1.sh
@ -5,3 +5,4 @@ echo $CUTOFF

 head -n $CUTOFF ./Participants_Data_HPP/Train.csv > data.txt
 head -n $CUTOFF ./Participants_Data_HPP/Test.csv > dataTest.txt
+./runPythonScripts.sh