add tensorflow

2022-04-10 19:55:37 +02:00 · 2022-04-10 19:55:37 +02:00 · a331832136
commit a331832136
parent 591571c3a9
10 changed files with 29593 additions and 68740 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,5 @@
 kaggle.json
 venv
 .vscode
 .idea
 Participants_Data_HPP
--- a/.gitignore
+++ b/.gitignore
@ -217,3 +217,8 @@ fabric.properties
 kaggle.json
 venv
 venv/*
 training_1
--- a/27
+++ b/27
@ -1,24 +1,33 @@
 FROM ubuntu:latest
 FROM tensorflow/tensorflow:latest
 RUN apt update && apt install -y
 RUN apt-get install -y python3
 RUN apt-get install -y unzip
 RUN apt-get install -y python3-pip
-RUN python3 -m pip install kaggle
+# RUN python3 -m pip install kaggle
 RUN python3 -m pip install pandas
 # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
 WORKDIR /app
-COPY ./Participants_Data_HPP ./Participants_Data_HPP
+COPY . .
-COPY ./startscript1.sh ./
+RUN sed -i.bak 's/\r$//' ./startscript1.sh 
-COPY ./src/task1python.py ./src/task1python.py
+RUN sed -i.bak 's/\r$//' ./runPythonScripts.sh
-COPY ./src/pythonTest.py ./src/pythonTest.py
+# COPY ./Participants_Data_HPP ./Participants_Data_HPP
 # COPY ./startscript1.sh ./
 # COPY ./src/task1python.py ./src/task1python.py
 # COPY ./src/pythonTest.py ./src/pythonTest.py
 # COPY ./src/trainScript.py ./src/trainScript.py
 # COPY ./runPythonScript.sh ./runPythonScript.sh
-RUN chmod +x ./startscript1.sh
+# RUN chmod +x ./startscript1.sh
-RUN chmod +x ./src/task1python.py
+# RUN chmod +x ./src/task1python.py
-RUN chmod +x ./src/pythonTest.py
+# RUN chmod +x /app/runPythonScript.sh
-CMD python3 ./src/task1python.py
+# FROM tensorflow/tensorflow:latest
 RUN ./startscript1.sh
--- a/Participants_Data_HPP/Dev.csv
+++ b/Participants_Data_HPP/Dev.csv
--- a/Participants_Data_HPP/Test.csv
+++ b/Participants_Data_HPP/Test.csv
--- a/runPythonScripts.sh
+++ b/runPythonScripts.sh
@ -0,0 +1,3 @@
 #!/bin/sh
 # Runs the two Python stages in order: first the dataset script, then the
 # TensorFlow training script. The script paths are relative, so this must be
 # invoked from the directory that contains ./src.
 python3 ./src/task1python.py
 python3 ./src/trainScript.py
--- a/src/task1.ipynb
+++ b/src/task1.ipynb
@ -31,9 +31,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dev:14725\n",
      "test:14725\n",
      "train:29451\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
@ -62,11 +72,16 @@
    "        name = \"Test\"\n",
    "    df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)\n",
    "\n",
-    "#df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n",
+    "df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n",
    "\n",
-    "#df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n",
+    "df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n",
    "\n",
-    "#df_2 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n"
+    "df_3 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n",
    "\n",
    "print(\"dev:\" + str(len(df_1)))\n",
    "print(\"test:\" + str(len(df_2)))\n",
    "print(\"train:\" + str(len(df_3)))\n",
    "\n"
   ]
  },
  {
--- a/src/task1python.py
+++ b/src/task1python.py
@ -1,8 +1,13 @@
 import os
 import sys
 import pandas as pd
 cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
 # paths
-filePathTest = "../Participants_Data_HPP/Train.csv"
+filePathTest = cwd + "/../Participants_Data_HPP/Train.csv"
-filePathTrain = "../Participants_Data_HPP/Test.csv"
+filePathTrain = cwd + "/../Participants_Data_HPP/Test.csv"
 dataTest = pd.read_csv(filePathTest)
 dataTrain = pd.read_csv(filePathTrain)
@ -23,7 +28,7 @@ for i in range(k):
        name = "Dev"
    else:
        name = "Test"
-    df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)
+    df.to_csv(cwd + '/../Participants_Data_HPP/' + name + '.csv', index=False)
 #df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv")
@ -31,7 +36,7 @@ for i in range(k):
 #df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv")
-dataPath = '../Participants_Data_HPP/Train.csv'
+dataPath = cwd + '/../Participants_Data_HPP/Train.csv'
 #data informations
 data = pd.read_csv(dataPath)
--- a/src/trainScript.py
+++ b/src/trainScript.py
@ -0,0 +1,79 @@
 import os
 import sys
 import pandas as pd
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import layers
 # Directory containing this script (derived from sys.argv[0]); all data paths
 # are built from it so the script does not depend on the caller's working dir.
 cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
 pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
 pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
 # Columns read from both CSV files. The last entry is the regression target;
 # it is popped off into separate label variables below.
 features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
 # Load the training set, restricted to the selected columns.
 house_price_train = pd.read_csv(pathTrain)[features]
 # Load the test set, restricted to the same columns.
 house_price_test = pd.read_csv(pathTest)[features]
 house_price_features = house_price_train.copy()
 # pop() removes the target column from the copy in place and returns it,
 # leaving only the input features in house_price_features.
 house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
 # Fit the normalization layer to the training features.
 normalize = layers.Normalization()
 normalize.adapt(house_price_features)
 # Draw 10 test rows for the prediction report; no random_state is given, so
 # the selection differs between runs. The target is split off the sample.
 feature_test_sample = house_price_test.sample(10)
 labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
 house_price_test_features = house_price_test.copy()
 # Split the full test set into features and expected target values.
 house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
 # Only the training features are converted to an ndarray; the test frames are
 # passed to Keras as pandas objects (the conversions below are disabled).
 # house_price_test =  np.array(house_price_test)
 # house_price_test_expected = np.array(house_price_test_expected)
 house_price_features = np.array(house_price_features)
 # Checkpoint location. NOTE(review): not used in the visible code while the
 # ModelCheckpoint callback below stays commented out.
 checkpoint_path = "training_1/cp.ckpt"
 checkpoint_dir = os.path.dirname(checkpoint_path)
 # Create a callback that saves the model's weights
 # cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
 # Linear regression model: the adapted normalization layer followed by a
 # single Dense unit, i.e. one output value per input row.
 linear_model = tf.keras.Sequential([
  normalize,
  layers.Dense(1)
 ])
 # Mean squared error loss, Adam optimizer constructed with learning rate 1.
 linear_model.compile(loss = tf.losses.MeanSquaredError(),
                      optimizer = tf.optimizers.Adam(1))
 # Train for 10 epochs on the full training set (no validation split).
 history = linear_model.fit(house_price_features, house_price_labels, epochs=10, )
 #callbacks=[cp_callback])
 # print(history)
 # Evaluate on the full test set. Only a loss was compiled (no metrics), so
 # the stored value is the test loss.
 test_results = {}
 test_results['linear_model'] = linear_model.evaluate(
    house_price_test_features, house_price_test_expected, verbose=0)
 def flatten(t):
    """Return one flat list holding the items of every sub-iterable of ``t``.

    Only one level of nesting is removed; items keep their original order.
    """
    flat = []
    for sublist in t:
       flat.extend(sublist)
    return flat
 # Predict prices for the sampled test rows and flatten the per-row outputs
 # into a single list.
 raw_pred = linear_model.predict(feature_test_sample)
 pred = np.array(raw_pred)
 flatten_pred = flatten(pred)
 # Report predictions alongside the expected labels. The file is created one
 # directory above this script and is overwritten on every run.
 result_text = (
     "predictions: " + str(flatten_pred) + '\n'
     + "expected: " + str(labels_test_sample.to_numpy())
 )
 with open(cwd + "/../result.txt", "w+") as resultFile:
     resultFile.write(result_text)
--- a/startscript1.sh
+++ b/startscript1.sh
@ -5,3 +5,4 @@ echo $CUTOFF
 head -n $CUTOFF ./Participants_Data_HPP/Train.csv > data.txt
 head -n $CUTOFF ./Participants_Data_HPP/Test.csv > dataTest.txt
 ./runPythonScripts.sh