add tensorflow

This commit is contained in:
s444417 2022-04-10 19:55:37 +02:00
parent 591571c3a9
commit a331832136
10 changed files with 29593 additions and 68740 deletions

5
.dockerignore Normal file
View File

@ -0,0 +1,5 @@
kaggle.json
venv
.vscode
.idea
Participants_Data_HPP

5
.gitignore vendored
View File

@ -217,3 +217,8 @@ fabric.properties
kaggle.json kaggle.json
venv
venv/*
training_1

View File

@ -1,24 +1,33 @@
FROM ubuntu:latest FROM ubuntu:latest
FROM tensorflow/tensorflow:latest
RUN apt update && apt install -y RUN apt update && apt install -y
RUN apt-get install -y python3 RUN apt-get install -y python3
RUN apt-get install -y unzip RUN apt-get install -y unzip
RUN apt-get install -y python3-pip RUN apt-get install -y python3-pip
RUN python3 -m pip install kaggle # RUN python3 -m pip install kaggle
RUN python3 -m pip install pandas RUN python3 -m pip install pandas
# RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
WORKDIR /app WORKDIR /app
COPY ./Participants_Data_HPP ./Participants_Data_HPP COPY . .
COPY ./startscript1.sh ./ RUN sed -i.bak 's/\r$//' ./startscript1.sh
COPY ./src/task1python.py ./src/task1python.py RUN sed -i.bak 's/\r$//' ./runPythonScripts.sh
COPY ./src/pythonTest.py ./src/pythonTest.py # COPY ./Participants_Data_HPP ./Participants_Data_HPP
# COPY ./startscript1.sh ./
# COPY ./src/task1python.py ./src/task1python.py
# COPY ./src/pythonTest.py ./src/pythonTest.py
# COPY ./src/trainScript.py ./src/trainScript.py
# COPY ./runPythonScript.sh ./runPythonScript.sh
RUN chmod +x ./startscript1.sh # RUN chmod +x ./startscript1.sh
RUN chmod +x ./src/task1python.py # RUN chmod +x ./src/task1python.py
RUN chmod +x ./src/pythonTest.py # RUN chmod +x /app/runPythonScript.sh
CMD python3 ./src/task1python.py # FROM tensorflow/tensorflow:latest
RUN ./startscript1.sh

14726
Participants_Data_HPP/Dev.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3
runPythonScripts.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Runs the two Python steps in order: task1python.py first, then trainScript.py.
# Stop at the first failing command so that training is never started after a
# failed first step, and so that the script's exit status reflects the failure.
set -e
python3 ./src/task1python.py
python3 ./src/trainScript.py

View File

@ -31,9 +31,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dev:14725\n",
"test:14725\n",
"train:29451\n"
]
}
],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
@ -62,11 +72,16 @@
" name = \"Test\"\n", " name = \"Test\"\n",
" df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)\n", " df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False)\n",
"\n", "\n",
"#df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n", "df_1 = pd.read_csv(\"../Participants_Data_HPP/Dev.csv\")\n",
"\n", "\n",
"#df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n", "df_2 = pd.read_csv(\"../Participants_Data_HPP/Test.csv\")\n",
"\n", "\n",
"#df_2 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n" "df_3 = pd.read_csv(\"../Participants_Data_HPP/Train.csv\")\n",
"\n",
"print(\"dev:\" + str(len(df_1)))\n",
"print(\"test:\" + str(len(df_2)))\n",
"print(\"train:\" + str(len(df_3)))\n",
"\n"
] ]
}, },
{ {

View File

@ -1,8 +1,13 @@
import os
import sys
import pandas as pd import pandas as pd
cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
# paths # paths
filePathTest = "../Participants_Data_HPP/Train.csv" filePathTest = cwd + "/../Participants_Data_HPP/Train.csv"
filePathTrain = "../Participants_Data_HPP/Test.csv" filePathTrain = cwd + "/../Participants_Data_HPP/Test.csv"
dataTest = pd.read_csv(filePathTest) dataTest = pd.read_csv(filePathTest)
dataTrain = pd.read_csv(filePathTrain) dataTrain = pd.read_csv(filePathTrain)
@ -23,7 +28,7 @@ for i in range(k):
name = "Dev" name = "Dev"
else: else:
name = "Test" name = "Test"
df.to_csv(f'../Participants_Data_HPP/' + name + '.csv', index=False) df.to_csv(cwd + '/../Participants_Data_HPP/' + name + '.csv', index=False)
#df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv") #df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv")
@ -31,7 +36,7 @@ for i in range(k):
#df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv") #df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv")
dataPath = '../Participants_Data_HPP/Train.csv' dataPath = cwd + '/../Participants_Data_HPP/Train.csv'
#data informations #data informations
data = pd.read_csv(dataPath) data = pd.read_csv(dataPath)

79
src/trainScript.py Normal file
View File

@ -0,0 +1,79 @@
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
# Directory that contains this script (taken from sys.argv[0]); the data paths
# below are built from it rather than from the current working directory.
cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
# Columns kept from the CSV files; the last entry is the regression target.
features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
# Load the training set, restricted to the selected columns.
house_price_train = pd.read_csv(pathTrain)[features]
# Load the test set, restricted to the same columns.
house_price_test = pd.read_csv(pathTest)[features]
house_price_features = house_price_train.copy()
# Split off the target column: pop() removes it from the feature frame and
# returns it as the label series.
house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
# Normalization layer adapted to the training features only.
normalize = layers.Normalization()
normalize.adapt(house_price_features)
# Draw 10 random rows from the test set for the prediction report. No
# random_state is passed, so the selected rows differ between runs.
feature_test_sample = house_price_test.sample(10)
labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
house_price_test_features = house_price_test.copy()
# Split the full test set into features and expected targets.
house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
# Only the training features are converted to a NumPy array; the test-set
# conversions below are disabled.
# house_price_test = np.array(house_price_test)
# house_price_test_expected = np.array(house_price_test_expected)
house_price_features = np.array(house_price_features)
# Checkpoint location. Neither value is used further down while the
# ModelCheckpoint callback stays commented out.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
# Linear regression model: the adapted normalization layer followed by a
# single Dense unit, i.e. one output value per input row.
linear_model = tf.keras.Sequential([
normalize,
layers.Dense(1)
])
# Mean squared error loss; Adam is constructed with 1 as its first positional
# argument (the learning rate).
linear_model.compile(loss = tf.losses.MeanSquaredError(),
optimizer = tf.optimizers.Adam(1))
# Train for 10 epochs on the training features and labels.
history = linear_model.fit(house_price_features, house_price_labels, epochs=10, )
#callbacks=[cp_callback])
# print(history)
# Evaluate on the full test set. The result is stored in this dict only; the
# code shown here does not print it or write it to a file.
test_results = {}
test_results['linear_model'] = linear_model.evaluate(
house_price_test_features, house_price_test_expected, verbose=0)
def flatten(t):
    """Flatten exactly one level of nesting.

    Args:
        t: An iterable whose elements are themselves iterable, for example a
            2-D array of predictions with one inner sequence per row.

    Returns:
        A new list holding the items of every inner iterable, in their
        original order. An empty input yields an empty list.
    """
    return [item for sublist in t for item in sublist]
# Predict on the sampled test rows and flatten the per-row outputs into one
# plain list of values for the report.
pred = np.array(linear_model.predict(feature_test_sample))
flatten_pred = flatten(pred)
# print("predictions: " + str(flatten_pred))
# print("expected: " + str(np.array(labels_test_sample)))
# Write the predictions and the expected targets of the same sampled rows to
# result.txt one directory above this script. The file is only written, so it
# is opened in plain write mode with an explicit encoding.
with open(cwd + "/../result.txt", "w", encoding="utf-8") as resultFile:
    resultFile.write("predictions: " + str(flatten_pred) + '\n')
    resultFile.write("expected: " + str(labels_test_sample.to_numpy()))

View File

@ -5,3 +5,4 @@ echo $CUTOFF
head -n $CUTOFF ./Participants_Data_HPP/Train.csv > data.txt head -n $CUTOFF ./Participants_Data_HPP/Train.csv > data.txt
head -n $CUTOFF ./Participants_Data_HPP/Test.csv > dataTest.txt head -n $CUTOFF ./Participants_Data_HPP/Test.csv > dataTest.txt
./runPythonScripts.sh