dvc

parent 25fbe052c3
commit 84bb8ee94e

.dvc/.gitignore (vendored, new file)
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
.dvc/config (new file)
@@ -0,0 +1,6 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
+['remote "my_local_remote"']
+    url = /dvcstore
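A config in this shape is normally written by dvc remote add rather than by hand; a minimal sketch of the commands that would produce the section above (run from the repository root, with -d marking the SSH remote as the default):

    dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
    dvc remote add my_local_remote /dvcstore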
.dvcignore (new file)
@@ -0,0 +1,4 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
+kaggle.json
.gitignore (vendored)
@@ -225,4 +225,5 @@ Participants_Data_HPP/
 my_runs
 saved_model

 mlruns
+/Participants_Data_HPP
Dockerfile
@@ -15,11 +15,12 @@ RUN python3 -m pip install kaggle
 RUN python3 -m pip install pandas
 RUN pip3 install matplotlib
 RUN pip3 install sacred
-RUN pip3 install sacred
 RUN pip3 install pymongo
 RUN pip3 install mlflow
-RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
+# RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle

+RUN python3 -m pip install dvc 'dvc[ssh]' paramiko
+RUN useradd -r -u 111 jenkins
 WORKDIR /app

 COPY . .
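The Jenkins agent builds this image itself (dockerfile true in lab10/Jenkinsfile below), but the environment can also be reproduced by hand; a sketch, where the tag ium-lab is an assumed name, not something the repo defines:

    docker build -t ium-lab .
    docker run -it ium-lab bash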
Participants_Data_HPP.dvc (new file)
@@ -0,0 +1,5 @@
+outs:
+- md5: 40550846a6d8eec3f49155996444fc15.dir
+  size: 5182261
+  nfiles: 4
+  path: Participants_Data_HPP
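Pointer files like this are generated by dvc add, which also appends the directory to .gitignore (matching the /Participants_Data_HPP entry above); a minimal sketch of the workflow that produces and publishes it:

    dvc add Participants_Data_HPP
    git add Participants_Data_HPP.dvc .gitignore
    dvc push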
README.md
@@ -31,6 +31,6 @@ Task 1

 Task 2
 1. the file lab8/trainScript.py uses MLflow and contains an input_example,
 2. the model is fetched from the artifacts in lab8/Jenkinsfile.artifact, the prediction is made by the script lab8/predictArtifact.py, and its output goes to the console in the [project](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-predict-s449288/)
 3. a registered model, e.g. http://tzietkiewicz.vm.wmi.amu.edu.pl/#/experiments/17/runs/811420769d2642b8be694693c75b3587/artifactPath/linear-model; the model is registered in lab8/trainScript.py
 4. [project](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-predict-s449288-from-registry/) in which the prediction is made by the script lab8/predictMlflow.py and printed in the build console
dvc.yaml (new file)
@@ -0,0 +1,4 @@
+stages:
+  prepare:
+    cmd: python ./lab10/trainScript.py Participants_Data_HPP
+    deps: [startscript1.sh, ./lab10/task1python.py, Participants_Data_HPP]
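With the stage in valid shape, it can be checked and run with standard DVC commands (a sketch):

    dvc repro prepare
    dvc dag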
lab10/Jenkinsfile (vendored, new file)
@@ -0,0 +1,18 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+    stages {
+        stage("Check out from version control") {
+            steps {
+                checkout scm
+            }
+        }
+        stage("DVC") {
+            steps {
+                withCredentials([sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY')]) {
+                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
+                    sh "dvc pull"
+                }
+            }
+        }
+    }
+}
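Outside Jenkins, the same pull can be reproduced with a local copy of the key; the path ~/.ssh/ium_key below is an assumed location, not one defined by the repo:

    dvc remote modify --local ium_ssh_remote keyfile ~/.ssh/ium_key
    dvc pull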
lab10/task1python.py (new file)
@@ -0,0 +1,68 @@
+import os
+import sys
+
+import pandas as pd
+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+# paths: the dataset lives one level above lab10/ (see Participants_Data_HPP.dvc)
+dataDir = os.path.join(cwd, "..", "Participants_Data_HPP")
+filePathTrain = os.path.join(dataDir, "Train.csv")
+
+dataTrain = pd.read_csv(filePathTrain)
+
+number_lines = len(dataTrain.index)
+row_size = number_lines // 2
+
+# write the data out again in k files of row_size rows each:
+# the first half becomes Dev.csv, the second half Test.csv
+k = 2
+size = row_size
+
+for i in range(k):
+    df = dataTrain[size * i:size * (i + 1)]
+    name = "Dev" if i == 0 else "Test"
+    df.to_csv(os.path.join(dataDir, name + '.csv'), index=False)
+
+dataPath = os.path.join(dataDir, 'Train.csv')
+
+# data information
+data = pd.read_csv(dataPath)
+
+description = data.describe(include="all")
+corr = data.corr()
+
+# select the most significant columns
+data = data[['TARGET(PRICE_IN_LACS)', 'SQUARE_FT', 'BHK_NO.', 'RESALE']]
+
+# normalize the price and flat-area columns with the min-max technique
+columnName1 = 'TARGET(PRICE_IN_LACS)'
+columnName2 = 'SQUARE_FT'
+
+column1Min = data[columnName1].min()
+column1Max = data[columnName1].max()
+column2Min = data[columnName2].min()
+column2Max = data[columnName2].max()
+
+data[columnName1] = (data[columnName1] - column1Min) / (column1Max - column1Min)
+data[columnName2] = (data[columnName2] - column2Min) / (column2Max - column2Min)
+
+print(description)
+print(corr)
+print(data.describe(include="all"))
+print(data.head())
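A sketch of running the split locally once the data has been pulled:

    python lab10/task1python.py

It prints the dataset description and correlations and writes Dev.csv and Test.csv next to Train.csv.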
lab10/trainScript.py (new file)
@@ -0,0 +1,97 @@
+import os
+import sys
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from tensorflow.keras import layers
+
+# train params: the number of epochs is passed as the first CLI argument
+numberOfEpoch = sys.argv[1]
+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+# the dataset lives one level above lab10/ (see Participants_Data_HPP.dvc)
+pathTrain = os.path.join(cwd, "..", "Participants_Data_HPP", "Train.csv")
+pathTest = os.path.join(cwd, "..", "Participants_Data_HPP", "Test.csv")
+
+features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
+
+# get the train and test datasets
+house_price_train = pd.read_csv(pathTrain)[features]
+house_price_test = pd.read_csv(pathTest)[features]
+
+house_price_features = house_price_train.copy()
+# pop the label column
+house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
+
+# normalization layer adapted to the training features
+normalize = layers.Normalization()
+normalize.adapt(house_price_features)
+
+# a 10-row sample kept aside for printing predictions
+feature_test_sample = house_price_test.sample(10)
+labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
+
+house_price_test_features = house_price_test.copy()
+house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
+
+house_price_features = np.array(house_price_features)
+
+# checkpoints (disabled)
+# checkpoint_path = "training_1/cp.ckpt"
+# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
+
+# load the saved model if one exists; otherwise build a linear model
+# (keras.Sequential with a single output tensor)
+modelPath = 'saved_model/MyModel_tf'
+try:
+    linear_model = tf.keras.models.load_model(modelPath)
+    print("opened existing model")
+except Exception as ex:
+    print(ex)
+    linear_model = tf.keras.Sequential([
+        normalize,
+        layers.Dense(1)
+    ])
+    linear_model.compile(loss=tf.losses.MeanSquaredError(),
+                         optimizer=tf.optimizers.Adam(1))
+    print("creating new model")
+
+# train model
+history = linear_model.fit(
+    house_price_features,
+    house_price_labels,
+    epochs=int(numberOfEpoch),
+    validation_split=0.33,
+    verbose=1)
+    # callbacks=[cp_callback])
+
+# save model
+linear_model.save(modelPath, save_format='tf')
+
+test_results = {}
+test_results['linear_model'] = linear_model.evaluate(
+    house_price_test_features, house_price_test_expected, verbose=0)
+
+
+def flatten(t):
+    return [item for sublist in t for item in sublist]
+
+
+pred = np.array(linear_model.predict(feature_test_sample))
+flatten_pred = flatten(pred)
+
+with open(os.path.join(cwd, "..", "result.txt"), "w+") as resultFile:
+    resultFile.write("predictions: " + str(flatten_pred) + '\n')
+    resultFile.write("expected: " + str(labels_test_sample.to_numpy()))
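A sketch of a local training run; 50 epochs is just an example value for the script's single argument:

    python lab10/trainScript.py 50
    cat result.txt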