commit 84bb8ee94e (parent 25fbe052c3)

dvc
.dvc/.gitignore (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
.dvc/config (new file, 6 lines)
@@ -0,0 +1,6 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
+['remote "my_local_remote"']
+    url = /dvcstore
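For reference, a config like this is normally generated by the DVC CLI rather than written by hand; a minimal sketch that would produce these two remotes (names and URLs taken from the config above, with -d marking the default remote recorded under [core]):

    dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
    dvc remote add my_local_remote /dvcstore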
.dvcignore (new file, 4 lines)
@@ -0,0 +1,4 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
+kaggle.json
.gitignore (vendored, 1 line added)
@@ -226,3 +226,4 @@ my_runs
 saved_model
 
 mlruns
+/Participants_Data_HPP
Dockerfile (1 line changed, 1 line added)
@@ -15,11 +15,12 @@ RUN python3 -m pip install kaggle
 RUN python3 -m pip install pandas
 RUN pip3 install matplotlib
 RUN pip3 install sacred
 RUN pip3 install pymongo
 RUN pip3 install mlflow
-RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
+# RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
 
+RUN python3 -m pip install dvc 'dvc[ssh]' paramiko
 RUN useradd -r -u 111 jenkins
 WORKDIR /app
 
 COPY . .
Participants_Data_HPP.dvc (new file, 5 lines)
@@ -0,0 +1,5 @@
+outs:
+- md5: 40550846a6d8eec3f49155996444fc15.dir
+  size: 5182261
+  nfiles: 4
+  path: Participants_Data_HPP
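A pointer file like this is what dvc add produces: it hashes the directory (the .dir suffix marks a directory hash), records its size and file count, and moves the data into DVC's cache, while Git tracks only the pointer. A sketch of the sequence that would yield the file and the .gitignore entry above:

    dvc add Participants_Data_HPP
    git add Participants_Data_HPP.dvc .gitignore
    dvc push -r ium_ssh_remote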
README.md (1 line changed)
@@ -31,6 +31,6 @@ Task 1
 
 Task 2
 1. the file lab8/trainScript.py uses MLflow and includes an input_example,
-2. the model is fetched from the artifacts in lab8/Jenkinsfile.artifact, the prediction is made with the script lab8/predictArtifact.py, and the prediction output goes to the console in the [project](https://tzietkiewicz.vs444m.wmi.amu.edu.pl:8080/job/s444417-predict-s449288/)
+2. the model is fetched from the artifacts in lab8/Jenkinsfile.artifact, the prediction is made with the script lab8/predictArtifact.py, and the prediction output goes to the console in the [project](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-predict-s449288/)
 3. the model is registered, e.g. http://tzietkiewicz.vm.wmi.amu.edu.pl/#/experiments/17/runs/811420769d2642b8be694693c75b3587/artifactPath/linear-model; registration happens in the file lab8/trainScript.py
 4. in the [project](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-predict-s449288-from-registry/), the prediction is run with the script lab8/predictMlflow.py and printed to the build console,
dvc.yaml (new file, 4 lines)
@@ -0,0 +1,4 @@
+stages:
+  prepare:
+    cmd: python3 lab10/task1python.py
+    deps: [lab10/task1python.py, Participants_Data_HPP]
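With this stage defined, DVC reruns the preparation step only when one of its dependencies changes; a sketch of typical usage:

    dvc status          # show which stages are out of date
    dvc repro prepare   # rerun the prepare stage if its deps changed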
lab10/Jenkinsfile (vendored, new file, 20 lines)
@@ -0,0 +1,20 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+    stages {
+        stage("Check out from version control") {
+            steps {
+                checkout scm
+            }
+        }
+        stage("DVC") {
+            steps {
+                withCredentials([sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY')]) {
+                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
+                    sh 'dvc pull'
+                }
+            }
+        }
+    }
+}
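The --local flag in the DVC stage is what keeps the credential out of the repository: dvc remote modify --local writes to .dvc/config.local, which the .dvc/.gitignore added above excludes from version control. Run by hand outside Jenkins, the same two steps look like this (the key path is a placeholder):

    # point the SSH remote at a private key; the setting goes to .dvc/config.local
    dvc remote modify --local ium_ssh_remote keyfile ~/.ssh/ium_sftp_key
    dvc pull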
lab10/task1python.py (new file, 57 lines)
@@ -0,0 +1,57 @@
+import os
+import sys
+
+import pandas as pd
+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+# paths to the dataset splits (os.path.join avoids the broken cwd + "./..." concatenation)
+filePathTest = os.path.join(cwd, "Participants_Data_HPP", "Test.csv")
+filePathTrain = os.path.join(cwd, "Participants_Data_HPP", "Train.csv")
+
+dataTest = pd.read_csv(filePathTest)
+dataTrain = pd.read_csv(filePathTrain)
+
+number_lines = len(dataTest.index)
+row_size = number_lines // 2
+
+# split the test data in half: the first half becomes Dev, the second Test
+k = 2
+size = row_size
+
+for i in range(k):
+    df = dataTest[size * i:size * (i + 1)]
+    name = "Dev" if i == 0 else "Test"
+    df.to_csv(os.path.join(cwd, "Participants_Data_HPP", name + ".csv"), index=False)
+
+# df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv")
+# df_2 = pd.read_csv("../Participants_Data_HPP/Test.csv")
+# df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv")
+
+dataPath = os.path.join(cwd, "Participants_Data_HPP", "Train.csv")
+
+# data information
+data = pd.read_csv(dataPath)
+
+description = data.describe(include="all")
+corr = data.corr(numeric_only=True)  # restrict to numeric columns
+
+# select the most significant columns
+data = data[['TARGET(PRICE_IN_LACS)', 'SQUARE_FT', 'BHK_NO.', 'RESALE']]
+
+# normalize the price and flat-area columns with min-max scaling
+columnName1 = 'TARGET(PRICE_IN_LACS)'
+columnName2 = 'SQUARE_FT'
+
+column1Min = data[columnName1].min()
+column1Max = data[columnName1].max()
+column2Min = data[columnName2].min()
+column2Max = data[columnName2].max()
+
+data[columnName1] = (data[columnName1] - column1Min) / (column1Max - column1Min)
+data[columnName2] = (data[columnName2] - column2Min) / (column2Max - column2Min)
+
+print(description)
+print(corr)
+print(data.describe(include="all"))
+print(data.head())
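The split script takes no arguments and resolves the dataset relative to its own location, so it can be run from the repository root once the data is present; a sketch, assuming the dataset was first fetched with dvc pull:

    dvc pull
    python3 lab10/task1python.py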
lab10/trainScript.py (new file, 86 lines)
@@ -0,0 +1,86 @@
+import os
+import sys
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from tensorflow.keras import layers
+
+# train params: the number of epochs comes from the first CLI argument
+numberOfEpoch = sys.argv[1]
+
+cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+pathTrain = os.path.join(cwd, "Participants_Data_HPP", "Train.csv")
+pathTest = os.path.join(cwd, "Participants_Data_HPP", "Test.csv")
+
+features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
+
+# get train and test datasets
+house_price_train = pd.read_csv(pathTrain)[features]
+house_price_test = pd.read_csv(pathTest)[features]
+
+house_price_features = house_price_train.copy()
+# pop the target column
+house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
+
+# normalization layer adapted to the training features
+normalize = layers.Normalization()
+normalize.adapt(np.array(house_price_features))
+
+feature_test_sample = house_price_test.sample(10)
+labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
+
+house_price_test_features = house_price_test.copy()
+house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
+
+house_price_features = np.array(house_price_features)
+
+# checkpoints (disabled)
+# checkpoint_path = "training_1/cp.ckpt"
+# checkpoint_dir = os.path.dirname(checkpoint_path)
+# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
+
+# load an existing model if present, otherwise build a new linear model
+# (keras.Sequential with one output tensor)
+modelPath = 'saved_model/MyModel_tf'
+try:
+    linear_model = tf.keras.models.load_model(modelPath)
+    print("open existing model")
+except Exception as ex:
+    print(ex)
+    linear_model = tf.keras.Sequential([
+        normalize,
+        layers.Dense(1)
+    ])
+    linear_model.compile(loss=tf.losses.MeanSquaredError(),
+                         optimizer=tf.optimizers.Adam(1))
+    print("creating new model")
+
+# train model
+history = linear_model.fit(
+    house_price_features,
+    house_price_labels,
+    epochs=int(numberOfEpoch),
+    validation_split=0.33,
+    verbose=1)
+    # callbacks=[cp_callback])
+
+# save model
+linear_model.save(modelPath, save_format='tf')
+
+test_results = {}
+test_results['linear_model'] = linear_model.evaluate(
+    house_price_test_features, house_price_test_expected, verbose=0)
+
+def flatten(t):
+    return [item for sublist in t for item in sublist]
+
+pred = np.array(linear_model.predict(feature_test_sample))
+flatten_pred = flatten(pred)
+
+# print("predictions: " + str(flatten_pred))
+# print("expected: " + str(np.array(labels_test_sample)))
+
+with open(os.path.join(cwd, "..", "result.txt"), "w+") as resultFile:
+    resultFile.write("predictions: " + str(flatten_pred) + '\n')
+    resultFile.write("expected: " + str(labels_test_sample.to_numpy()))
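The training script expects the epoch count as its only argument and writes result.txt one directory above itself; a sketch of a run from the repository root (the value 100 is an arbitrary example):

    python3 lab10/trainScript.py 100
    cat result.txt    # predictions vs. expected values for the 10-row sample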