feat: add DVC pipeline and Jenkinsfile
This commit is contained in:
parent
98e9299097
commit
90e0218976
34
Jenkinsfile-dvc
Normal file
34
Jenkinsfile-dvc
Normal file
@ -0,0 +1,34 @@
|
||||
// Scripted pipeline: check out the project, build its Docker image, then pull
// the DVC-tracked data and run the DVC experiment inside that image.
node {
    stage('Git clone') {
        // Check out the master branch of the project repository with the stored Jenkins credentials.
        checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]])
    }

    stage('Dockerfile build') {
        // Mark everything in the workspace as executable before it is used in the container.
        sh "chmod +x -R ${env.WORKSPACE}"
        // Copy the artifacts of the dataset job; the source build is chosen by the BUILD_SELECTOR parameter.
        copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR')

        // Build the image from the Dockerfile in the current directory.
        def dockerImage = docker.build("s424714-model")

        // Everything below runs inside a container started from the image built above.
        dockerImage.inside {
            // Expose the path of the SSH private key file as IUM_SFTP_KEY for the DVC remote.
            withCredentials(
                [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
                // The opening brace has to stay on the same line as the stage call:
                // Groovy does not attach a closure that starts on the next line to
                // the preceding method call, so the stage would get no body.
                stage("Docker: DVC") {
                    // Register the SSH remote and make it the default (-d).
                    sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
                    // Single-quoted on purpose: the shell, not Groovy, expands $IUM_SFTP_KEY,
                    // and --local keeps the key path out of the shared DVC config.
                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
                    sh 'dvc pull'
                    sh 'dvc exp run'
                }
            }
        }
    }
}
|
||||
|
@ -4,4 +4,4 @@
|
||||
### Filip Patyk
|
||||
### 424714
|
||||
|
||||
[https://git.wmi.amu.edu.pl/AITech/aitech-iumkv](https://git.wmi.amu.edu.pl/AITech/aitech-ium)
|
||||
[https://git.wmi.amu.edu.pl/AITech/aitech-ium](https://git.wmi.amu.edu.pl/AITech/aitech-ium)
|
38
dvc.lock
Normal file
38
dvc.lock
Normal file
@ -0,0 +1,38 @@
|
||||
schema: '2.0'
|
||||
stages:
|
||||
test:
|
||||
cmd: python src/main.py --test
|
||||
deps:
|
||||
- path: data/dataset/test.csv
|
||||
md5: 9dd26ae5484ab058be21ae695098b9c6
|
||||
size: 10985891
|
||||
- path: data/dataset/test.csv
|
||||
md5: 9dd26ae5484ab058be21ae695098b9c6
|
||||
size: 10985891
|
||||
- path: data/dataset/test.csv
|
||||
md5: 9dd26ae5484ab058be21ae695098b9c6
|
||||
size: 10985891
|
||||
outs:
|
||||
- path: results/results.csv
|
||||
md5: 0a4ca4ee424a0668a2748987de74236a
|
||||
size: 815
|
||||
train:
|
||||
cmd: python src/main.py --train --data_len=200 -e=1
|
||||
deps:
|
||||
- path: data/dataset/test.csv
|
||||
md5: 9dd26ae5484ab058be21ae695098b9c6
|
||||
size: 10985891
|
||||
- path: data/dataset/train.csv
|
||||
md5: b4c5dfbc36c057a27137766197adbeca
|
||||
size: 89199212
|
||||
- path: data/dataset/val.csv
|
||||
md5: 64b7d344d7d4d7117b2cf9d7e31be771
|
||||
size: 11229062
|
||||
params:
|
||||
params.yaml:
|
||||
train.data_len: 200
|
||||
train.epochs: 1
|
||||
outs:
|
||||
- path: results/model.pt
|
||||
md5: c6a52e8320ccee83efb634f18c09a5c7
|
||||
size: 433319573
|
20
dvc.yaml
Normal file
20
dvc.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
# DVC pipeline with two stages, train and test, both run through src/main.py.
stages:
|
||||
# Stage "train": depends on the three dataset CSV files and produces results/model.pt.
train:
|
||||
deps:
|
||||
- data/dataset/train.csv
|
||||
- data/dataset/val.csv
|
||||
- data/dataset/test.csv
|
||||
outs:
|
||||
- results/model.pt
|
||||
# Parameters tracked for this stage; their values come from params.yaml.
params:
|
||||
- train.data_len
|
||||
- train.epochs
|
||||
# The ${...} placeholders are filled in by DVC from the parameter values listed above.
cmd: python src/main.py --train --data_len=${train.data_len} -e=${train.epochs}
|
||||
# Stage "test": runs the evaluation entry point and produces results/results.csv.
test:
|
||||
# NOTE(review): the same path is listed three times below; a single entry looks sufficient.
# NOTE(review): results/model.pt is not a dependency here, so DVC does not order this stage
# after train - confirm whether --test needs the trained model.
deps:
|
||||
- data/dataset/test.csv
|
||||
- data/dataset/test.csv
|
||||
- data/dataset/test.csv
|
||||
outs:
|
||||
- results/results.csv
|
||||
cmd: python src/main.py --test
|
7
params.yaml
Normal file
7
params.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
train:
|
||||
data_len: 200
|
||||
epochs: 1
|
||||
|
||||
test:
|
||||
data_len: 100
|
||||
|
44
src/main.py
44
src/main.py
@ -1,13 +1,11 @@
|
||||
import argparse
|
||||
import shutil
|
||||
|
||||
|
||||
import torch
|
||||
from sacred.observers import FileStorageObserver, MongoObserver
|
||||
|
||||
from datasets import NewsDataset
|
||||
from evaluate import evaluate
|
||||
from models import BertClassifier, utils
|
||||
from sacred import Experiment
|
||||
from train import train
|
||||
|
||||
# argument parser
|
||||
@ -22,50 +20,30 @@ parser.add_argument("--results_path", type=str, default="results/results.csv")
|
||||
parser.add_argument("--data_acc_path", type=str, default="./results/acc.csv")
|
||||
parser.add_argument("--build_id", type=str, default="0")
|
||||
|
||||
|
||||
# HYPER PARAMETERS
|
||||
parser.add_argument("--batch", "-b", type=int, default=2)
|
||||
parser.add_argument("--learning_rate", "--lr", type=float, default=1e-6)
|
||||
parser.add_argument("--num_epochs", "--epochs", "-e", type=int, default=3)
|
||||
parser.add_argument("--data_len", type=int, default=1000)
|
||||
|
||||
|
||||
# sacred stuff
|
||||
ex = Experiment(name="s424714", interactive=True)
|
||||
SACRED_DIR_PATH = "./sacred"
|
||||
if not torch.cuda.is_available():
|
||||
ex.observers.append(MongoObserver(url="mongodb://admin:IUM_2021@172.17.0.1:27017", db_name="sacred"))
|
||||
# ex.observers.append(MongoObserver(url="mongodb://admin:IUM_2021@172.17.0.1:27017", db_name="sacred"))
|
||||
ex.observers.append(FileStorageObserver(SACRED_DIR_PATH))
|
||||
|
||||
ex.add_source_file("./src/train.py")
|
||||
|
||||
|
||||
@ex.main
|
||||
def main(_run):
|
||||
def main():
|
||||
args = parser.parse_args()
|
||||
|
||||
ex.open_resource(filename="./data/dataset/train.csv", mode="r")
|
||||
ex.open_resource(filename="./data/dataset/test.csv", mode="r")
|
||||
ex.open_resource(filename="./data/dataset/val.csv", mode="r")
|
||||
|
||||
INITIAL_LR = args.learning_rate
|
||||
NUM_EPOCHS = args.num_epochs
|
||||
BATCH_SIZE = args.batch
|
||||
print(BATCH_SIZE)
|
||||
|
||||
@ex.config
|
||||
def hyper_parameters():
|
||||
initial_lr = INITIAL_LR # noqa: F841
|
||||
num_epochs = NUM_EPOCHS # noqa: F841
|
||||
batch_size = BATCH_SIZE # noqa: F841
|
||||
|
||||
DATA_LEN = args.data_len
|
||||
print("INITIAL_LR: ", INITIAL_LR)
|
||||
print("NUM_EPOCHS: ", NUM_EPOCHS)
|
||||
print("BATCH_SIZE: ", BATCH_SIZE)
|
||||
print("DATA_LEN: ", DATA_LEN)
|
||||
print("CUDA: ", torch.cuda.is_available())
|
||||
|
||||
# raise
|
||||
# loading & splitting data
|
||||
news_dataset = NewsDataset(data_dir_path="data", data_lenght=1000)
|
||||
news_dataset = NewsDataset(data_dir_path="data", data_lenght=DATA_LEN)
|
||||
|
||||
train_data = news_dataset.train
|
||||
test_data = news_dataset.test
|
||||
@ -96,11 +74,6 @@ def main(_run):
|
||||
batch_size=BATCH_SIZE,
|
||||
)
|
||||
utils.save_model(model=trained_model, model_path=args.model_path)
|
||||
ex.add_artifact(args.model_path)
|
||||
_run.log_scalar("train_loss", metrics["train_loss"])
|
||||
_run.log_scalar("val_loss", metrics["val_loss"])
|
||||
_run.log_scalar("train_acc", metrics["train_acc"])
|
||||
_run.log_scalar("val_acc", metrics["val_acc"])
|
||||
|
||||
# evaluating model
|
||||
if args.test:
|
||||
@ -119,5 +92,4 @@ def main(_run):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ex.run()
|
||||
shutil.make_archive(base_name="./results/sacred-artifacts", format="zip", root_dir=SACRED_DIR_PATH)
|
||||
main()
|
||||
|
@ -79,7 +79,7 @@ def train(
|
||||
total_acc_val += acc
|
||||
|
||||
print(
|
||||
f"Epochs: {epoch + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
|
||||
f"Epochs: {epoch + 1}/{epochs} | Train Loss: {total_loss_train / len(train_data): .3f} \
|
||||
| Train Accuracy: {total_acc_train / len(train_data): .3f} \
|
||||
| Val Loss: {total_loss_val / len(val_data): .3f} \
|
||||
| Val Accuracy: {total_acc_val / len(val_data): .3f}"
|
||||
|
Loading…
Reference in New Issue
Block a user