Compare commits

...

21 Commits

Author SHA1 Message Date
Alicja Szulecka 52aa376edb Update Jenkinsfile 2024-05-04 16:48:31 +02:00
Alicja Szulecka c84935dd0f Update plot.py 2024-04-30 19:32:52 +02:00
Alicja Szulecka 6e7d740463 Update Jenkinsfile 2024-04-30 19:30:32 +02:00
Alicja Szulecka f866ef4bf7 Update Jenkinsfile 2024-04-30 19:25:40 +02:00
Alicja Szulecka 773d932415 Update Jenkinsfile 2024-04-30 19:09:17 +02:00
Alicja Szulecka cfbf877ac2 Update Jenkinsfile 2024-04-30 19:02:47 +02:00
Alicja Szulecka 42408c00ea Update Dockerfile 2024-04-30 16:29:48 +02:00
Alicja Szulecka 99b9b9c70b Update plot.py 2024-04-30 16:29:27 +02:00
Alicja Szulecka 520206ef22 plot 2024-04-30 16:25:37 +02:00
Alicja Szulecka 65bf01c425 Update Jenkinsfile 2024-04-30 16:11:33 +02:00
Alicja Szulecka e6d4c07a7a Update Jenkinsfile 2024-04-30 16:08:38 +02:00
Alicja Szulecka 5dfd11b904 jenkins evaluation 2024-04-30 16:03:02 +02:00
Alicja Szulecka 6a0b357945 Update model.py 2024-04-29 21:47:03 +02:00
Alicja Szulecka b45d036d42 Update Jenkinsfile 2024-04-29 21:45:09 +02:00
Alicja Szulecka 45beb68c25 Update Jenkinsfile 2024-04-29 21:43:25 +02:00
Alicja Szulecka 03f4d0b47a Update model.py 2024-04-29 21:27:45 +02:00
Alicja Szulecka ca24c39ada Update Jenkinsfile 2024-04-29 21:22:42 +02:00
Alicja Szulecka f883cd5e17 add parameter 2024-04-29 21:21:21 +02:00
Alicja Szulecka ac93029123 Update Jenkinsfile 2024-04-29 21:09:13 +02:00
Alicja Szulecka 5ff6e66c4f Update Jenkinsfile 2024-04-29 21:08:45 +02:00
Alicja Szulecka 66d15ac8f4 Update Jenkinsfile 2024-04-29 21:02:47 +02:00
7 changed files with 116309 additions and 39 deletions

View File

@ -4,7 +4,7 @@ RUN apt update && apt install -y python3-pip
RUN apt install unzip
RUN apt install bc
RUN pip3 install kaggle pandas scikit-learn torch
RUN pip3 install kaggle pandas scikit-learn torch matplotlib
WORKDIR /app

72
Jenkinsfile vendored
View File

@ -1,49 +1,59 @@
pipeline {
agent any
parameters {
string(name: 'KAGGLE_USERNAME', defaultValue: 'alicjaszulecka', description: 'Kaggle username')
password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
}
buildSelector (
defaultSelector: lastSuccessful(),
description: 'Build for copying artifacts',
name: 'BUILD_SELECTOR'
)
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'model', name: 'BRANCH', type: 'PT_BRANCH'
}
triggers {
upstream(upstreamProjects: 's464914-training/' + params.BRANCH + '/', threshold: hudson.model.Result.SUCCESS)
}
stages {
stage('Git Checkout') {
steps {
checkout scm
}
}
stage('Download dataset') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'pip install kaggle'
sh 'kaggle datasets download -d uciml/forest-cover-type-dataset'
sh 'unzip -o forest-cover-type-dataset.zip'
sh 'rm forest-cover-type-dataset.zip'
stage('Copy Artifacts') {
steps {
copyArtifacts fingerprintArtifacts: true, projectName: 'z-s464914-create-dataset', selector: buildParameter('BUILD_SELECTOR')
copyArtifacts filter: '*', projectName: 's464914-training/' + params.BRANCH + '/', selector: buildParameter('BUILD_SELECTOR')
copyArtifacts filter: '*', projectName: 's464914-evaluation/evaluation/', selector: buildParameter('BUILD_SELECTOR'), optional: true
}
}
}
}
stage('Build') {
steps {
script {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
def customImage = docker.build("custom-image")
customImage.inside {
sh 'python3 ./IUM_2.py'
archiveArtifacts artifacts: 'covtype.csv, forest_train.csv, forest_test.csv, forest_val.csv', onlyIfSuccessful: true
}
}
}
}
}
stage('Train and Predict') {
stage('Prediction') {
steps {
script {
def customImage = docker.build("custom-image")
customImage.inside {
sh 'python3 ./model.py'
sh 'python3 ./prediction.py'
archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
archiveArtifacts artifacts: 'predictions.txt', onlyIfSuccessful: true
}
}
}
}
stage('Metrics') {
steps {
script {
def customImage = docker.build("custom-image")
customImage.inside {
sh 'python3 ./metrics.py'
archiveArtifacts artifacts: 'metrics.txt', onlyIfSuccessful: true
}
}
}
}
stage('Plot Accuracy') {
steps {
script {
def customImage = docker.build("custom-image")
customImage.inside {
sh 'python3 ./plot.py'
archiveArtifacts artifacts: 'accuracy.png', onlyIfSuccessful: true
}
}
}

24
metrics.py Normal file
View File

@ -0,0 +1,24 @@
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
import numpy as np
def read_labels(lines):
    """Parse prediction records into parallel lists of integer labels.

    Each non-blank line is split on whitespace; the token at index 1 is
    taken as the predicted label and the token at index 3 as the true
    label (i.e. lines shaped like ``predicted: <int> true_label: <int>``).

    Args:
        lines: Any iterable of text lines, e.g. an open file object.

    Returns:
        A ``(true_labels, predicted_labels)`` tuple of two lists of ints,
        in the order the lines were read.

    Raises:
        IndexError: If a non-blank line has fewer than four tokens.
        ValueError: If a label token is not an integer.
    """
    true_labels = []
    predicted_labels = []
    for line in lines:
        parts = line.split()
        if not parts:
            # A blank (e.g. trailing) line carries no record; skip it
            # instead of failing on parts[3].
            continue
        true_labels.append(int(parts[3]))
        predicted_labels.append(int(parts[1]))
    return true_labels, predicted_labels


def main():
    """Compute classification metrics from predictions.txt.

    Reads ``predictions.txt`` from the working directory and appends
    accuracy, micro-averaged precision/recall/F1 and RMSE to
    ``metrics.txt``.
    """
    # The context manager guarantees the input file is closed.
    with open("predictions.txt", "r") as f:
        true_labels, predicted_labels = read_labels(f)

    accuracy = accuracy_score(true_labels, predicted_labels)
    precision_micro = precision_score(true_labels, predicted_labels, average='micro')
    recall_micro = recall_score(true_labels, predicted_labels, average='micro')
    f1_micro = f1_score(true_labels, predicted_labels, average='micro')
    rmse = np.sqrt(mean_squared_error(true_labels, predicted_labels))

    # Append mode keeps whatever is already in metrics.txt, so results
    # from earlier runs are preserved alongside this one.
    with open('metrics.txt', 'a') as fp:
        fp.write(f"Accuracy: {accuracy}\n")
        fp.write(f"Precision: {precision_micro}\n")
        fp.write(f"Recall: {recall_micro}\n")
        fp.write(f"F1-score: {f1_micro}\n")
        fp.write(f"RMSE: {rmse}\n")


if __name__ == "__main__":
    main()

View File

@ -6,6 +6,7 @@ import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F
import sys
device = (
@ -30,6 +31,9 @@ class Model(nn.Module):
return x
def main():
epochs = int(sys.argv[1])
print(epochs)
forest_train = pd.read_csv('forest_train.csv')
forest_val = pd.read_csv('forest_val.csv')
@ -59,7 +63,6 @@ def main():
val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
# Training loop
epochs = 10
for epoch in range(epochs):
model.train() # Set model to training mode
running_loss = 0.0

21
plot.py Normal file
View File

@ -0,0 +1,21 @@
import matplotlib.pyplot as plt
import numpy as np
def read_accuracies(lines):
    """Collect the accuracy values found in metrics-file lines.

    A line contributes a value when its first whitespace-separated token
    is exactly ``Accuracy:``; the second token is parsed as a float.
    All other lines (including blank ones) are ignored.

    Args:
        lines: Any iterable of text lines, e.g. an open file object.

    Returns:
        A list of floats, in the order the matching lines were read.

    Raises:
        IndexError: If an ``Accuracy:`` line has no value token.
        ValueError: If the value token is not a valid float.
    """
    accuracy = []
    for line in lines:
        parts = line.split()
        if parts and parts[0] == 'Accuracy:':
            accuracy.append(float(parts[1]))
    return accuracy


def main():
    """Plot every accuracy entry in metrics.txt and save accuracy.png."""
    # The context manager guarantees the input file is closed.
    with open("metrics.txt", "r") as f:
        accuracy = read_accuracies(f)

    # One x position per accuracy entry, numbered from 1.
    build_numbers = np.arange(1, len(accuracy) + 1)

    plt.plot(build_numbers, accuracy, marker='o', linestyle='-', color='b')
    plt.xlabel('Build Number')
    plt.ylabel('Accuracy')
    plt.title('Accuracy Plot')
    plt.grid(True)

    # Save BEFORE show(): on interactive backends the figure is destroyed
    # once the window shown by show() is closed, so a later savefig()
    # would write an empty image.
    plt.savefig('accuracy.png')
    plt.show()


if __name__ == "__main__":
    main()

View File

@ -6,6 +6,8 @@ import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
import numpy as np
device = (
"cuda"
@ -41,7 +43,6 @@ def predict(model, input_data):
return predicted_class.item() # Return the predicted class label
def main():
forest_test = pd.read_csv('forest_test.csv')
@ -55,15 +56,23 @@ def main():
load_model(model, model_path)
predictions = []
for input_data in X_test:
predicted_class = predict(model, input_data)
predictions.append(predicted_class)
correct = 0
total = 0
with torch.no_grad():
for input_data, target in zip(X_test, y_test):
output = model(input_data)
_, predicted_class = torch.max(output, 0)
prediction_entry = f"predicted: {predicted_class.item()} true_label: {target}"
predictions.append(prediction_entry)
total += 1
if predicted_class.item() == target:
correct += 1
with open(r'predictions.txt', 'w') as fp:
for item in predictions:
# write each item on a new line
fp.write("%s\n" % item)
if __name__ == "__main__":
main()

116203
predictions.txt Normal file

File diff suppressed because it is too large Load Diff