This commit is contained in:
Jakub Zaręba 2023-05-10 15:44:36 +02:00
parent 5499b32f1f
commit 3f421a765b
5 changed files with 7 additions and 22 deletions

19
Jenkinsfile vendored
View File

@@ -51,13 +51,11 @@ pipeline {
#!/bin/bash
pip install kaggle
kaggle datasets download -d nitishsharma01/olympics-124-years-datasettill-2020
unzip -o olympics-124-years-datasettill-2020.zip
git clone https://git.wmi.amu.edu.pl/s487187/ium_487187.git
echo "Processed Data" > output.txt
'''
sh "head -n ${params.CUTOFF} olympics-124-years-datasettill-2020/Athletes_summer_games.csv"
sh "head -n ${params.CUTOFF} data.csv"
}
} catch (err) {
error "Failed to build: ${err.message}"
@@ -66,19 +64,6 @@ pipeline {
}
}
stage('Clone Git Repository') {
when { expression { params.KAGGLE_USERNAME && params.KAGGLE_KEY } }
steps {
script {
try {
git 'https://git.wmi.amu.edu.pl/s487187/ium_487187.git'
} catch (err) {
error "Failed to clone repository: ${err.message}"
}
}
}
}
stage('End') {
when { expression { params.KAGGLE_USERNAME && params.KAGGLE_KEY } }
steps {

View File

@@ -47,7 +47,7 @@ pipeline {
sh '''
#!/bin/bash
python3 count_lines.py --input_file olympics-124-years-datasettill-2020/Athletes_summer_games.csv > output.txt
python3 count_lines.py --input_file olympics-124-years-datasettill-2020/Athletes_winter_games.csv > output.txt
'''
}
}

View File

@@ -12,7 +12,7 @@ pipeline {
stage('Pobierz dane') {
steps {
script {
copyArtifacts(projectName: 's487187-create-dataset', filter: '*.csv', target: 'data', fingerprintArtifacts: true)
copyArtifacts(projectName: 's487187-create-dataset', filter: '*.csv', target: 'Athletes_winter_games.csv', fingerprintArtifacts: true)
}
}
}

View File

@@ -4,7 +4,7 @@ from sklearn.preprocessing import MinMaxScaler
model = tf.keras.models.load_model('model.h5')
data = pd.read_csv('data.csv', sep=';')
data = pd.read_csv('Athletes_winter_games.csv', sep=';')
data = pd.get_dummies(data, columns=['Sex', 'Medal'])
data = data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event'])

View File

@@ -5,7 +5,7 @@ import tensorflow as tf
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
data = pd.read_csv('data.csv', sep=';')
data = pd.read_csv('Athletes_winter_games.csv', sep=';')
print('Total rows:', len(data))
print('Rows with medal:', len(data.dropna(subset=['Medal'])))