Added new Jenkinsfile and Kaggle download script (kuggle_download.sh)

This commit is contained in:
AWieczarek 2024-03-24 14:39:33 +01:00
parent de92a2cf3b
commit 4a08061465
8 changed files with 99 additions and 28 deletions

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

8
.idea/ium_464979.iml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ium_464979.iml" filepath="$PROJECT_DIR$/.idea/ium_464979.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

60
Jenkinsfile vendored
View File

@ -1,29 +1,33 @@
// Declarative "Hello" pipeline: prints a greeting, renders it with figlet
// into output.txt, and archives that file as a build artifact.
pipeline {
    agent any
    // Define the parameters that can be supplied when the job is triggered.
    parameters {
        string(
            defaultValue: 'Hello World!',
            description: 'Tekst, którym chcesz przywitać świat',
            name: 'INPUT_TEXT',
            trim: false
        )
    }
    stages {
        stage('Hello') {
            steps {
                // Print the value to the console (this is a Groovy step, not a bash command!).
                // NOTE(review): INPUT_TEXT is declared above but a literal is used here and
                // in the figlet call below; confirm whether the parameter was meant to be used.
                echo "INPUT_TEXT: promoscan"
                // Run "figlet" in the shell; it renders the text as ASCII art.
                // tee shows the result in the console and also saves it to output.txt.
                sh "figlet \"promoscan\" | tee output.txt"
            }
        }
        stage('Goodbye!') {
            steps {
                echo 'Goodbye!'
                // Archive the result.
                archiveArtifacts 'output.txt'
            }
        }
    }
}
// Body of the declarative pipeline: clone the repository named by REPO_URL,
// then download and split the Kaggle dataset named by KAGGLE_DATASET_ID.
// The final closing brace of this block pairs with the `pipeline {` opener.
agent any
// A declarative `parameters` directive takes the parameter definitions directly.
// Wrapping them in properties([parameters([...])]) is scripted-pipeline syntax
// and is not valid inside this directive.
parameters {
    string(name: 'KAGGLE_DATASET_ID', defaultValue: '', description: 'Kaggle dataset')
    string(name: 'REPO_URL', defaultValue: '', description: 'Git Url')
    string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
}
stages {
    stage('Clone Repository') {
        steps {
            git url: params.REPO_URL
        }
    }
    stage('Download, Process, and Split Dataset') {
        steps {
            // KAGGLE_USERNAME / KAGGLE_KEY are not re-assigned here: the sh step
            // already inherits them from the build environment, and interpolating
            // an unset env.* value would export the literal string "null".
            // The build parameters are handed over as environment variables so the
            // shell, not Groovy, expands them; this keeps user-supplied values out
            // of the command text.
            withEnv([
                "DATASET_ID=${params.KAGGLE_DATASET_ID}",
                "CUTOFF=${params.CUTOFF}"
            ]) {
                // Single invocation; the values are also passed positionally, as the
                // original call did. Going through `bash` means the script does not
                // need its executable bit set.
                sh 'bash ./kuggle_download.sh "$DATASET_ID" "$CUTOFF"'
            }
        }
    }
}
}

27
kuggle_download.sh Normal file
View File

@ -0,0 +1,27 @@
#!/bin/bash
# Download a Kaggle dataset, shuffle its rows and split them into
# train / dev / test CSV files, then pack the results into artifacts.tar.gz.
#
# Inputs (environment variable first, positional argument as fallback):
#   DATASET_ID   or $1   Kaggle dataset id, "owner/name" (KAGGLE_DATASET_ID also accepted)
#   CUTOFF       or $2   optional; number of leading lines of train.csv copied to train_cutoff.csv
#   DATASET_FILE         optional; CSV inside the archive to process
#                        (default: first *.csv listed in the archive). If it names a
#                        *.zip it is used as the archive instead.
#   TRAIN_ROWS           optional; rows in train.csv (default 80000)
#   DEV_ROWS             optional; rows in dev.csv   (default 10000)
#
# The first line of the CSV is treated as a header: it is kept out of the
# shuffle and repeated at the top of train.csv, dev.csv and test.csv.
# test.csv receives every row left after train and dev.
#
# Output: artifacts.tar.gz in the current directory; its name is the last
# line written to stdout.
set -euo pipefail

dataset_id="${DATASET_ID:-${1:-${KAGGLE_DATASET_ID:-}}}"
cutoff="${CUTOFF:-${2:-}}"
dataset_file="${DATASET_FILE:-}"
train_rows="${TRAIN_ROWS:-80000}"
dev_rows="${DEV_ROWS:-10000}"

# Abort with a message on stderr so stdout stays reserved for the result name.
die() {
    echo "error: $*" >&2
    exit 1
}

# Succeeds only for a positive decimal integer.
is_positive_int() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
    esac
    [ "$1" -gt 0 ]
}

[ -n "$dataset_id" ] || die "no dataset id given (set DATASET_ID or pass it as the first argument)"
is_positive_int "$train_rows" || die "TRAIN_ROWS must be a positive integer, got '$train_rows'"
is_positive_int "$dev_rows" || die "DEV_ROWS must be a positive integer, got '$dev_rows'"
if [ -n "$cutoff" ]; then
    is_positive_int "$cutoff" || die "CUTOFF must be a positive integer, got '$cutoff'"
fi

pip install kaggle
kaggle datasets download -d "$dataset_id"

# The kaggle CLI saves the archive as <name>.zip, where <name> is the part
# of the dataset id after the slash.
archive="${dataset_id##*/}.zip"
case "$dataset_file" in
    *.zip)
        archive="$dataset_file"
        dataset_file=""
        ;;
esac
[ -f "$archive" ] || die "archive '$archive' not found after download"
unzip -o "$archive"

if [ -z "$dataset_file" ]; then
    # Capture the listing first: piping unzip straight into a command that
    # exits early could trip pipefail with SIGPIPE.
    listing="$(unzip -Z1 "$archive")"
    dataset_file="$(grep -i -m 1 '\.csv$' <<<"$listing" || true)"
fi
[ -f "$dataset_file" ] || die "dataset CSV '$dataset_file' not found after extracting '$archive'"

# Shuffle the data rows only; the header must stay on top of every split.
head -n 1 "$dataset_file" > header.csv
tail -n +2 "$dataset_file" | shuf > shuffled_dataset.csv

# write_split <output> <first-row> <last-row or $>
# Writes the header followed by the requested row range of the shuffled data.
# (`split -l N in PREFIX` would instead create PREFIXaa, PREFIXab, ...)
write_split() {
    {
        cat header.csv
        sed -n "${2},${3}p" shuffled_dataset.csv
    } > "$1"
}

write_split train.csv 1 "$train_rows"
write_split dev.csv "$((train_rows + 1))" "$((train_rows + dev_rows))"
write_split test.csv "$((train_rows + dev_rows + 1))" '$'

head -n 1000 train.csv > train_head.csv
tail -n 1000 train.csv > train_tail.csv

artifacts=(train.csv dev.csv test.csv train_head.csv train_tail.csv)
if [ -n "$cutoff" ]; then
    head -n "$cutoff" train.csv > train_cutoff.csv
    # Only archive the cutoff file when it was actually produced.
    artifacts+=(train_cutoff.csv)
fi

tar -czf artifacts.tar.gz "${artifacts[@]}"
rm -f -- "$dataset_file" shuffled_dataset.csv header.csv
echo "artifacts.tar.gz"