diff --git a/Jenkinsfile b/Jenkinsfile
index cc731a5..fabcd48 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -31,8 +31,17 @@ node {
             "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
             sh 'kaggle datasets download -d mssmartypants/water-quality > output.txt'
             sh 'unzip -o water-quality.zip >> output.txt'
-            sh 'head -n ${params.CUT} waterQuality1.csv > temp.csv'
-            sh 'mv temp.csv waterQuality1.csv'
+            // Down-sample the dataset to params.CUT lines: the first line (assumed
+            // to be the CSV header -- confirm against create_dataset.py) is kept and
+            // the remaining lines are drawn at random without replacement.
+            // This is done with shell tools because java.io.File in a Pipeline script
+            // resolves paths on the Jenkins controller, not in the agent workspace.
+            // toInteger() rejects non-numeric input before it reaches the shell.
+            def cut = "${params.CUT}".trim().toInteger()
+            // shuf rejects a negative count; CUT <= 1 leaves only the first line.
+            def sampleRows = Math.max(cut - 1, 0)
+            sh "{ head -n 1 waterQuality1.csv; tail -n +2 waterQuality1.csv | shuf -n ${sampleRows}; } > temp.csv"
+            sh 'mv temp.csv waterQuality1.csv'
             sh 'python3 create_dataset.py >> output.txt'
             archiveArtifacts artifacts: 'waterQuality.csv, output.txt'
         }