Docker-jenkins loading data test

2022-04-03 11:34:18 +02:00 · 2022-04-03 11:34:18 +02:00 · c41f389915
commit c41f389915
parent 4af4543bb3
3 changed files with 48 additions and 5 deletions
--- a/18
+++ b/18
@ -1,14 +1,22 @@
 # Base image pinned to Ubuntu 20.04: python3.8 (installed below) is that
 # release's default Python 3 and is not packaged in newer releases such as
 # 22.04, so a floating "latest" tag eventually breaks the build.
 FROM ubuntu:20.04
 # Create the /app directory and make it the working directory
 WORKDIR /app
 # Install the OS-level dependencies in a single layer so that the package
 # index and the install never come from different cache states; the apt lists
 # are removed in the same layer to keep the image small.
 RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        figlet \
        python3-pip \
        python3.8 \
        unzip && \
    rm -rf /var/lib/apt/lists/*
 # Install the Python dependencies before copying the rest of the build
 # context, so this layer is only rebuilt when requirements.txt changes.
 COPY requirements.txt ./
 RUN pip3 install --no-cache-dir -r requirements.txt
 # Copy the remaining build context into /app (COPY instead of ADD: only plain
 # local files are copied, no archive extraction or URL fetching is needed).
 COPY . .
 # Kaggle credentials supplied with --build-arg; they are visible as
 # environment variables to the RUN steps that follow.
 # NOTE(review): build args are recorded in the image history - consider a
 # BuildKit secret mount (RUN --mount=type=secret) for KAGGLE_KEY.
 ARG KAGGLE_USERNAME
 ARG KAGGLE_KEY
 # Copy the data-loading script into the image root directory
-COPY ./figlet-loop.sh ./
+COPY ./load_data.sh /
 # COPY ./kaggle.json /root/.kaggle/kaggle.json
 # Make the copied script executable and run it while the image is being built
-CMD ./figlet-loop.sh
+RUN chmod +x /load_data.sh
 RUN /load_data.sh
--- a/22
+++ b/22
@ -0,0 +1,22 @@
 // Declarative pipeline: builds the agent image from the repository's
 // Dockerfile (forwarding the Kaggle credentials as build arguments), runs
 // data_stats.sh inside that agent and archives the avocado.data* files.
 pipeline {
    // properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: <object of type hudson.util.Secret>, gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])
    agent {
        dockerfile {
            // Each build argument needs its own "--build-arg" flag
            // ("--build-args" is not a docker build option).
            // NOTE(review): KAGGLE_USERNAME / KAGGLE_KEY are read from params
            // but no parameters block is visible here - confirm they are
            // defined in the job configuration. The key is also interpolated
            // into the command line; consider Jenkins credentials instead.
            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
        }
    }
    stages {
        stage('sh: Shell script') {
            steps {
                // Ensure the script is executable before running it
                sh 'chmod u+x ./data_stats.sh'
                sh './data_stats.sh'
            }
        }
        stage('Archive arifacts') {
            // Declarative stages must wrap their build steps in steps { }
            steps {
                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
            }
        }
    }
 }
--- a/load_data.sh
+++ b/load_data.sh
@ -1,13 +1,26 @@
 #!/bin/bash
 # Downloads the neuromusic/avocado-prices dataset from Kaggle, shuffles the
 # rows (without the header line) and splits them into train / valid / test
 # files named avocado.data.*; line counts are written to results.txt.
 #
 # Stop on the first failing command, on unset variables and on failures
 # inside pipelines, so a failed download cannot silently yield empty splits.
 set -euo pipefail
 # KAGGLE_USERNAME may be unset; fall back to an empty name instead of aborting
 figlet "Welcome ${KAGGLE_USERNAME:-}"
 # Clean the previous files (-f: a clean tree with nothing to remove is fine)
 rm -rf -- avocado.data*
 echo "Removed previous data files"
 # Install kaggle and python modules
 # pip3 install --user kaggle
 # pip3 install --user pandas
 # Download the data
 echo "Loading dataset..."
 kaggle datasets download -d neuromusic/avocado-prices
 echo "Extracting files from zip archive..."
 unzip -o avocado-prices.zip
 # Dividing data: drop the CSV header, shuffle, then cut into three parts
 echo "Start the data splitting..."
 tail -n +2 avocado.csv | shuf > avocado_shuf.csv
 # First 14000 shuffled rows
 head -n 14000 avocado_shuf.csv > avocado.data.train
 # Rows 14001-16249 (2249 rows); sed is used instead of "tail | head" because
 # head closing the pipe early would fail the pipeline under pipefail
 sed -n '14001,16249p;16249q' avocado_shuf.csv > avocado.data.valid
 # Last 2000 shuffled rows
 tail -n 2000 avocado_shuf.csv > avocado.data.test
 # Saving simple stats (line count per split) in a text file
 echo "Getting simple stats..."
 wc -l avocado.data* > results.txt