Containerize the Kaggle avocado-prices download/split and build it from a Jenkins dockerfile agent.

Review notes (text before the first "diff --git" header is ignored by patch tools):
- Blank context lines are reconstructed from the original @@ line counts; verify against the pre-image.
- "index" header lines are omitted because the post-image blob hashes changed with this revision.

diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,26 @@
 # Inherit from some existing image
-FROM ubuntu:latest
+# Pinned to focal: the python3.8 package installed below is not shipped by newer Ubuntu releases
+FROM ubuntu:20.04
 
-# Install required dependencies
-RUN apt update && apt install -y figlet
-
 # Create the /app directory
 WORKDIR /app
 
+# Install required dependencies
+COPY . .
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y python3.8 python3-pip figlet unzip && \
+    rm -rf /var/lib/apt/lists/*
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Kaggle credentials; ARGs are visible as environment variables to the RUN steps below.
+# NOTE: build args are recorded in the image history, so keep this image private.
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
+
 # Copy scripts to the catalog
-COPY ./figlet-loop.sh ./
+COPY ./load_data.sh /
+# COPY ./kaggle.json /root/.kaggle/kaggle.json
 
 # Run the copied script
-CMD ./figlet-loop.sh
\ No newline at end of file
+RUN chmod +x /load_data.sh
+RUN /load_data.sh
\ No newline at end of file
diff --git a/Jenkinsfile_docker b/Jenkinsfile_docker
new file mode 100644
--- /dev/null
+++ b/Jenkinsfile_docker
@@ -0,0 +1,26 @@
+pipeline {
+    // properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: , gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])
+
+    agent {
+        dockerfile {
+            // Each value needs its own --build-arg flag; they feed the ARGs declared in the Dockerfile
+            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
+        }
+    }
+
+    stages {
+        stage('sh: Shell script') {
+            steps {
+                sh 'chmod u+x ./data_stats.sh'
+                sh './data_stats.sh'
+            }
+        }
+        stage('Archive arifacts') {
+            // Declarative stages must wrap their work in a steps block
+            steps {
+                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
+            }
+        }
+    }
+}
+
diff --git a/load_data.sh b/load_data.sh
--- a/load_data.sh
+++ b/load_data.sh
@@ -1,13 +1,30 @@
+#!/bin/bash
+# Stop at the first failing command so a failed download or unzip fails the build
+set -e
+
 figlet "Welcome $KAGGLE_USERNAME"
-rm -r avocado.data*
+# Clean the previous files (-f: do not fail when there is nothing to remove)
+rm -rf avocado.data*
+echo "Removed previous data files"
 
+# Install kaggle and python modules
+# pip3 install --user kaggle
+# pip3 install --user pandas
+
+# Download the data
+echo "Loading dataset..."
 kaggle datasets download -d neuromusic/avocado-prices
+echo "Extracting files from zip archive..."
 unzip -o avocado-prices.zip
 
+# Dividing data
+echo "Start the data splitting..."
 tail -n +2 avocado.csv | shuf > avocado_shuf.csv
 
 head -n 14000 avocado_shuf.csv > avocado.data.train
 tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
 tail -n 2000 avocado_shuf.csv > avocado.data.test
 
+# Saving simple stats in a text file
+echo "Getting simple stats..."
 wc -l avocado.data* > results.txt
\ No newline at end of file