Docker-jenkins loading data test

2022-04-03 11:34:18 +02:00 · 2022-04-03 11:34:18 +02:00 · c41f389915
commit c41f389915
parent 4af4543bb3
3 changed files with 48 additions and 5 deletions
--- a/18
+++ b/18
@ -1,14 +1,22 @@
 # Inherit from a pinned Ubuntu release: python3.8 (installed below) is not packaged in newer releases
 FROM ubuntu:20.04

-# Install required dependencies
-RUN apt update && apt install -y figlet
-
 # Create /app and make it the working directory for the following instructions
 WORKDIR /app

+# Install OS packages first so source changes do not invalidate this layer, then copy the context and install Python deps
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y python3.8 python3-pip figlet unzip
+COPY . .
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Kaggle credentials supplied via --build-arg. NOTE(review): build args are recorded in the image history.
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
 # Copy the data-loading script to the image root
-COPY ./figlet-loop.sh ./
+COPY ./load_data.sh /
+# COPY ./kaggle.json /root/.kaggle/kaggle.json  -- NOTE(review): disabled; presumably superseded by the KAGGLE_* build args

 # Run the copied script at build time (RUN, not CMD), so its output files end up in the image under /app
-CMD ./figlet-loop.sh
+# chmod and the script run are one step, so they share a single layer
+RUN chmod +x /load_data.sh && /load_data.sh
--- a/22
+++ b/22
@ -0,0 +1,22 @@
+pipeline {
+    // properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: <object of type hudson.util.Secret>, gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])
+    // Build the agent image from the repository Dockerfile; each Kaggle credential needs its own --build-arg flag.
+    agent {
+        dockerfile {
+            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
+        }
+    }
+    stages {
+        stage('sh: Shell script') {
+            steps {
+                sh 'chmod u+x ./data_stats.sh'
+                sh './data_stats.sh'
+            }
+        }
+        stage('Archive arifacts') {
+            steps { // a declarative stage must wrap its steps in a steps block
+                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
+            }
+        }
+    }
+}
--- a/load_data.sh
+++ b/load_data.sh
@ -1,13 +1,26 @@
+#!/bin/bash
 figlet "Welcome $KAGGLE_USERNAME"

+# Clean the previous files
 rm -r avocado.data*
+echo "Removed previous data files"
+# Install kaggle and python modules
+# pip3 install --user kaggle
+# pip3 install --user pandas

+# Download the data 
+echo "Loading dataset..."
 kaggle datasets download -d neuromusic/avocado-prices
+echo "Extracting files from zip archive..."
 unzip -o avocado-prices.zip

+# Dividing data 
+echo "Start the data splitting..."
 tail -n +2 avocado.csv | shuf > avocado_shuf.csv
 head -n 14000 avocado_shuf.csv > avocado.data.train
 tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
 tail -n 2000 avocado_shuf.csv > avocado.data.test

+# Saving simple stats in a text file 
+echo "Getting simple stats..."
 wc -l avocado.data* > results.txt