Docker-jenkins loading data test

This commit is contained in:
MatOgr 2022-04-03 11:34:18 +02:00
parent 4af4543bb3
commit c41f389915
3 changed files with 48 additions and 5 deletions

View File

@ -1,14 +1,22 @@
# Inherit from some existing image # Inherit from some existing image
FROM ubuntu:latest FROM ubuntu:latest
# Install required dependencies
RUN apt update && apt install -y figlet
# Create the /app directory # Create the /app directory
WORKDIR /app WORKDIR /app
# Install required dependencies
ADD . .
RUN apt-get update && \
apt-get install -y python3.8 python3-pip figlet unzip
RUN pip install -r requirements.txt
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY
# Copy scripts to the catalog # Copy scripts to the catalog
COPY ./figlet-loop.sh ./ COPY ./load_data.sh /
# COPY ./kaggle.json /root/.kaggle/kaggle.json
# Run the copied script # Run the copied script
CMD ./figlet-loop.sh RUN chmod +x /load_data.sh
RUN /load_data.sh

22
Jenkinsfile_docker Normal file
View File

@ -0,0 +1,22 @@
// Declarative pipeline: builds the repository's Dockerfile and uses the
// resulting image as the build agent, runs the data statistics script inside
// it, and archives the generated avocado.data* files.
pipeline {
    // properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: <object of type hudson.util.Secret>, gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])
    agent {
        dockerfile {
            // Every build argument needs its own `--build-arg` flag
            // (`--build-args` is not a docker build option and aborts the build).
            // NOTE(review): KAGGLE_USERNAME / KAGGLE_KEY are read from job
            // parameters that are not declared in this file - confirm they are
            // defined on the job. Values passed via --build-arg remain visible
            // in the image history, so consider a secret mount for the key.
            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
        }
    }
    stages {
        stage('sh: Shell script') {
            steps {
                // Make sure the script is executable before running it.
                sh 'chmod u+x ./data_stats.sh'
                sh './data_stats.sh'
            }
        }
        stage('Archive arifacts') {
            // Declarative syntax requires build steps to be wrapped in `steps { }`.
            steps {
                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
            }
        }
    }
}

View File

@ -1,13 +1,26 @@
#!/bin/bash
figlet "Welcome $KAGGLE_USERNAME" figlet "Welcome $KAGGLE_USERNAME"
# Clean the previous files
rm -r avocado.data* rm -r avocado.data*
echo "Removed previous data files"
# Install kaggle and python modules
# pip3 install --user kaggle
# pip3 install --user pandas
# Download the data
echo "Loading dataset..."
kaggle datasets download -d neuromusic/avocado-prices kaggle datasets download -d neuromusic/avocado-prices
echo "Extracting files from zip archive..."
unzip -o avocado-prices.zip unzip -o avocado-prices.zip
# Dividing data
echo "Start the data splitting..."
tail -n +2 avocado.csv | shuf > avocado_shuf.csv tail -n +2 avocado.csv | shuf > avocado_shuf.csv
head -n 14000 avocado_shuf.csv > avocado.data.train head -n 14000 avocado_shuf.csv > avocado.data.train
tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
tail -n 2000 avocado_shuf.csv > avocado.data.test tail -n 2000 avocado_shuf.csv > avocado.data.test
# Saving simple stats in a text file
echo "Getting simple stats..."
wc -l avocado.data* > results.txt wc -l avocado.data* > results.txt