Docker-jenkins loading data test
This commit is contained in:
parent
4af4543bb3
commit
c41f389915
18
Dockerfile
18
Dockerfile
@@ -1,14 +1,22 @@
|
|||||||
# Inherit from some existing image
|
# Inherit from some existing image
|
||||||
FROM ubuntu:latest
|
FROM ubuntu:latest
|
||||||
|
|
||||||
# Install required dependencies
|
|
||||||
RUN apt update && apt install -y figlet
|
|
||||||
|
|
||||||
# Create the /app directory
|
# Create the /app directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install required dependencies
|
||||||
|
ADD . .
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y python3.8 python3-pip figlet unzip
|
||||||
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
ARG KAGGLE_USERNAME
|
||||||
|
ARG KAGGLE_KEY
|
||||||
|
|
||||||
# Copy scripts to the catalog
|
# Copy scripts to the catalog
|
||||||
COPY ./figlet-loop.sh ./
|
COPY ./load_data.sh /
|
||||||
|
# COPY ./kaggle.json /root/.kaggle/kaggle.json
|
||||||
|
|
||||||
# Run the copied script
|
# Run the copied script
|
||||||
CMD ./figlet-loop.sh
|
RUN chmod +x /load_data.sh
|
||||||
|
RUN /load_data.sh
|
22
Jenkinsfile_docker
Normal file
22
Jenkinsfile_docker
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
pipeline {
|
||||||
|
// properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: <object of type hudson.util.Secret>, gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])
|
||||||
|
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stages {
|
||||||
|
stage('sh: Shell script') {
|
||||||
|
steps {
|
||||||
|
sh 'chmod u+x ./data_stats.sh'
|
||||||
|
sh './data_stats.sh'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Archive artifacts') {
|
||||||
|
steps { archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
13
load_data.sh
13
load_data.sh
@@ -1,13 +1,26 @@
|
|||||||
|
#!/bin/bash
|
||||||
figlet "Welcome $KAGGLE_USERNAME"
|
figlet "Welcome $KAGGLE_USERNAME"
|
||||||
|
|
||||||
|
# Clean the previous files
|
||||||
rm -r avocado.data*
|
rm -r avocado.data*
|
||||||
|
echo "Removed previous data files"
|
||||||
|
# Install kaggle and python modules
|
||||||
|
# pip3 install --user kaggle
|
||||||
|
# pip3 install --user pandas
|
||||||
|
|
||||||
|
# Download the data
|
||||||
|
echo "Loading dataset..."
|
||||||
kaggle datasets download -d neuromusic/avocado-prices
|
kaggle datasets download -d neuromusic/avocado-prices
|
||||||
|
echo "Extracting files from zip archive..."
|
||||||
unzip -o avocado-prices.zip
|
unzip -o avocado-prices.zip
|
||||||
|
|
||||||
|
# Dividing data
|
||||||
|
echo "Start the data splitting..."
|
||||||
tail -n +2 avocado.csv | shuf > avocado_shuf.csv
|
tail -n +2 avocado.csv | shuf > avocado_shuf.csv
|
||||||
head -n 14000 avocado_shuf.csv > avocado.data.train
|
head -n 14000 avocado_shuf.csv > avocado.data.train
|
||||||
tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
|
tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
|
||||||
tail -n 2000 avocado_shuf.csv > avocado.data.test
|
tail -n 2000 avocado_shuf.csv > avocado.data.test
|
||||||
|
|
||||||
|
# Saving simple stats in a text file
|
||||||
|
echo "Getting simple stats..."
|
||||||
wc -l avocado.data* > results.txt
|
wc -l avocado.data* > results.txt
|
Loading…
Reference in New Issue
Block a user