Docker-jenkins loading data test
This commit is contained in:
parent
4af4543bb3
commit
c41f389915
18
Dockerfile
18
Dockerfile
@ -1,14 +1,22 @@
|
||||
# Image that installs the Python/Kaggle tooling and, at build time, downloads
# and splits the avocado dataset by running load_data.sh.

# Inherit from a pinned Ubuntu release. The python3.8 package installed below
# is only available on 20.04, so a floating "latest" tag breaks the build as
# soon as it moves to a newer release.
FROM ubuntu:20.04

# Create the /app directory
WORKDIR /app

# Install required OS dependencies in a single layer (update + install must
# share a layer so a cached, stale package index is never reused) and drop the
# apt lists afterwards to keep the layer small.
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        figlet \
        python3-pip \
        python3.8 \
        unzip && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the rest of the sources, so that
# editing a script does not invalidate the (slow) dependency layer.
COPY requirements.txt ./
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Copy the project (including figlet-loop.sh) into /app
COPY . .

# Copy scripts to the catalog
COPY ./load_data.sh /
# COPY ./kaggle.json /root/.kaggle/kaggle.json

# Kaggle credentials, supplied with --build-arg. They are exposed as
# environment variables to the RUN step below.
# NOTE(review): build args are recorded in `docker history`; consider
# switching to `RUN --mount=type=secret` once the CI job can pass secrets.
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY

# Download and split the dataset at build time; output lands in /app
RUN chmod +x /load_data.sh && /load_data.sh

# Run the copied script
# Exec form, so the script receives signals directly; it relies on
# figlet-loop.sh being executable and starting with a shebang line.
CMD ["./figlet-loop.sh"]
|
22
Jenkinsfile_docker
Normal file
22
Jenkinsfile_docker
Normal file
@ -0,0 +1,22 @@
|
||||
// Declarative pipeline that builds its agent image from the repository's
// Dockerfile (forwarding the Kaggle credentials as build arguments), runs
// data_stats.sh inside that container and archives the avocado.data* files.
pipeline {
    // properties([[$class: 'GogsProjectProperty', gogsBranchFilter: '', gogsSecret: <object of type hudson.util.Secret>, gogsUsePayload: false], [$class: 'RebuildSettings', autoRebuild: false, rebuildDisabled: false], parameters([string('CUTOFF')]), pipelineTriggers([pollSCM('')])])

    agent {
        dockerfile {
            // Each build argument needs its own `--build-arg` flag.
            // NOTE(review): KAGGLE_USERNAME / KAGGLE_KEY are read from `params`
            // but no `parameters` block is declared in this file; they are
            // presumably configured on the job itself -- confirm.
            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
        }
    }

    stages {
        stage('sh: Shell script') {
            steps {
                // Make sure the script is executable before invoking it.
                sh 'chmod u+x ./data_stats.sh'
                sh './data_stats.sh'
            }
        }
        stage('Archive arifacts') {
            // Declarative syntax requires build steps to live inside `steps`.
            steps {
                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
            }
        }
    }
}
|
||||
|
13
load_data.sh
13
load_data.sh
@ -1,13 +1,26 @@
|
||||
#!/bin/bash
# Downloads the Kaggle "avocado-prices" dataset, extracts it, shuffles the
# rows (header dropped) and writes train / valid / test splits plus a line
# count summary in results.txt. All files are created in the current
# working directory.
#
# Environment:
#   KAGGLE_USERNAME  shown in the welcome banner (optional for the banner).

# Stop at the first failing command, unset variable or failed pipeline stage,
# so a failed download or extraction cannot silently produce empty splits.
set -euo pipefail

# Split sizes (rows). The test split receives every row after train + valid,
# which keeps the three splits disjoint whatever the dataset size is.
TRAIN_ROWS=14000
VALID_ROWS=2249

# The banner is cosmetic: never fail the run because of it.
figlet "Welcome ${KAGGLE_USERNAME:-}" || true

# Clean the previous files
# -f: do not fail when there is nothing to remove (e.g. on a fresh checkout).
rm -rf avocado.data*
echo "Removed previous data files"
# Install kaggle and python modules
# (disabled: the tools are expected to be preinstalled in the environment)
# pip3 install --user kaggle
# pip3 install --user pandas

# Download the data
echo "Loading dataset..."
kaggle datasets download -d neuromusic/avocado-prices
echo "Extracting files from zip archive..."
unzip -o avocado-prices.zip

# Dividing data
echo "Start the data splitting..."
# Skip the CSV header line, then shuffle the remaining rows.
tail -n +2 avocado.csv | shuf > avocado_shuf.csv

valid_start=$((TRAIN_ROWS + 1))
valid_end=$((TRAIN_ROWS + VALID_ROWS))
test_start=$((valid_end + 1))

head -n "$TRAIN_ROWS" avocado_shuf.csv > avocado.data.train
# sed selects the row range without a `tail | head` pipeline, whose early
# exit of `head` could raise SIGPIPE and trip `pipefail`.
sed -n "${valid_start},${valid_end}p" avocado_shuf.csv > avocado.data.valid
tail -n "+${test_start}" avocado_shuf.csv > avocado.data.test

# Saving simple stats in a text file
echo "Getting simple stats..."
wc -l avocado.data* > results.txt
|
Loading…
Reference in New Issue
Block a user