Folders clean-up

This commit is contained in:
MatOgr 2022-04-03 20:17:21 +02:00
parent e028156542
commit 70f369e725
4 changed files with 11 additions and 8 deletions

View File

@@ -5,7 +5,7 @@ FROM ubuntu:latest
WORKDIR /app WORKDIR /app
# Install required dependencies # Install required dependencies
ADD . . COPY ./requirements.txt .
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y python3.8 python3-pip figlet unzip apt-get install -y python3.8 python3-pip figlet unzip
RUN pip3 install -r requirements.txt RUN pip3 install -r requirements.txt
@@ -15,7 +15,7 @@ ARG KAGGLE_KEY
# Copy scripts to the catalog # Copy scripts to the catalog
COPY ./scripts/. / COPY ./scripts/. /
# COPY ./kaggle.json /root/.kaggle/kaggle.json COPY ./kaggle.json /root/.kaggle/kaggle.json
# Run the copied script # Run the copied script
RUN chmod +x /load_data.sh && /load_data.sh RUN chmod +x /load_data.sh && /load_data.sh

View File

@@ -12,7 +12,7 @@ pipeline {
steps { steps {
sh 'chmod u+x ./scripts/data_stats.sh' sh 'chmod u+x ./scripts/data_stats.sh'
sh './scripts/data_stats.sh' sh './scripts/data_stats.sh'
archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false archiveArtifacts artifacts: '*/avocado.data*', followSymlinks: false
} }
} }
// stage('Archive arifacts') { // stage('Archive arifacts') {

View File

@@ -2,9 +2,9 @@ import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler from sklearn.preprocessing import StandardScaler, MinMaxScaler
cols = list(pd.read_csv("avocado.csv", nrows=1)) cols = list(pd.read_csv("data/avocado.csv", nrows=1))
# print("###\n", cols, "\n###") # print("###\n", cols, "\n###")
avocados = pd.read_csv("avocado.csv", usecols=cols[1:]) avocados = pd.read_csv("data/avocado.csv", usecols=cols[1:])
avocados.describe(include="all") avocados.describe(include="all")
float_cols = ['AveragePrice','Total Volume','4046','4225','4770','Total Bags','Small Bags','Large Bags','XLarge Bags'] float_cols = ['AveragePrice','Total Volume','4046','4225','4770','Total Bags','Small Bags','Large Bags','XLarge Bags']
@@ -22,6 +22,6 @@ print("Train\n", avocado_train.describe(include="all"), "\n")
print("Valid\n", avocado_valid.describe(include="all"), "\n") print("Valid\n", avocado_valid.describe(include="all"), "\n")
print("Test\n", avocado_test.describe(include="all")) print("Test\n", avocado_test.describe(include="all"))
avocado_train.to_csv("avocado.data.train", index=False) avocado_train.to_csv("data/avocado.data.train", index=False)
avocado_valid.to_csv("avocado.data.valid", index=False) avocado_valid.to_csv("data/avocado.data.valid", index=False)
avocado_test.to_csv("avocado.data.test", index=False) avocado_test.to_csv("data/avocado.data.test", index=False)

View File

@@ -13,6 +13,9 @@ echo "Loading dataset..."
kaggle datasets download -d neuromusic/avocado-prices kaggle datasets download -d neuromusic/avocado-prices
echo "Extracting files from zip archive..." echo "Extracting files from zip archive..."
unzip -o avocado-prices.zip unzip -o avocado-prices.zip
rm avocado-prizes.zip
mkdir data
mv avocado.csv data/.
echo Done echo Done
# Dividing data # Dividing data
# echo "Start the data splitting..." # echo "Start the data splitting..."