Folders clean-up
parent e028156542
commit 70f369e725
@@ -5,7 +5,7 @@ FROM ubuntu:latest
 WORKDIR /app

 # Install required dependencies
-ADD . .
+COPY ./requirements.txt .
 RUN apt-get update && \
     apt-get install -y python3.8 python3-pip figlet unzip
 RUN pip3 install -r requirements.txt
@@ -15,7 +15,7 @@ ARG KAGGLE_KEY

 # Copy scripts to the catalog
 COPY ./scripts/. /
-# COPY ./kaggle.json /root/.kaggle/kaggle.json
+COPY ./kaggle.json /root/.kaggle/kaggle.json

 # Run the copied script
 RUN chmod +x /load_data.sh && /load_data.sh
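Since the second hunk now bakes kaggle.json into the image, a quick local check that the credentials file is well-formed can save a failed build. A minimal sketch, assuming the standard username/key layout that the kaggle CLI expects; it is not part of the repo:

import json
from pathlib import Path

# The Dockerfile copies ./kaggle.json to /root/.kaggle/; this checks the copy in the build context.
creds = json.loads(Path("kaggle.json").read_text())

# "username" and "key" are the fields the kaggle CLI looks for.
missing = {"username", "key"} - creds.keys()
print("kaggle.json OK" if not missing else f"kaggle.json is missing: {missing}")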
@@ -12,7 +12,7 @@ pipeline {
             steps {
                 sh 'chmod u+x ./scripts/data_stats.sh'
                 sh './scripts/data_stats.sh'
-                archiveArtifacts artifacts: 'avocado.data*', followSymlinks: false
+                archiveArtifacts artifacts: '*/avocado.data*', followSymlinks: false
             }
         }
         // stage('Archive arifacts') {
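The archive pattern changes because the split files now land under data/ rather than the workspace root (see the Python and shell hunks below). A throwaway way to sanity-check the new pattern locally is Python's glob, whose * behaves closely enough to the Ant-style glob Jenkins uses here; purely illustrative, not part of the repo:

from glob import glob

# Old pattern: only matched files in the workspace root.
print("root matches:", glob("avocado.data*"))
# New pattern: matches one directory level down, e.g. data/avocado.data.train.
print("nested matches:", glob("*/avocado.data*"))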
@@ -2,9 +2,9 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, MinMaxScaler

-cols = list(pd.read_csv("avocado.csv", nrows=1))
+cols = list(pd.read_csv("data/avocado.csv", nrows=1))
 # print("###\n", cols, "\n###")
-avocados = pd.read_csv("avocado.csv", usecols=cols[1:])
+avocados = pd.read_csv("data/avocado.csv", usecols=cols[1:])
 avocados.describe(include="all")

 float_cols = ['AveragePrice','Total Volume','4046','4225','4770','Total Bags','Small Bags','Large Bags','XLarge Bags']
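The split itself happens between these two hunks and is not shown in the diff. For context, a minimal sketch of a three-way split with train_test_split that would produce the avocado_train/avocado_valid/avocado_test frames used below; the 60/20/20 ratio and the random_state are assumptions, not taken from the repo:

import pandas as pd
from sklearn.model_selection import train_test_split

avocados = pd.read_csv("data/avocado.csv")  # path as in the new version of the script

# First carve out 40% for validation plus test, then halve it: 60/20/20 overall (assumed ratios).
avocado_train, avocado_rest = train_test_split(avocados, test_size=0.4, random_state=42)
avocado_valid, avocado_test = train_test_split(avocado_rest, test_size=0.5, random_state=42)

print(len(avocado_train), len(avocado_valid), len(avocado_test))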
@@ -22,6 +22,6 @@ print("Train\n", avocado_train.describe(include="all"), "\n")
 print("Valid\n", avocado_valid.describe(include="all"), "\n")
 print("Test\n", avocado_test.describe(include="all"))

-avocado_train.to_csv("avocado.data.train", index=False)
-avocado_valid.to_csv("avocado.data.valid", index=False)
-avocado_test.to_csv("avocado.data.test", index=False)
+avocado_train.to_csv("data/avocado.data.train", index=False)
+avocado_valid.to_csv("data/avocado.data.valid", index=False)
+avocado_test.to_csv("data/avocado.data.test", index=False)
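Once the script writes its outputs under data/, the '*/avocado.data*' pattern in the Jenkinsfile hunk above is what picks them up. A small throwaway check, not part of the repo, that the three files exist where the pipeline expects them:

from pathlib import Path

expected = [Path("data") / f"avocado.data.{part}" for part in ("train", "valid", "test")]
missing = [str(p) for p in expected if not p.is_file()]
print("all split files present" if not missing else f"missing: {missing}")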
@@ -13,6 +13,9 @@ echo "Loading dataset..."
 kaggle datasets download -d neuromusic/avocado-prices
 echo "Extracting files from zip archive..."
 unzip -o avocado-prices.zip
+rm avocado-prices.zip
+mkdir data
+mv avocado.csv data/.
 echo Done
 # Dividing data
 # echo "Start the data splitting..."
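The same download-and-relocate step could also be driven from Python via the kaggle package instead of the shell script; a rough sketch under that assumption, with the shell version remaining the source of truth:

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()  # reads ~/.kaggle/kaggle.json or KAGGLE_USERNAME/KAGGLE_KEY

# Download and unzip straight into data/, replacing the unzip/mkdir/mv steps above.
api.dataset_download_files("neuromusic/avocado-prices", path="data", unzip=True)
print("done")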