update
This commit is contained in:
parent
8b54c42295
commit
2cf15fcc25
17
Dockerfile
17
Dockerfile
@ -2,15 +2,16 @@ FROM ubuntu:latest
|
|||||||
|
|
||||||
WORKDIR /ium
|
WORKDIR /ium
|
||||||
|
|
||||||
COPY ./skrypt.sh ./
|
RUN apt update && apt install -y python3-pip unzip
|
||||||
COPY ./statystyki.sh ./
|
|
||||||
|
|
||||||
RUN chmod u+x ./skrypt.sh ./statystyki.sh
|
RUN pip3 install --user kaggle
|
||||||
|
RUN echo "export PATH=\"\$HOME/.local/bin:\$PATH\"" >> ~/.bashrc
|
||||||
|
|
||||||
RUN apt update && apt install -y python3-pip zip
|
RUN pip3 install --user pandas
|
||||||
|
RUN pip3 install --user numpy
|
||||||
|
# The PyPI name "sklearn" is a deprecated placeholder; the library imported as `sklearn` ships as "scikit-learn".
RUN pip3 install --user scikit-learn
|
||||||
|
|
||||||
RUN pip3 install kaggle
|
COPY ./download_dataset.sh ./
|
||||||
|
COPY ./process_dataset.py ./
|
||||||
|
|
||||||
RUN mkdir ~/.kaggle
|
RUN chmod u+x ./download_dataset.sh ./process_dataset.py
|
||||||
RUN echo "{\"username\":\"kaerde\",\"key\":\"9831adcf128300863404c16935674937\"}" > ~/.kaggle/kaggle.json
|
|
||||||
RUN echo "export PATH=\"\$HOME/.local/bin:\$PATH\"" >> ~/.bashrc
|
|
32
Jenkinsfile
vendored
Normal file
32
Jenkinsfile
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
// Declarative pipeline: builds the agent image from the repository's
// Dockerfile, then downloads and preprocesses the Kaggle dataset inside it.
pipeline {
    parameters {
        string(
            defaultValue: 'kaerde',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        // SECURITY: never commit a real API key as the default value. The key
        // must be entered when the build is started; any key that was
        // previously committed here should be revoked on Kaggle.
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
    }
    agent {
        dockerfile {
            // Credentials are forwarded to `docker build` as build arguments.
            // NOTE(review): the Dockerfile has to declare matching ARG
            // instructions for these values to be usable — confirm.
            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY}"
        }
    }
    stages {
        stage('Stage 1') {
            steps {
                echo 'Downloading dataset...'
                sh './download_dataset.sh'
                echo 'Dataset downloaded'
                echo 'Processing dataset...'
                sh 'python3 process_dataset.py'
                echo 'Dataset processed'
            }
        }
    }
}
|
3
download_dataset.sh
Normal file
3
download_dataset.sh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/sh
# Download the LEGO sets dataset from Kaggle and unpack it into the current
# directory.

# Abort on the first failing command so that a failed download never falls
# through to unzip and unpacks a stale lego-sets.zip left by an earlier run.
set -e

kaggle datasets download -d mterzolo/lego-sets
# -o: overwrite existing files without prompting (the script runs unattended).
unzip -o lego-sets.zip
|
30
process_dataset.py
Normal file
30
process_dataset.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Columns whose values can be stored as integers.
INTEGER_COLUMNS = ['num_reviews', 'piece_count', 'prod_id']


def clean_lego(raw):
    """Return a cleaned copy of the raw LEGO sets table.

    Rows containing any empty field are dropped, ``list_price`` is rounded
    to two decimal places, and the columns listed in ``INTEGER_COLUMNS`` are
    converted to ``int64``. The input frame is not modified.
    """
    lego = raw.dropna()
    lego = lego.assign(list_price=lego['list_price'].round(2))
    # One vectorised cast instead of calling np.int64 on every element.
    return lego.astype({column: np.int64 for column in INTEGER_COLUMNS})


def split_lego(lego):
    """Split *lego* into ``(train, valid, test)`` frames.

    The first split keeps 80% of the rows for training; the remainder is
    halved into validation and test sets. Both splits use ``random_state=1``
    so the result is reproducible.
    """
    lego_train, lego_rem = train_test_split(lego, train_size=0.8, random_state=1)
    lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5, random_state=1)
    return lego_train, lego_valid, lego_test


def main():
    """Clean ``lego_sets.csv``, print an overview and write the CSV outputs."""
    lego = clean_lego(pd.read_csv('lego_sets.csv'))

    # Preview and summary statistics.
    print(lego)
    print(lego.describe(include='all'))

    lego_train, lego_valid, lego_test = split_lego(lego)

    # Save; index=False is the documented way to omit the row index.
    lego.to_csv('lego_sets_clean.csv', index=False, header=True)
    lego_train.to_csv('lego_sets_clean_train.csv', index=False, header=True)
    lego_valid.to_csv('lego_sets_clean_valid.csv', index=False, header=True)
    lego_test.to_csv('lego_sets_clean_test.csv', index=False, header=True)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user