Zad 04.Jenkins - Konteneryzacja
This commit is contained in:
parent
ea5f7a6a04
commit
924738d4b3
19
Dockerfile
Normal file
19
Dockerfile
Normal file
@ -0,0 +1,19 @@
|
||||
# Image that runs process_dataset.py against the bundled steel-industry CSV.
#
# Pinned to a fixed Ubuntu release: `latest` silently moves between releases,
# and newer ones reject a bare `pip install` (PEP 668, externally managed
# environment), which would break this build without any change to the file.
FROM ubuntu:22.04

# Update and install in ONE layer so a cached, stale package index is never
# reused; apt-get (not apt) has a stable CLI for scripts; the list cleanup
# happens in the same layer so it actually reduces the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# `sklearn` on PyPI is a deprecated alias that no longer installs; the real
# distribution name is `scikit-learn` (the import name stays `sklearn`).
RUN pip install --user --no-cache-dir kaggle pandas seaborn scikit-learn

# Kaggle credentials must NOT be baked into the image: every layer is readable
# by anyone who can pull it. Supply them when the container is started, e.g.
#   docker run -e KAGGLE_USERNAME=... -e KAGGLE_KEY=... <image>
# or bind-mount a kaggle.json into the directory created below.
# NOTE(review): the API key that was previously committed here is compromised
# and should be revoked in the Kaggle account settings.
RUN mkdir -p ~/.kaggle/

WORKDIR /ium

#COPY ./download_dataset.sh ./
COPY ./Steel_industry_data.csv ./
COPY ./process_dataset.py ./
#COPY ./stats.sh ./

#CMD ./download_dataset.sh
# Exec form: python3 runs as PID 1 and receives stop signals directly.
CMD ["python3", "process_dataset.py"]
#CMD ./stats.sh
|
2
download_dataset.sh
Normal file
2
download_dataset.sh
Normal file
@ -0,0 +1,2 @@
|
||||
#!/bin/sh
# Download the steel-industry energy-consumption dataset archive from Kaggle
# and unpack it into the current directory.
# The kaggle CLI needs credentials at run time (KAGGLE_USERNAME / KAGGLE_KEY
# environment variables or a kaggle.json file in its config directory).

# Stop at the first failing command: without this a failed download would be
# ignored and unzip could unpack a stale archive left over from an earlier run.
set -eu

# --force downloads the archive again even if a local copy already exists.
kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force

# -o overwrites existing files without prompting; -j discards the directory
# structure stored in the archive so all files land in the current directory.
unzip -o -j steel-industry-energy-consumption.zip
|
25
process_dataset.py
Normal file
25
process_dataset.py
Normal file
@ -0,0 +1,25 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Seed shared by both splits so repeated runs produce the same partitions.
SEED = 1

# Load the complete dataset from the working directory.
dataset = pd.read_csv('Steel_industry_data.csv')

# Two-stage split: first set aside 7008 rows as a hold-out pool, then move
# 3504 rows of that pool into the dev set; the rest of the pool is the test set.
train_part, holdout = train_test_split(dataset, test_size=7008, random_state=SEED)
test_part, dev_part = train_test_split(holdout, test_size=3504, random_state=SEED)

# Summary statistics for every column of the unsplit dataset.
print(dataset.describe(include='all'))

# Report the (rows, columns) shape of each partition.
for heading, subset in (
    ('Training set size:', train_part),
    ('Testing set size:', test_part),
    ('Dev set size:', dev_part),
):
    print(heading)
    print(subset.shape)

# Optional per-partition statistics, disabled by default:
#for subset in (train_part, test_part, dev_part):
#    print(subset.describe(include='all'))

# Write each partition to its own CSV file, without the index column.
for target, subset in (
    ("steel_industry_data_test.csv", test_part),
    ("steel_industry_data_dev.csv", dev_part),
    ("steel_industry_data_train.csv", train_part),
):
    subset.to_csv(target, encoding="utf-8", index=False)
|
Loading…
Reference in New Issue
Block a user