separate repo for lab4
parent fb5d31d05e
commit 5f70d65a4d
12  lab4/Dockerfile  Normal file
@@ -0,0 +1,12 @@
FROM ubuntu:latest

WORKDIR /ium

RUN apt update && apt install -y python3-pip

RUN pip3 install pandas
RUN pip3 install numpy
RUN pip3 install scikit-learn

COPY ./lego_sets.csv ./
COPY ./process_dataset.py ./
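Note: the image built from this Dockerfile appears to be the one referenced as s449288/ium:2.0 in lab4/Jenkinsfile1 below. A minimal sketch of how it might be built and published, assuming a Docker Hub account named s449288 and lab4/ as the build context (neither step is part of this commit):

    # build the image from lab4/ and tag it to match the reference in Jenkinsfile1
    docker build -t s449288/ium:2.0 lab4/
    # push it so a Jenkins agent can pull it (assumes an active Docker Hub login)
    docker push s449288/ium:2.0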
15  lab4/Jenkinsfile  vendored  Normal file
@@ -0,0 +1,15 @@
pipeline {
    agent {
        dockerfile true
    }
    stages {
        stage('Stage 1') {
            steps {
                sh 'chmod u+x ./process_dataset.py'
                echo 'Processing dataset...'
                sh 'python3 process_dataset.py'
                echo 'Dataset processed'
            }
        }
    }
}
15  lab4/Jenkinsfile1  Normal file
@@ -0,0 +1,15 @@
pipeline {
    agent {
        docker { image 's449288/ium:2.0' }
    }
    stages {
        stage('Stage 1') {
            steps {
                sh 'chmod u+x ./process_dataset.py'
                echo 'Processing dataset...'
                sh 'python3 process_dataset.py'
                echo 'Dataset processed'
            }
        }
    }
}
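Note: the two pipelines differ only in the agent: lab4/Jenkinsfile builds the agent image from the repository's Dockerfile (dockerfile true), while lab4/Jenkinsfile1 pulls the prebuilt s449288/ium:2.0 image. A rough local equivalent of the stage steps, assuming that image exists locally (the bind mount is only so the output CSVs land on the host; it is not part of either pipeline):

    # run the processing script inside the image, mounting the host lab4/ directory over the /ium workdir
    docker run --rm -v "$PWD/lab4":/ium s449288/ium:2.0 python3 process_dataset.py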
130196  lab4/lego_sets.csv  Executable file
File diff suppressed because it is too large
30  lab4/process_dataset.py  Normal file
@@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# drop rows with empty fields while loading
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()

# list_price can be rounded to two decimal places
lego['list_price'] = lego['list_price'].round(2)

# num_reviews, piece_count and prod_id can be integer values
lego['num_reviews'] = lego['num_reviews'].apply(np.int64)
lego['piece_count'] = lego['piece_count'].apply(np.int64)
lego['prod_id'] = lego['prod_id'].apply(np.int64)

# preview and statistics
print(lego)
print(lego.describe(include='all'))

# first split: carve out the training set
lego_train, lego_rem = train_test_split(lego, train_size=0.8, random_state=1)

# second split: divide the remainder into validation and test sets
lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5, random_state=1)

# save the results
lego.to_csv('lego_sets_clean.csv', index=None, header=True)
lego_train.to_csv('lego_sets_clean_train.csv', index=None, header=True)
lego_valid.to_csv('lego_sets_clean_valid.csv', index=None, header=True)
lego_test.to_csv('lego_sets_clean_test.csv', index=None, header=True)
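Note: train_size=0.8 followed by test_size=0.5 on the remainder gives roughly an 80/10/10 train/validation/test split. A quick sanity check after running the script (file names are the ones written by the code above; exact counts depend on how many rows dropna() removes, and each file includes a header row):

    # compare line counts of the full cleaned set against the three splits
    wc -l lego_sets_clean.csv lego_sets_clean_train.csv lego_sets_clean_valid.csv lego_sets_clean_test.csv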