cleanup of lab6 files
parent 75d849741e
commit 509fd41c5f
@@ -11,6 +11,6 @@ RUN pip3 install tensorflow
 RUN pip3 install matplotlib
 RUN pip3 install keras
 
+COPY ./lego_sets.csv ./
+COPY ./process_dataset.py ./
 COPY ./simple_regression.py ./
-COPY ./evaluate.py ./
-COPY ./plot.py ./
@@ -1,6 +1,8 @@
 pipeline {
     agent {
-        dockerfile true
+        dockerfile {
+            dir 'lab6'
+        }
     }
     parameters {
         gitParameter branchFilter: 'origin/(.*)', defaultValue: 'master', name: 'BRANCH', type: 'PT_BRANCH'
@@ -1,6 +1,8 @@
 pipeline {
     agent {
-        dockerfile true
+        dockerfile {
+            dir 'lab6'
+        }
     }
     parameters {
         string(
130196 lab6/lego_sets.csv Executable file
File diff suppressed because it is too large
30 lab6/process_dataset.py Normal file
@@ -0,0 +1,30 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+# while we're at it, drop rows with empty fields
+lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
+
+# list_price may have up to two decimal places
+lego['list_price'] = lego['list_price'].round(2)
+
+# num_reviews, piece_count and prod_id can be integer values
+lego['num_reviews'] = lego['num_reviews'].apply(np.int64)
+lego['piece_count'] = lego['piece_count'].apply(np.int64)
+lego['prod_id'] = lego['prod_id'].apply(np.int64)
+
+# inspection, statistics
+print(lego)
+print(lego.describe(include='all'))
+
+# first split: set aside the training set
+lego_train, lego_rem = train_test_split(lego, train_size=0.8, random_state=1)
+
+# second split: separate the validation and test sets
+lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5, random_state=1)
+
+# save
+lego.to_csv('lego_sets_clean.csv', index=None, header=True)
+lego_train.to_csv('lego_sets_clean_train.csv', index=None, header=True)
+lego_valid.to_csv('lego_sets_clean_valid.csv', index=None, header=True)
+lego_test.to_csv('lego_sets_clean_test.csv', index=None, header=True)
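Note: the two-step split above yields an 80/10/10 train/validation/test partition, since train_size=0.8 reserves 80% for training and test_size=0.5 then halves the remaining 20%. A minimal sketch of the arithmetic, using a hypothetical 100-row frame in place of the real data:

import pandas as pd
from sklearn.model_selection import train_test_split

# hypothetical 100-row frame standing in for lego_sets.csv
df = pd.DataFrame({'piece_count': range(100)})

# the same two-step split as in process_dataset.py
train, rem = train_test_split(df, train_size=0.8, random_state=1)
valid, test = train_test_split(rem, test_size=0.5, random_state=1)

print(len(train), len(valid), len(test))  # 80 10 10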
69 lab6/simple_regression.py Normal file
@@ -0,0 +1,69 @@
+import tensorflow as tf
+from keras import layers
+from keras.models import save_model
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+
+# read the example training argument (epoch count)
+EPOCHS_NUM = int(sys.argv[1])
+
+# load the data
+data_train = pd.read_csv('lego_sets_clean_train.csv')
+data_test = pd.read_csv('lego_sets_clean_test.csv')
+
+# extract the columns for predicting a set's price from the number of pieces it contains
+train_piece_counts = np.array(data_train['piece_count'])
+train_prices = np.array(data_train['list_price'])
+test_piece_counts = np.array(data_test['piece_count'])
+test_prices = np.array(data_test['list_price'])
+
+# normalization
+normalizer = layers.Normalization(input_shape=[1, ], axis=None)
+normalizer.adapt(train_piece_counts)
+
+# initialization
+model = tf.keras.Sequential([
+    normalizer,
+    layers.Dense(units=1)
+])
+
+# compilation
+model.compile(
+    optimizer=tf.optimizers.Adam(learning_rate=0.1),
+    loss='mean_absolute_error'
+)
+
+# training
+history = model.fit(
+    train_piece_counts,
+    train_prices,
+    epochs=EPOCHS_NUM,
+    verbose=0,
+    validation_split=0.2
+)
+
+# run predictions on the test set
+y_pred = model.predict(test_piece_counts)
+
+# save the predictions to a file
+results = pd.DataFrame({'test_set_piece_count': test_piece_counts.tolist(), 'predicted_price': [round(a[0], 2) for a in y_pred.tolist()]})
+results.to_csv('lego_reg_results.csv', index=False, header=True)
+
+# save the model to a file
+model.save('lego_reg_model')
+
+# optional statistics and plots
+'''
+hist = pd.DataFrame(history.history)
+hist['epoch'] = history.epoch
+print(hist.tail())
+
+plt.scatter(train_piece_counts, train_prices, label='Data')
+plt.plot(test_piece_counts, y_pred, color='k', label='Predictions')
+plt.xlabel('pieces')
+plt.ylabel('price')
+plt.legend()
+plt.show()
+'''
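Usage note: the script reads the epoch count from sys.argv[1], so a run looks like "python simple_regression.py 100" (100 is an arbitrary example). Because the trained model is written out with model.save('lego_reg_model'), it can be reloaded later; a minimal sketch, assuming TensorFlow 2.x and a hypothetical 500-piece query:

import numpy as np
import tensorflow as tf

# reload the SavedModel written by simple_regression.py
model = tf.keras.models.load_model('lego_reg_model')

# predict the list price for a hypothetical 500-piece set
pred = model.predict(np.array([500]))
print(round(float(pred[0][0]), 2))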
@@ -13,6 +13,6 @@ RUN pip3 install keras
 RUN pip3 install sacred
 RUN pip3 install pymongo
 
+COPY ./lego_sets.csv ./
+COPY ./process_dataset.py ./
 COPY ./simple_regression.py ./
-COPY ./evaluate.py ./
-COPY ./plot.py ./
130196 lab7/lego_sets.csv Executable file
File diff suppressed because it is too large
30 lab7/process_dataset.py Normal file
@@ -0,0 +1,30 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+# while we're at it, drop rows with empty fields
+lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
+
+# list_price may have up to two decimal places
+lego['list_price'] = lego['list_price'].round(2)
+
+# num_reviews, piece_count and prod_id can be integer values
+lego['num_reviews'] = lego['num_reviews'].apply(np.int64)
+lego['piece_count'] = lego['piece_count'].apply(np.int64)
+lego['prod_id'] = lego['prod_id'].apply(np.int64)
+
+# inspection, statistics
+print(lego)
+print(lego.describe(include='all'))
+
+# first split: set aside the training set
+lego_train, lego_rem = train_test_split(lego, train_size=0.8, random_state=1)
+
+# second split: separate the validation and test sets
+lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5, random_state=1)
+
+# save
+lego.to_csv('lego_sets_clean.csv', index=None, header=True)
+lego_train.to_csv('lego_sets_clean_train.csv', index=None, header=True)
+lego_valid.to_csv('lego_sets_clean_valid.csv', index=None, header=True)
+lego_test.to_csv('lego_sets_clean_test.csv', index=None, header=True)