This commit is contained in:
Kacper 2022-04-08 18:16:40 +02:00
parent 7dde85eca0
commit b534c4a05d
4 changed files with 83 additions and 3 deletions

View File

@ -7,6 +7,10 @@ RUN apt update && apt install -y python3-pip
RUN pip3 install pandas RUN pip3 install pandas
RUN pip3 install numpy RUN pip3 install numpy
RUN pip3 install sklearn RUN pip3 install sklearn
RUN pip3 install tensorflow
RUN pip3 install matplotlib
RUN pip3 install keras
COPY ./lego_sets.csv ./ COPY ./lego_sets.csv ./
COPY ./process_dataset.py ./ COPY ./process_dataset.py ./
COPY ./simple_regression.py ./

5
Jenkinsfile vendored
View File

@ -5,10 +5,13 @@ pipeline {
stages { stages {
stage('Stage 1') { stage('Stage 1') {
steps { steps {
sh 'chmod u+x ./process_dataset.py' sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
echo 'Processing dataset...' echo 'Processing dataset...'
sh 'python3 process_dataset.py' sh 'python3 process_dataset.py'
echo 'Dataset processed' echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py'
echo 'Model predictions saved'
} }
} }
} }

View File

@ -5,10 +5,13 @@ pipeline {
stages { stages {
stage('Stage 1') { stage('Stage 1') {
steps { steps {
sh 'chmod u+x ./process_dataset.py' sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
echo 'Processing dataset...' echo 'Processing dataset...'
sh 'python3 process_dataset.py' sh 'python3 process_dataset.py'
echo 'Dataset processed' echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py'
echo 'Model predictions saved'
} }
} }
} }

70
simple_regression.py Normal file
View File

@ -0,0 +1,70 @@
import tensorflow as tf
from keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the pre-split training and test datasets.
train_frame = pd.read_csv('lego_sets_clean_train.csv')
test_frame = pd.read_csv('lego_sets_clean_test.csv')

# Select the columns used to predict a set's list price from the number of
# pieces it contains.
feature_column = 'piece_count'
target_column = 'list_price'
train_piece_counts = np.array(train_frame[feature_column])
train_prices = np.array(train_frame[target_column])
test_piece_counts = np.array(test_frame[feature_column])
test_prices = np.array(test_frame[target_column])

# Normalization layer for the single scalar input; its statistics are
# computed from the training piece counts.
normalizer = layers.Normalization(axis=None, input_shape=[1])
normalizer.adapt(train_piece_counts)

# Model: the normalization layer followed by one Dense unit.
output_layer = layers.Dense(units=1)
model = tf.keras.Sequential([normalizer, output_layer])

# Compile with the Adam optimizer and mean absolute error as the loss.
adam = tf.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=adam, loss='mean_absolute_error')

# Train silently, holding out 20% of the training data for validation.
history = model.fit(
    x=train_piece_counts,
    y=train_prices,
    validation_split=0.2,
    epochs=100,
    verbose=0,
)

# Simple evaluation on the test set.
test_loss = model.evaluate(x=test_piece_counts, y=test_prices, verbose=0)
test_results = {'model': test_loss}

# Predict prices for 6901 evenly spaced piece counts between 100 and 7000.
x = tf.linspace(start=100, stop=7000, num=6901)
y = model.predict(x)

# Save the predictions to a CSV file, one row per input piece count.
input_piece_counts = x.numpy().tolist()
predicted_prices = y[:, 0].tolist()  # first (only) column of each prediction row
results = pd.DataFrame({
    "input_piece_count": input_piece_counts,
    "predicted_price": predicted_prices,
})
results.to_csv('lego_linreg_results.csv', index=False, header=True)

# Optional statistics and plots. The snippet below is wrapped in a string
# literal, so it is never executed.
'''
print(test_results)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
plt.scatter(train_piece_counts, train_prices, label='Data')
plt.plot(x, y, color='k', label='Predictions')
plt.xlabel('pieces')
plt.ylabel('price')
plt.legend()
plt.show()
'''