This commit is contained in:
Kacper 2022-04-08 18:16:40 +02:00
parent 7dde85eca0
commit b534c4a05d
4 changed files with 83 additions and 3 deletions

View File

@ -7,6 +7,10 @@ RUN apt update && apt install -y python3-pip
# Python packages installed into the image.
RUN pip3 install pandas
RUN pip3 install numpy
# Install under the real distribution name: the "sklearn" alias on PyPI is
# deprecated and its installation fails on purpose.
RUN pip3 install scikit-learn
RUN pip3 install tensorflow
RUN pip3 install matplotlib
RUN pip3 install keras
# Input dataset and the scripts copied into the image.
COPY ./lego_sets.csv ./
COPY ./process_dataset.py ./
COPY ./simple_regression.py ./

5
Jenkinsfile vendored
View File

@ -5,10 +5,13 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./process_dataset.py'
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
echo 'Processing dataset...'
sh 'python3 process_dataset.py'
echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py'
echo 'Model predictions saved'
}
}
}

View File

@ -5,10 +5,13 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./process_dataset.py'
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
echo 'Processing dataset...'
sh 'python3 process_dataset.py'
echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py'
echo 'Model predictions saved'
}
}
}

70
simple_regression.py Normal file
View File

@ -0,0 +1,70 @@
import tensorflow as tf
from keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the training and test splits.
data_train = pd.read_csv('lego_sets_clean_train.csv')
data_test = pd.read_csv('lego_sets_clean_test.csv')

# Pull out the arrays used to predict a set's list price from the number of
# pieces it contains: piece count is the single feature, list price the target.
train_piece_counts, train_prices = (
    np.array(data_train[column]) for column in ('piece_count', 'list_price')
)
test_piece_counts, test_prices = (
    np.array(data_test[column]) for column in ('piece_count', 'list_price')
)

# Normalization layer fitted on the training piece counts.
normalizer = layers.Normalization(input_shape=[1, ], axis=None)
normalizer.adapt(train_piece_counts)

# Model: the normalization layer followed by a single-unit dense layer.
regression_layers = [normalizer, layers.Dense(units=1)]
model = tf.keras.Sequential(regression_layers)

# Compile with Adam and mean absolute error as the loss.
adam = tf.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=adam, loss='mean_absolute_error')

# Train silently; part of the training data is set aside for validation.
fit_options = dict(epochs=100, verbose=0, validation_split=0.2)
history = model.fit(train_piece_counts, train_prices, **fit_options)

# Simple evaluation on the test split.
test_loss = model.evaluate(test_piece_counts, test_prices, verbose=0)
test_results = {'model': test_loss}

# Predict the price for every piece count on an evenly spaced grid.
x = tf.linspace(100, 7000, 6901)
y = model.predict(x)

# Save the predictions to a CSV file; each prediction row holds one value.
results = pd.DataFrame({
    "input_piece_count": x.numpy().tolist(),
    "predicted_price": y[:, 0].tolist(),
})
results.to_csv('lego_linreg_results.csv', index=False, header=True)

# Optional statistics and plots, disabled by keeping them inside a bare
# string literal that is never executed.
'''
print(test_results)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
plt.scatter(train_piece_counts, train_prices, label='Data')
plt.plot(x, y, color='k', label='Predictions')
plt.xlabel('pieces')
plt.ylabel('price')
plt.legend()
plt.show()
'''