diff --git a/Dockerfile b/Dockerfile
index 4928c9a..d021e3a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,10 @@ RUN apt update && apt install -y python3-pip
 RUN pip3 install pandas
 RUN pip3 install numpy
 RUN pip3 install sklearn
+RUN pip3 install tensorflow
+RUN pip3 install matplotlib
+RUN pip3 install keras
 
 COPY ./lego_sets.csv ./
-COPY ./process_dataset.py ./
\ No newline at end of file
+COPY ./process_dataset.py ./
+COPY ./simple_regression.py ./
diff --git a/Jenkinsfile b/Jenkinsfile
index b939185..4998482 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -5,10 +5,13 @@ pipeline {
     stages {
         stage('Stage 1') {
             steps {
-                sh 'chmod u+x ./process_dataset.py'
+                sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                 echo 'Processing dataset...'
                 sh 'python3 process_dataset.py'
                 echo 'Dataset processed'
+                echo 'Conducting simple regression model test'
+                sh 'python3 simple_regression.py'
+                echo 'Model predictions saved'
             }
         }
     }
diff --git a/Jenkinsfile1 b/Jenkinsfile1
index ae4e9b6..f384553 100644
--- a/Jenkinsfile1
+++ b/Jenkinsfile1
@@ -5,10 +5,13 @@ pipeline {
     stages {
         stage('Stage 1') {
             steps {
-                sh 'chmod u+x ./process_dataset.py'
+                sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                 echo 'Processing dataset...'
                 sh 'python3 process_dataset.py'
                 echo 'Dataset processed'
+                echo 'Conducting simple regression model test'
+                sh 'python3 simple_regression.py'
+                echo 'Model predictions saved'
             }
         }
     }
diff --git a/simple_regression.py b/simple_regression.py
new file mode 100644
index 0000000..8a45617
--- /dev/null
+++ b/simple_regression.py
@@ -0,0 +1,71 @@
+import tensorflow as tf
+# Take the layers from the Keras bundled with TensorFlow so they match tf.keras.Sequential below
+from tensorflow.keras import layers
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Load the data
+data_train = pd.read_csv('lego_sets_clean_train.csv')
+data_test = pd.read_csv('lego_sets_clean_test.csv')
+
+# Extract the arrays used to predict a set's price from the number of pieces it contains
+train_piece_counts = np.array(data_train['piece_count'])
+train_prices = np.array(data_train['list_price'])
+test_piece_counts = np.array(data_test['piece_count'])
+test_prices = np.array(data_test['list_price'])
+
+# Normalization
+normalizer = layers.Normalization(input_shape=[1, ], axis=None)
+normalizer.adapt(train_piece_counts)
+
+# Model initialization
+model = tf.keras.Sequential([
+    normalizer,
+    layers.Dense(units=1)
+])
+
+# Compilation
+model.compile(
+    optimizer=tf.optimizers.Adam(learning_rate=0.1),
+    loss='mean_absolute_error'
+)
+
+# Training
+history = model.fit(
+    train_piece_counts,
+    train_prices,
+    epochs=100,
+    verbose=0,
+    validation_split=0.2
+)
+
+# Simple evaluation
+test_results = {'model': model.evaluate(
+    test_piece_counts,
+    test_prices, verbose=0)
+}
+
+# Run a batch of predictions
+x = tf.linspace(100, 7000, 6901)
+y = model.predict(x)
+
+# Save the predictions to a file
+results = pd.DataFrame({"input_piece_count": x.numpy().tolist(), "predicted_price": [a[0] for a in y.tolist()]})
+results.to_csv(r'lego_linreg_results.csv', index=False, header=True)
+
+# Optional statistics and plots
+'''
+print(test_results)
+
+hist = pd.DataFrame(history.history)
+hist['epoch'] = history.epoch
+print(hist.tail())
+
+plt.scatter(train_piece_counts, train_prices, label='Data')
+plt.plot(x, y, color='k', label='Predictions')
+plt.xlabel('pieces')
+plt.ylabel('price')
+plt.legend()
+plt.show()
+'''