cw5

2022-04-08 18:16:40 +02:00 · 2022-04-08 18:16:40 +02:00 · b534c4a05d
commit b534c4a05d
parent 7dde85eca0
4 changed files with 83 additions and 3 deletions
--- a/6
+++ b/6
@@ -7,6 +7,10 @@ RUN apt update && apt install -y python3-pip
 RUN pip3 install pandas
 RUN pip3 install numpy
 RUN pip3 install sklearn
+RUN pip3 install tensorflow
+RUN pip3 install matplotlib
+RUN pip3 install keras

 COPY ./lego_sets.csv ./
-COPY ./process_dataset.py ./
+COPY ./process_dataset.py ./
+COPY ./simple_regression.py ./
--- a/5
+++ b/5
@@ -5,10 +5,13 @@ pipeline {
    stages {
        stage('Stage 1') {
            steps {
-				    sh 'chmod u+x ./process_dataset.py'
+				    sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                    echo 'Processing dataset...'
                    sh 'python3 process_dataset.py'
 					echo 'Dataset processed'
+					echo 'Conducting simple regression model test'
+					sh 'python3 simple_regression.py'
+					echo 'Model predictions saved'
 				}
            }
        }		
--- a/5
+++ b/5
@@ -5,10 +5,13 @@ pipeline {
    stages {
        stage('Stage 1') {
            steps {
-				    sh 'chmod u+x ./process_dataset.py'
+				    sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                    echo 'Processing dataset...'
                    sh 'python3 process_dataset.py'
 					echo 'Dataset processed'
+					echo 'Conducting simple regression model test'
+					sh 'python3 simple_regression.py'
+					echo 'Model predictions saved'
 				}
            }
        }		
--- a/simple_regression.py
+++ b/simple_regression.py
@@ -0,0 +1,70 @@
+import tensorflow as tf
+from keras import layers
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Load the data
+data_train = pd.read_csv('lego_sets_clean_train.csv')
+data_test = pd.read_csv('lego_sets_clean_test.csv')
+
+# Extract the arrays used to predict a set's price from the number of pieces it contains
+train_piece_counts = np.array(data_train['piece_count'])
+train_prices = np.array(data_train['list_price'])
+test_piece_counts = np.array(data_test['piece_count'])
+test_prices = np.array(data_test['list_price'])
+
+# Normalization
+normalizer = layers.Normalization(input_shape=[1, ], axis=None)
+normalizer.adapt(train_piece_counts)
+
+# Initialization
+model = tf.keras.Sequential([
+    normalizer,
+    layers.Dense(units=1)
+])
+
+# Compilation
+model.compile(
+    optimizer=tf.optimizers.Adam(learning_rate=0.1),
+    loss='mean_absolute_error'
+)
+
+# Training
+history = model.fit(
+    train_piece_counts,
+    train_prices,
+    epochs=100,
+    verbose=0,
+    validation_split=0.2
+)
+
+# Simple evaluation
+test_results = {'model': model.evaluate(
+    test_piece_counts,
+    test_prices, verbose=0)
+}
+
+# Run many predictions
+x = tf.linspace(100, 7000, 6901)
+y = model.predict(x)
+
+# Save the predictions to a file
+results = pd.DataFrame({"input_piece_count": x.numpy().tolist(), "predicted_price": [a[0] for a in y.tolist()]})
+results.to_csv(r'lego_linreg_results.csv', index=False, header=True)
+
+# Optional statistics and plots
+'''
+print(test_results)
+
+hist = pd.DataFrame(history.history)
+hist['epoch'] = history.epoch
+print(hist.tail())
+
+plt.scatter(train_piece_counts, train_prices, label='Data')
+plt.plot(x, y, color='k', label='Predictions')
+plt.xlabel('pieces')
+plt.ylabel('price')
+plt.legend()
+plt.show()
+'''