cw5

2022-04-08 18:16:40 +02:00 · 2022-04-08 18:16:40 +02:00 · b534c4a05d
commit b534c4a05d
parent 7dde85eca0
4 changed files with 83 additions and 3 deletions
--- a/4
+++ b/4
@ -7,6 +7,10 @@ RUN apt update && apt install -y python3-pip
 # Python packages installed into the image, one layer per package.
 RUN pip3 install pandas
 RUN pip3 install numpy
 # The "sklearn" name on PyPI is a deprecated alias that now fails to install;
 # the actual distribution is "scikit-learn" (still imported as `sklearn`).
 RUN pip3 install scikit-learn
 RUN pip3 install tensorflow
 RUN pip3 install matplotlib
 RUN pip3 install keras
 # Copy the raw dataset and the two Python scripts into the image.
 COPY ./lego_sets.csv ./
 COPY ./process_dataset.py ./
 COPY ./simple_regression.py ./
--- a/5
+++ b/5
@ -5,10 +5,13 @@ pipeline {
    stages {
        stage('Stage 1') {
            steps {
-				    sh 'chmod u+x ./process_dataset.py'
+				    sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                    echo 'Processing dataset...'
                    sh 'python3 process_dataset.py'
 					echo 'Dataset processed'
 					echo 'Conducting simple regression model test'
 					sh 'python3 simple_regression.py'
 					echo 'Model predictions saved'
 				}
            }
        }		
--- a/5
+++ b/5
@ -5,10 +5,13 @@ pipeline {
    stages {
        stage('Stage 1') {
            steps {
-				    sh 'chmod u+x ./process_dataset.py'
+				    sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
                    echo 'Processing dataset...'
                    sh 'python3 process_dataset.py'
 					echo 'Dataset processed'
 					echo 'Conducting simple regression model test'
 					sh 'python3 simple_regression.py'
 					echo 'Model predictions saved'
 				}
            }
        }		
--- a/simple_regression.py
+++ b/simple_regression.py
@ -0,0 +1,70 @@
 import tensorflow as tf
 from keras import layers
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 # Load the training and test splits from CSV files in the working directory.
 data_train = pd.read_csv('lego_sets_clean_train.csv')
 data_test = pd.read_csv('lego_sets_clean_test.csv')
 # Extract the model input (piece count) and the target (list price) for each split.
 def _pieces_and_prices(frame):
     """Return the 'piece_count' and 'list_price' columns of *frame* as NumPy arrays."""
     return np.array(frame['piece_count']), np.array(frame['list_price'])
 train_piece_counts, train_prices = _pieces_and_prices(data_train)
 test_piece_counts, test_prices = _pieces_and_prices(data_test)
 # Normalization layer; its statistics are computed from the training piece counts.
 normalizer = layers.Normalization(input_shape=[1, ], axis=None)
 normalizer.adapt(train_piece_counts)
 # Model definition: the normalization layer followed by a single dense unit.
 layer_stack = [normalizer, layers.Dense(units=1)]
 model = tf.keras.Sequential(layer_stack)
 # Compile with the Adam optimizer and mean absolute error as the loss.
 adam = tf.optimizers.Adam(learning_rate=0.1)
 model.compile(optimizer=adam, loss='mean_absolute_error')
 # Train silently for 100 epochs, holding out 20% of the training data for validation.
 history = model.fit(
     x=train_piece_counts,
     y=train_prices,
     validation_split=0.2,
     epochs=100,
     verbose=0,
 )
 # Simple evaluation of the trained model on the test split.
 test_loss = model.evaluate(x=test_piece_counts, y=test_prices, verbose=0)
 test_results = {'model': test_loss}
 # Predict for 6901 evenly spaced inputs between 100 and 7000.
 x = tf.linspace(100, 7000, 6901)
 y = model.predict(x)
 # Save the inputs and the first value of each prediction row to a CSV file.
 input_counts = x.numpy().tolist()
 predicted_prices = [row[0] for row in y.tolist()]
 results = pd.DataFrame({"input_piece_count": input_counts, "predicted_price": predicted_prices})
 results.to_csv(r'lego_linreg_results.csv', index=False, header=True)
 # Optional statistics and plots; disabled by being wrapped in a bare string literal.
 '''
 print(test_results)
 hist = pd.DataFrame(history.history)
 hist['epoch'] = history.epoch
 print(hist.tail())
 plt.scatter(train_piece_counts, train_prices, label='Data')
 plt.plot(x, y, color='k', label='Predictions')
 plt.xlabel('pieces')
 plt.ylabel('price')
 plt.legend()
 plt.show()
 '''