IUM_05

2024-04-16 18:58:43 +02:00 · 2024-04-16 18:58:43 +02:00 · e067972a05
commit e067972a05
parent 50c62c859c
5 changed files with 87 additions and 15 deletions
--- a/7
+++ b/7
@ -10,12 +10,13 @@ ENV TZ=Etc/UTC
 RUN apt update && \
    apt install -y python3 python3-pip unzip
-RUN pip install kaggle pandas seaborn scikit-learn
+RUN pip install kaggle pandas seaborn scikit-learn tensorflow
 WORKDIR /app
-COPY dataset_stats.py /app/
+COPY IUM_05-model.py ./
-COPY IUM_02.py /app/
+COPY IUM_05-predict.py ./
 COPY IUM_05-split.py ./
 CMD ["python3", "IUM_02.py"]
--- a/IUM_05-model.py
+++ b/IUM_05-model.py
@ -0,0 +1,27 @@
 import pandas as pd
 import tensorflow as tf
 train_data = pd.read_csv('./beer_reviews_train.csv')
 X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
 y_train = train_data['review_overall']
 tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
 tokenizer.fit_on_texts(X_train)
 X_train_seq = tokenizer.texts_to_sequences(X_train)
 X_train_pad = tf.keras.preprocessing.sequence.pad_sequences(X_train_seq, maxlen=100)
 model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=10000, output_dim=16, input_length=100),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
 ])
 model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
 model.fit(X_train_pad, y_train, epochs=40, batch_size=32, validation_split=0.1)
 model.save('beer_review_sentiment_model.h5')
--- a/IUM_05-predict.py
+++ b/IUM_05-predict.py
@ -0,0 +1,18 @@
 import pandas as pd
 import numpy as np
 import tensorflow as tf
 test_data = pd.read_csv('./beer_reviews_test.csv')
 X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
 model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
 tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
 X_test_seq = tokenizer.texts_to_sequences(X_test)
 X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=100)
 predictions = model.predict(X_test_pad)
 np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
--- a/IUM_05-split.py
+++ b/IUM_05-split.py
@ -0,0 +1,9 @@
 # Split the full review table into train and test CSV files.
 import pandas as pd
 from sklearn.model_selection import train_test_split
 reviews = pd.read_csv('./beer_reviews.csv')
 # test_size=0.2 holds out a fifth of the rows; random_state=42 pins the
 # shuffle so repeated runs produce the same partition.
 partitions = train_test_split(reviews, test_size=0.2, random_state=42)
 output_paths = ('beer_reviews_train.csv', 'beer_reviews_test.csv')
 # train_test_split returns (train, test) in that order, matching the paths.
 for subset, path in zip(partitions, output_paths):
     subset.to_csv(path, index=False)
--- a/41
+++ b/41
@ -1,5 +1,5 @@
 pipeline {
-    agent { dockerfile true }
+    agent any
    parameters {
        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
@ -13,22 +13,39 @@ pipeline {
                git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
            }
        }
-
+        stage('Download dataset') {
        stage('Download, Process, and Split Dataset') {
            steps {
-                withEnv([
+                 withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
-                    "KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
+                    sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
                    "KAGGLE_KEY=${env.KAGGLE_KEY}"
                ]) {
                    sh 'export KAGGLE_USERNAME=${env.KAGGLE_USERNAME}"'
                    sh 'export KAGGLE_KEY=${env.KAGGLE_KEY}"'
                    sh "python3 IUM_02.py"
                }
            }
        }
-        stage('Archive Results') {
+        stage('Process and Split Dataset') {
            agent {
                dockerfile {
                    filename 'Dockerfile'
                    reuseNode true
                }
            }
            steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                sh "chmod +x ./IUM_05-split.py"
                sh "python ./IUM_05-split.py"
                archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
            }
        }
        stage("Run") {
            agent {
                dockerfile {
                    filename 'Dockerfile'
                    reuseNode true
                }
            }
            steps {
                sh "chmod +x ./IUM_05-model.py"
                sh "chmod +x ./IUM_05-predict.py"
                sh "python ./IUM_05-model.py"
                sh "python ./IUM_05-predict.py"
                archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
            }
        }
    }