IUM_05

2024-04-16 18:58:43 +02:00 · 2024-04-16 18:58:43 +02:00 · e067972a05
commit e067972a05
parent 50c62c859c
5 changed files with 87 additions and 15 deletions
--- a/7
+++ b/7
@ -10,12 +10,15 @@ ENV TZ=Etc/UTC
 RUN apt update && \
    apt install -y python3 python3-pip unzip

-RUN pip install kaggle pandas seaborn scikit-learn
+RUN pip install kaggle pandas seaborn scikit-learn tensorflow

 WORKDIR /app

-COPY dataset_stats.py /app/
+# IUM_02.py must stay in the image: the default CMD below still runs it.
 COPY IUM_02.py /app/
+COPY IUM_05-model.py ./
+COPY IUM_05-predict.py ./
+COPY IUM_05-split.py ./

 CMD ["python3", "IUM_02.py"]

--- a/IUM_05-model.py
+++ b/IUM_05-model.py
@ -0,0 +1,31 @
+"""Train a dense regression network that predicts ``review_overall``.
+
+Reads ``./beer_reviews_train.csv``, fits the model on the four numeric review
+sub-scores and writes it to ``beer_review_sentiment_model.h5``.
+"""
+import pandas as pd
+import tensorflow as tf
+
+# The inputs are numeric ratings, not text, so they are fed to the network
+# directly rather than through a text Tokenizer/Embedding pipeline.
+FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
+TARGET_COLUMN = 'review_overall'
+
+train_data = pd.read_csv('./beer_reviews_train.csv')
+X_train = train_data[FEATURE_COLUMNS].to_numpy(dtype='float32')
+y_train = train_data[TARGET_COLUMN].to_numpy(dtype='float32')
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Input(shape=(len(FEATURE_COLUMNS),)),
+    tf.keras.layers.Dense(16, activation='relu'),
+    # Linear output: the target is a rating, not a 0/1 label.
+    tf.keras.layers.Dense(1),
+])
+
+model.compile(optimizer='adam',
+              loss='mean_squared_error',
+              metrics=['mae'])
+
+model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1)
+
+model.save('beer_review_sentiment_model.h5')
--- a/IUM_05-predict.py
+++ b/IUM_05-predict.py
@ -0,0 +1,21 @
+"""Score the test split with the trained model.
+
+Reads ``./beer_reviews_test.csv`` and ``beer_review_sentiment_model.h5`` and
+writes one prediction per test row to ``beer_review_sentiment_predictions.csv``.
+"""
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+
+# Must list the same columns, in the same order, as the training script.
+FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
+
+test_data = pd.read_csv('./beer_reviews_test.csv')
+X_test = test_data[FEATURE_COLUMNS].to_numpy(dtype='float32')
+
+# Inference only, so the training configuration does not need to be restored.
+model = tf.keras.models.load_model('beer_review_sentiment_model.h5', compile=False)
+
+predictions = model.predict(X_test)
+
+np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
--- a/IUM_05-split.py
+++ b/IUM_05-split.py
@ -0,0 +1,12 @
+"""Split ``beer_reviews.csv`` into an 80/20 train/test pair of CSV files."""
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+reviews = pd.read_csv('./beer_reviews.csv')
+
+# Fixed seed so the split is reproducible between runs.
+train_part, test_part = train_test_split(reviews, test_size=0.2, random_state=42)
+
+for frame, out_path in ((train_part, 'beer_reviews_train.csv'),
+                        (test_part, 'beer_reviews_test.csv')):
+    frame.to_csv(out_path, index=False)
--- a/41
+++ b/41
@ -1,5 +1,5 @
 pipeline {
-    agent { dockerfile true }
+    agent any

    parameters {
        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
@ -13,22 +13,38 @@ pipeline {
                git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
            }
        }
-
-        stage('Download, Process, and Split Dataset') {
+        stage('Download dataset') {
            steps {
-                withEnv([
-                    "KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
-                    "KAGGLE_KEY=${env.KAGGLE_KEY}"
-                ]) {
-                    sh 'export KAGGLE_USERNAME=${env.KAGGLE_USERNAME}"'
-                    sh 'export KAGGLE_KEY=${env.KAGGLE_KEY}"'
-                    sh "python3 IUM_02.py"
+                withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
+                    sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
                }
            }
        }
-        stage('Archive Results') {
+        stage('Process and Split Dataset') {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
            steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                // The Dockerfile installs python3 only; the script is passed to the interpreter, so no chmod +x is needed.
+                sh "python3 ./IUM_05-split.py"
+                archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
+            }
+        }
+        stage("Run") {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
+            steps {
+                // Train first: the predict script loads the model file written by the training script.
+                sh "python3 ./IUM_05-model.py"
+                sh "python3 ./IUM_05-predict.py"
+                archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
            }
        }
    }