Add sacred
Some checks failed: s434780-training/pipeline/head (there was a failure building this commit)
parent 4058aacccb, commit 00d6a1908b
Dockerfile
@@ -9,3 +9,5 @@ RUN pip3 install numpy
 RUN pip3 install matplotlib
 RUN pip3 install sklearn
 RUN pip3 install silence-tensorflow
+RUN pip3 install sacred
+RUN pip3 install mlflow
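The two new layers install the experiment-tracking stack this commit starts using: sacred captures runs (see the new training script below) and mlflow logs parameters and metrics. A minimal sketch of how the two are commonly combined; the names here are illustrative, not from this repo:

# Illustrative only: sacred records the run, mlflow tracks its numbers.
from sacred import Experiment
import mlflow

ex = Experiment('demo')

@ex.automain
def main():
    # mlflow starts a tracking run implicitly on the first logging call
    mlflow.log_param('epochs', 15)
    mlflow.log_metric('accuracy', 0.9)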
Jenkinsfile
@@ -42,7 +42,7 @@ pipeline {
         }
 
         stage('archiveArtifacts') {
-            steps{
+            steps {
                 archiveArtifacts 'evaluation.txt'
             }
         }
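The archived evaluation.txt is produced by the evaluation script changed in the next hunk, which writes accuracy first and then F1. A hedged sketch of a quality gate that an additional pipeline stage could run before archiving; the script name, threshold, and one-value-per-line layout are assumptions, not part of this commit:

# check_metrics.py - hypothetical gate script; a non-zero exit fails the build.
import sys

THRESHOLD = 0.7  # assumed minimum acceptable accuracy

with open('evaluation.txt') as f:
    accuracy = float(f.readline())  # accuracy is written on the first line

if accuracy < THRESHOLD:
    sys.exit(f'accuracy {accuracy:.3f} is below {THRESHOLD}')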
@@ -21,8 +21,8 @@ predictions = [1 if p > 0.5 else 0 for p in predictions]
 accuracy = accuracy_score(test_y, predictions)
 f1 = f1_score(test_y, predictions)
 
-file = open('evaluation.txt', 'w')
+file = open('evaluation.txt', 'a')
 file.writelines(accuracy.__str__() + '\n')
 file.writelines(f1.__str__())
 file.close()
 
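Switching the mode from 'w' to 'a' makes evaluation.txt accumulate one entry per build instead of being overwritten. Note that the F1 value is written without a trailing newline, so the next build's accuracy is appended onto the same line. A minimal sketch of a variant that keeps appended entries separable; the labelled one-line-per-run format is an assumption, not what the repo does:

# One self-describing line per run; the trailing newline keeps runs separated.
with open('evaluation.txt', 'a') as f:
    f.write(f'accuracy={accuracy} f1={f1}\n')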
train-tensorflow-sacred.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import pandas as pd
from collections import Counter

# Silence TensorFlow's startup logging before TensorFlow itself is imported,
# otherwise the call has no effect.
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers

from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("file_observer")
ex.observers.append(FileStorageObserver('my_runs'))


@ex.config
def my_config():
    epochs = 15
    batch_size = 16


def counter_word(text_col):
    # Word-frequency count over every review; defined before prepare_model,
    # which calls it.
    count = Counter()
    for text in text_col.values:
        for word in text.split():
            count[word] += 1
    return count


@ex.capture
def prepare_model(epochs, batch_size):
    df = pd.read_csv('data.csv')
    train_df = pd.read_csv('train.csv')
    val_df = pd.read_csv('dev.csv')
    test_df = pd.read_csv('test.csv')

    # Drop rows with missing review text in every split.
    df.dropna(subset=['reviews.text'], inplace=True)
    train_df.dropna(subset=['reviews.text'], inplace=True)
    val_df.dropna(subset=['reviews.text'], inplace=True)
    test_df.dropna(subset=['reviews.text'], inplace=True)

    train_sentences = train_df['reviews.text'].to_numpy()
    train_labels = train_df['reviews.doRecommend'].to_numpy()
    val_sentences = val_df['reviews.text'].to_numpy()
    val_labels = val_df['reviews.doRecommend'].to_numpy()
    test_sentences = test_df['reviews.text'].to_numpy()
    test_labels = test_df['reviews.doRecommend'].to_numpy()

    # print(train_labels.shape)
    # print(train_sentences.shape)

    # The vocabulary size comes from the full dataset.
    counter = counter_word(df['reviews.text'])
    num_unique_words = len(counter)

    tokenizer = Tokenizer(num_words=num_unique_words)
    tokenizer.fit_on_texts(train_sentences)
    word_index = tokenizer.word_index

    train_sequences = tokenizer.texts_to_sequences(train_sentences)
    val_sequences = tokenizer.texts_to_sequences(val_sentences)
    test_sequences = tokenizer.texts_to_sequences(test_sentences)

    # Pad/truncate every review to a fixed length of 30 tokens.
    max_length = 30
    train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
    val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
    test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")

    # Store each padded sequence as a list; assigning the 2-D array directly
    # to a single column raises a ValueError in pandas.
    test_df['reviews.text'] = test_padded.tolist()
    test_df.to_csv('test.csv')

    model = keras.models.Sequential()
    model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
    model.add(layers.LSTM(64, dropout=0.1))
    model.add(layers.Dense(1, activation="sigmoid"))

    model.summary()

    loss = keras.losses.BinaryCrossentropy(from_logits=False)
    optim = keras.optimizers.Adam(learning_rate=0.001)  # 'lr' is a deprecated alias
    metrics = ["accuracy"]

    model.compile(loss=loss, optimizer=optim, metrics=metrics)
    # Train with the Sacred config values rather than a hard-coded epoch count.
    model.fit(train_padded, train_labels, epochs=epochs, batch_size=batch_size,
              validation_data=(val_padded, val_labels), verbose=2)

    predictions = model.predict(test_padded)
    predictions = [1 if p > 0.5 else 0 for p in predictions]

    # Save to a single HDF5 file: ex.add_artifact() needs a file path, and
    # model.save('trained_model') would produce a SavedModel directory.
    model.save('trained_model.h5')

    file = open('results.txt', 'w')
    file.write(str(predictions))
    file.close()


# automain must be the last thing in the module; it runs the experiment when
# the script is executed, so a separate ex.run() call would start a second run,
# and add_artifact() must be called while a run is active.
@ex.automain
def my_main():
    prepare_model()
    ex.add_artifact('trained_model.h5')
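With the FileStorageObserver attached, each execution writes its config, captured output, and artifacts under my_runs/. Sacred's command line also accepts config overrides, e.g. python train-tensorflow-sacred.py with epochs=5 batch_size=32. A sketch of driving the same experiment from Python; this is hypothetical, since the hyphenated filename would first need renaming (say to train_tensorflow_sacred.py) to be importable:

from train_tensorflow_sacred import ex

# Override the defaults declared in my_config for a quick smoke-test run.
run = ex.run(config_updates={"epochs": 1, "batch_size": 32})
print(run.config["epochs"])  # -> 1; the run's directory appears under my_runs/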