Add sacred
Some checks failed: s434780-training/pipeline/head (there was a failure building this commit)
parent 4058aacccb, commit 00d6a1908b
Dockerfile
@@ -9,3 +9,5 @@ RUN pip3 install numpy
 RUN pip3 install matplotlib
 RUN pip3 install sklearn
 RUN pip3 install silence-tensorflow
+RUN pip3 install sacred
+RUN pip3 install mlflow
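The two new layers install the experiment-tracking stack this commit starts using: sacred captures runs (see the new training script below) and mlflow logs parameters and metrics. A minimal sketch of how the two are commonly combined; the names here are illustrative, not from this repo:

# Illustrative only: sacred records the run, mlflow tracks its numbers.
from sacred import Experiment
import mlflow

ex = Experiment('demo')

@ex.automain
def main():
    # mlflow starts a tracking run implicitly on the first logging call
    mlflow.log_param('epochs', 15)
    mlflow.log_metric('accuracy', 0.9)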
Jenkinsfile
@@ -42,7 +42,7 @@ pipeline {
         }
 
         stage('archiveArtifacts') {
-            steps{
+            steps {
                 archiveArtifacts 'evaluation.txt'
             }
         }
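The archived evaluation.txt is produced by the evaluation script changed in the next hunk, which writes accuracy first and then F1. A hedged sketch of a quality gate that an additional pipeline stage could run before archiving; the script name, threshold, and one-value-per-line layout are assumptions, not part of this commit:

# check_metrics.py - hypothetical gate script; a non-zero exit fails the build.
import sys

THRESHOLD = 0.7  # assumed minimum acceptable accuracy

with open('evaluation.txt') as f:
    accuracy = float(f.readline())  # accuracy is written on the first line

if accuracy < THRESHOLD:
    sys.exit(f'accuracy {accuracy:.3f} is below {THRESHOLD}')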
@@ -21,8 +21,8 @@ predictions = [1 if p > 0.5 else 0 for p in predictions]
 accuracy = accuracy_score(test_y, predictions)
 f1 = f1_score(test_y, predictions)
 
-file = open('evaluation.txt', 'w')
+file = open('evaluation.txt', 'a')
 file.writelines(accuracy.__str__() + '\n')
 file.writelines(f1.__str__())
 file.close()
 
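Switching the mode from 'w' to 'a' makes evaluation.txt accumulate one entry per build instead of being overwritten. Note that the F1 value is written without a trailing newline, so the next build's accuracy is appended onto the same line. A minimal sketch of a variant that keeps appended entries separable; the labelled one-line-per-run format is an assumption, not what the repo does:

# One self-describing line per run; the trailing newline keeps runs separated.
with open('evaluation.txt', 'a') as f:
    f.write(f'accuracy={accuracy} f1={f1}\n')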
train-tensorflow-sacred.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import pandas as pd
from collections import Counter

# Silence TensorFlow's startup logging before TensorFlow itself is imported,
# otherwise the call has no effect.
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers

from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("file_observer")
ex.observers.append(FileStorageObserver('my_runs'))


@ex.config
def my_config():
    epochs = 15
    batch_size = 16


def counter_word(text_col):
    # Word-frequency count over every review; defined before prepare_model,
    # which calls it.
    count = Counter()
    for text in text_col.values:
        for word in text.split():
            count[word] += 1
    return count


@ex.capture
def prepare_model(epochs, batch_size):
    df = pd.read_csv('data.csv')
    train_df = pd.read_csv('train.csv')
    val_df = pd.read_csv('dev.csv')
    test_df = pd.read_csv('test.csv')

    # Drop rows with missing review text in every split.
    df.dropna(subset=['reviews.text'], inplace=True)
    train_df.dropna(subset=['reviews.text'], inplace=True)
    val_df.dropna(subset=['reviews.text'], inplace=True)
    test_df.dropna(subset=['reviews.text'], inplace=True)

    train_sentences = train_df['reviews.text'].to_numpy()
    train_labels = train_df['reviews.doRecommend'].to_numpy()
    val_sentences = val_df['reviews.text'].to_numpy()
    val_labels = val_df['reviews.doRecommend'].to_numpy()
    test_sentences = test_df['reviews.text'].to_numpy()
    test_labels = test_df['reviews.doRecommend'].to_numpy()

    # print(train_labels.shape)
    # print(train_sentences.shape)

    # The vocabulary size comes from the full dataset.
    counter = counter_word(df['reviews.text'])
    num_unique_words = len(counter)

    tokenizer = Tokenizer(num_words=num_unique_words)
    tokenizer.fit_on_texts(train_sentences)
    word_index = tokenizer.word_index

    train_sequences = tokenizer.texts_to_sequences(train_sentences)
    val_sequences = tokenizer.texts_to_sequences(val_sentences)
    test_sequences = tokenizer.texts_to_sequences(test_sentences)

    # Pad/truncate every review to a fixed length of 30 tokens.
    max_length = 30
    train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
    val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
    test_padded = pad_sequences(test_sequences, maxlen=max_length, padding="post", truncating="post")

    # Store each padded sequence as a list; assigning the 2-D array directly
    # to a single column raises a ValueError in pandas.
    test_df['reviews.text'] = test_padded.tolist()
    test_df.to_csv('test.csv')

    model = keras.models.Sequential()
    model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))
    model.add(layers.LSTM(64, dropout=0.1))
    model.add(layers.Dense(1, activation="sigmoid"))

    model.summary()

    loss = keras.losses.BinaryCrossentropy(from_logits=False)
    optim = keras.optimizers.Adam(learning_rate=0.001)  # 'lr' is a deprecated alias
    metrics = ["accuracy"]

    model.compile(loss=loss, optimizer=optim, metrics=metrics)
    # Train with the Sacred config values rather than a hard-coded epoch count.
    model.fit(train_padded, train_labels, epochs=epochs, batch_size=batch_size,
              validation_data=(val_padded, val_labels), verbose=2)

    predictions = model.predict(test_padded)
    predictions = [1 if p > 0.5 else 0 for p in predictions]

    # Save to a single HDF5 file: ex.add_artifact() needs a file path, and
    # model.save('trained_model') would produce a SavedModel directory.
    model.save('trained_model.h5')

    file = open('results.txt', 'w')
    file.write(str(predictions))
    file.close()


# automain must be the last thing in the module; it runs the experiment when
# the script is executed, so a separate ex.run() call would start a second run,
# and add_artifact() must be called while a run is active.
@ex.automain
def my_main():
    prepare_model()
    ex.add_artifact('trained_model.h5')
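With the FileStorageObserver attached, each execution writes its config, captured output, and artifacts under my_runs/. Sacred's command line also accepts config overrides, e.g. python train-tensorflow-sacred.py with epochs=5 batch_size=32. A sketch of driving the same experiment from Python; this is hypothetical, since the hyphenated filename would first need renaming (say to train_tensorflow_sacred.py) to be importable:

from train_tensorflow_sacred import ex

# Override the defaults declared in my_config for a quick smoke-test run.
run = ex.run(config_updates={"epochs": 1, "batch_size": 32})
print(run.config["epochs"])  # -> 1; the run's directory appears under my_runs/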